Diffstat (limited to 'contrib/llvm/lib/CodeGen')
184 files changed, 23076 insertions, 11453 deletions
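Two mechanical migrations recur through nearly every file below: each .cpp file's "#define DEBUG_TYPE" moves from above the includes to after them (and after "using namespace llvm;"), so the macro can no longer affect or collide with anything defined in the headers, and NULL/0 pointer constants become nullptr. A minimal sketch of the new layout (file and pass names here are illustrative, not from the commit):

  #include "llvm/Support/Debug.h"
  using namespace llvm;

  // DEBUG_TYPE now comes after all includes, so no header is ever
  // preprocessed with this definition in scope.
  #define DEBUG_TYPE "my-pass"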
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 2ee7767..44345ad 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "post-RA-sched" #include "AggressiveAntiDepBreaker.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -29,6 +28,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "post-RA-sched" + // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod static cl::opt<int> DebugDiv("agg-antidep-debugdiv", @@ -121,7 +122,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi, TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), RegClassInfo(RCI), - State(NULL) { + State(nullptr) { /* Collect a bitset of all registers that are only broken if they are on the critical path. */ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { @@ -144,7 +145,7 @@ AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { } void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { - assert(State == NULL); + assert(!State); State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); @@ -169,7 +170,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { @@ -183,7 +184,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { void AggressiveAntiDepBreaker::FinishBlock() { delete State; - State = NULL; + State = nullptr; } void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, @@ -230,13 +231,13 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, if (Reg == 0) return false; - MachineOperand *Op = NULL; + MachineOperand *Op = nullptr; if (MO.isDef()) Op = MI->findRegisterUseOperand(Reg, true); else Op = MI->findRegisterDefOperand(Reg); - return((Op != NULL) && Op->isImplicit()); + return(Op && Op->isImplicit()); } void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, @@ -273,10 +274,10 @@ static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) { /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. static const SUnit *CriticalPathStep(const SUnit *SU) { - const SDep *Next = 0; + const SDep *Next = nullptr; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. - if (SU != 0) { + if (SU) { for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { const SUnit *PredSU = P->getSUnit(); @@ -292,7 +293,7 @@ static const SUnit *CriticalPathStep(const SUnit *SU) { } } - return (Next) ? Next->getSUnit() : 0; + return (Next) ? 
Next->getSUnit() : nullptr; } void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, @@ -309,8 +310,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DefIndices[Reg] = ~0u; RegRefs.erase(Reg); State->LeaveGroup(Reg); - DEBUG(if (header != NULL) { - dbgs() << header << TRI->getName(Reg); header = NULL; }); + DEBUG(if (header) { + dbgs() << header << TRI->getName(Reg); header = nullptr; }); DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); } // Repeat for subregisters. @@ -321,14 +322,14 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DefIndices[SubregReg] = ~0u; RegRefs.erase(SubregReg); State->LeaveGroup(SubregReg); - DEBUG(if (header != NULL) { - dbgs() << header << TRI->getName(Reg); header = NULL; }); + DEBUG(if (header) { + dbgs() << header << TRI->getName(Reg); header = nullptr; }); DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" << State->GetGroup(SubregReg) << tag); } } - DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer); + DEBUG(if (!header && footer) dbgs() << footer); } void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, @@ -382,7 +383,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, } // Note register reference... - const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; if (i < MI->getDesc().getNumOperands()) RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; @@ -403,8 +404,18 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, continue; // Update def for Reg and aliases. - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + // We need to be careful here not to define already-live super registers. + // If the super register is already live, then this definition is not + // a definition of the whole super register (just a partial insertion + // into it). Earlier subregister definitions (which we've not yet visited + // because we're iterating bottom-up) need to be linked to the same group + // as this definition. + if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) + continue; + DefIndices[*AI] = Count; + } } } @@ -456,7 +467,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, } // Note register reference... - const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; if (i < MI->getDesc().getNumOperands()) RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; @@ -506,7 +517,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { const TargetRegisterClass *RC = Q->second.RC; - if (RC == NULL) continue; + if (!RC) continue; BitVector RCBV = TRI->getAllocatableSet(MF, RC); if (first) { @@ -571,7 +582,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( unsigned Reg = Regs[i]; if (Reg == SuperReg) continue; bool IsSub = TRI->isSubRegister(SuperReg, Reg); - assert(IsSub && "Expecting group subregister"); + // FIXME: remove this once PR18663 has been properly fixed. 
For now, + // return a conservative answer: + // assert(IsSub && "Expecting group subregister"); if (!IsSub) return false; } @@ -724,8 +737,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Track progress along the critical path through the SUnit graph as // we walk the instructions. This is needed for regclasses that only // break critical-path anti-dependencies. - const SUnit *CriticalPathSU = 0; - MachineInstr *CriticalPathMI = 0; + const SUnit *CriticalPathSU = nullptr; + MachineInstr *CriticalPathMI = nullptr; if (CriticalPathSet.any()) { for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { const SUnit *SU = &SUnits[i]; @@ -778,10 +791,10 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // If MI is not on the critical path, then we don't rename // registers in the CriticalPathSet. - BitVector *ExcludeRegs = NULL; + BitVector *ExcludeRegs = nullptr; if (MI == CriticalPathMI) { CriticalPathSU = CriticalPathStep(CriticalPathSU); - CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; + CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr; } else if (CriticalPathSet.any()) { ExcludeRegs = &CriticalPathSet; } @@ -805,7 +818,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Don't break anti-dependencies on non-allocatable registers. DEBUG(dbgs() << " (non-allocatable)\n"); continue; - } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) { + } else if (ExcludeRegs && ExcludeRegs->test(AntiDepReg)) { // Don't break anti-dependencies for critical path registers // if not on the critical path DEBUG(dbgs() << " (not critical-path)\n"); @@ -819,9 +832,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( } else { // No anti-dep breaking for implicit deps MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg); - assert(AntiDepOp != NULL && - "Can't find index for defined register operand"); - if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) { + assert(AntiDepOp && "Can't find index for defined register operand"); + if (!AntiDepOp || AntiDepOp->isImplicit()) { DEBUG(dbgs() << " (implicit)\n"); continue; } diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h index 6683630..2ab9d89 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -136,7 +136,7 @@ class RegisterClassInfo; ~AggressiveAntiDepBreaker(); /// Start - Initialize anti-dep breaking for a new basic block. - void StartBlock(MachineBasicBlock *BB); + void StartBlock(MachineBasicBlock *BB) override; /// BreakAntiDependencies - Identifiy anti-dependencies along the critical /// path @@ -146,15 +146,16 @@ class RegisterClassInfo; MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, - DbgValueVector &DbgValues); + DbgValueVector &DbgValues) override; /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// - void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex); + void Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) override; /// Finish - Finish anti-dep breaking for a basic block. - void FinishBlock(); + void FinishBlock() override; private: /// Keep track of a position in the allocation order for each regclass. 
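// The AggressiveAntiDepBreaker.h hunks above add 'override' to the virtual
// methods (StartBlock, BreakAntiDependencies, Observe, FinishBlock). A
// minimal sketch of what the keyword buys, with illustrative names:
//
//   struct AntiDepBreakerBase {
//     virtual void StartBlock(int *BB) = 0;
//     virtual ~AntiDepBreakerBase() {}
//   };
//   struct Breaker final : AntiDepBreakerBase {
//     // 'override' makes a silent mismatch impossible: if the base
//     // signature ever changes, this line becomes a compile error instead
//     // of quietly declaring a new, unrelated virtual.
//     void StartBlock(int *BB) override {}
//   };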
@@ -169,7 +170,8 @@ class RegisterClassInfo; void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs); void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag, - const char *header =NULL, const char *footer =NULL); + const char *header = nullptr, + const char *footer = nullptr); void PrescanInstruction(MachineInstr *MI, unsigned Count, std::set<unsigned>& PassthruRegs); diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp index 3fa1f8f..dc9bcff 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "AllocationOrder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + // Compare VirtRegMap::getRegAllocPref(). AllocationOrder::AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h index aed461a..64ff2a7 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.h +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h @@ -45,10 +45,12 @@ public: /// Return the next physical register in the allocation order, or 0. /// It is safe to call next() again after it returned 0, it will keep /// returning 0 until rewind() is called. - unsigned next() { + unsigned next(unsigned Limit = 0) { if (Pos < 0) return Hints.end()[Pos++]; - while (Pos < int(Order.size())) { + if (!Limit) + Limit = Order.size(); + while (Pos < int(Limit)) { unsigned Reg = Order[Pos++]; if (!isHint(Reg)) return Reg; diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 1600c67..0eabee3 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This file defines several CodeGen-specific LLVM IR analysis utilties. +// This file defines several CodeGen-specific LLVM IR analysis utilities. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -46,7 +47,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, EI != EE; ++EI) { if (Indices && *Indices == unsigned(EI - EB)) return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex); + CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex); } return CurIndex; } @@ -56,7 +57,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { if (Indices && *Indices == i) return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex); + CurIndex = ComputeLinearIndex(EltTy, nullptr, nullptr, CurIndex); } return CurIndex; } @@ -228,7 +229,7 @@ static const Value *getNoopInput(const Value *V, // through. 
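// Back in AllocationOrder.h above, next() gained an optional Limit
// parameter where 0 preserves the old "scan the whole order" behavior. A
// standalone sketch of that defaulting idiom (container simplified):
//
//   #include <vector>
//   unsigned nextReg(const std::vector<unsigned> &Order, int &Pos,
//                    unsigned Limit = 0) {
//     if (!Limit)
//       Limit = Order.size();  // 0 means "no limit": search everything.
//     if (Pos < int(Limit))
//       return Order[Pos++];   // next candidate within the limit
//     return 0;                // exhausted; 0 is the "no register" value
//   }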
const Instruction *I = dyn_cast<Instruction>(V); if (!I || I->getNumOperands() == 0) return V; - const Value *NoopInput = 0; + const Value *NoopInput = nullptr; Value *Op = I->getOperand(0); if (isa<BitCastInst>(I)) { @@ -474,8 +475,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, /// between it and the return. /// /// This function only tests target-independent requirements. -bool llvm::isInTailCallPosition(ImmutableCallSite CS, - const TargetLowering &TLI) { +bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); @@ -490,16 +490,14 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. if (!Ret && - (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt || - !isa<UnreachableInst>(Term))) + (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. if (I->mayHaveSideEffects() || I->mayReadFromMemory() || !isSafeToSpeculativelyExecute(I)) - for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; - --BBI) { + for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) { if (&*BBI == I) break; // Debug info intrinsics do not get in the way of tail call optimization. @@ -510,7 +508,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, return false; } - return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI); + return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, + *TM.getTargetLowering()); } bool llvm::returnTypeIsEligibleForTailCall(const Function *F, diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 5d82dd9..251f5ef 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -30,76 +31,83 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -static cl::opt<bool> -EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, - cl::desc("Generate ARM EHABI tables with unwinding descriptors"), - cl::init(false)); - - ARMException::ARMException(AsmPrinter *A) - : DwarfException(A) {} + : EHStreamer(A), shouldEmitCFI(false) {} ARMException::~ARMException() {} ARMTargetStreamer &ARMException::getTargetStreamer() { - MCTargetStreamer &TS = Asm->OutStreamer.getTargetStreamer(); + MCTargetStreamer &TS = *Asm->OutStreamer.getTargetStreamer(); return static_cast<ARMTargetStreamer &>(TS); } -void ARMException::EndModule() { +/// endModule - Emit all exception information that should come after the +/// content. 
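// In the Analysis.cpp hunk above, prior(prior(ExitBB->end())) becomes
// std::prev(ExitBB->end(), 2); llvm::prior was retired in favor of the
// standard <iterator> helper. A self-contained sketch of the equivalence
// (assumes the container holds at least two elements):
//
//   #include <iterator>
//   #include <list>
//   int secondToLast(const std::list<int> &L) {
//     // std::prev walks a bidirectional iterator n steps backwards, so
//     // this dereferences the element just before the last one.
//     return *std::prev(L.end(), 2);
//   }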
+void ARMException::endModule() { + if (shouldEmitCFI) + Asm->OutStreamer.EmitCFISections(false, true); } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void ARMException::BeginFunction(const MachineFunction *MF) { - getTargetStreamer().emitFnStart(); - if (Asm->MF->getFunction()->needsUnwindTableEntry()) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); +void ARMException::beginFunction(const MachineFunction *MF) { + if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + getTargetStreamer().emitFnStart(); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); + // See if we need call frame info. + AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); + assert(MoveType != AsmPrinter::CFI_M_EH && + "non-EH CFI not yet supported in prologue with EHABI lowering"); + if (MoveType == AsmPrinter::CFI_M_Debug) { + shouldEmitCFI = true; + Asm->OutStreamer.EmitCFIStartProc(false); + } } -/// EndFunction - Gather and emit post-function exception information. +/// endFunction - Gather and emit post-function exception information. /// -void ARMException::EndFunction() { +void ARMException::endFunction(const MachineFunction *) { + if (shouldEmitCFI) + Asm->OutStreamer.EmitCFIEndProc(); + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + ARMTargetStreamer &ATS = getTargetStreamer(); - if (!Asm->MF->getFunction()->needsUnwindTableEntry()) + if (!Asm->MF->getFunction()->needsUnwindTableEntry() && + MMI->getLandingPads().empty()) ATS.emitCantUnwind(); else { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); + if (!MMI->getLandingPads().empty()) { + // Emit references to personality. + if (const Function * Personality = + MMI->getPersonalities()[MMI->getPersonalityIndex()]) { + MCSymbol *PerSym = Asm->getSymbol(Personality); + Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); + ATS.emitPersonality(PerSym); + } - if (EnableARMEHABIDescriptors) { - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - - if (!MMI->getLandingPads().empty()) { - // Emit references to personality. - if (const Function * Personality = - MMI->getPersonalities()[MMI->getPersonalityIndex()]) { - MCSymbol *PerSym = Asm->getSymbol(Personality); - Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); - ATS.emitPersonality(PerSym); - } - - // Emit .handlerdata directive. - ATS.emitHandlerData(); + // Emit .handlerdata directive. + ATS.emitHandlerData(); - // Emit actual exception table - EmitExceptionTable(); - } + // Emit actual exception table + emitExceptionTable(); } } - ATS.emitFnEnd(); + if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + ATS.emitFnEnd(); } -void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { +void ARMException::emitTypeInfos(unsigned TTypeEncoding) { const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); @@ -136,7 +144,7 @@ void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); } - Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]), + Asm->EmitTTypeReference((TypeID == 0 ? 
nullptr : TypeInfos[TypeID - 1]), TTypeEncoding); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp new file mode 100644 index 0000000..8dab5e5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -0,0 +1,45 @@ +//===-- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AddressPool.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +using namespace llvm; + +class MCExpr; + +unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { + HasBeenUsed = true; + auto IterBool = + Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS))); + return IterBool.first->second.Number; +} + +// Emit addresses into the section given. +void AddressPool::emit(AsmPrinter &Asm, const MCSection *AddrSection) { + if (Pool.empty()) + return; + + // Start the dwarf addr section. + Asm.OutStreamer.SwitchSection(AddrSection); + + // Order the address pool entries by ID + SmallVector<const MCExpr *, 64> Entries(Pool.size()); + + for (const auto &I : Pool) + Entries[I.second.Number] = + I.second.TLS + ? Asm.getObjFileLowering().getDebugThreadLocalSymbol(I.first) + : MCSymbolRefExpr::Create(I.first, Asm.OutContext); + + for (const MCExpr *Entry : Entries) + Asm.OutStreamer.EmitValue(Entry, Asm.getDataLayout().getPointerSize()); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h new file mode 100644 index 0000000..42757d7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -0,0 +1,52 @@ +//===-- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_ADDRESSPOOL_H__ +#define CODEGEN_ASMPRINTER_ADDRESSPOOL_H__ + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { +class MCSection; +class MCSymbol; +class AsmPrinter; +// Collection of addresses for this unit and assorted labels. +// A Symbol->unsigned mapping of addresses used by indirect +// references. +class AddressPool { + struct AddressPoolEntry { + unsigned Number; + bool TLS; + AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {} + }; + DenseMap<const MCSymbol *, AddressPoolEntry> Pool; + + /// Record whether the AddressPool has been queried for an address index since + /// the last "resetUsedFlag" call. Used to implement type unit fallback - a + /// type that references addresses cannot be placed in a type unit when using + /// fission. + bool HasBeenUsed; + +public: + AddressPool() : HasBeenUsed(false) {} + + /// \brief Returns the index into the address pool with the given + /// label/symbol. 
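// getIndex, declared just below, leans on DenseMap::insert returning an
// (iterator, inserted) pair: inserting {Sym, {Pool.size(), TLS}} either
// creates a fresh entry numbered with the current pool size or finds the
// existing one, so each symbol keeps one stable index. The same idiom with
// standard containers (a sketch, not the commit's code):
//
//   #include <string>
//   #include <unordered_map>
//   unsigned getIndex(std::unordered_map<std::string, unsigned> &Pool,
//                     const std::string &Sym) {
//     // insert() is a no-op when Sym is already present, so the first
//     // index assigned to a symbol is the one it keeps.
//     auto IterBool = Pool.insert({Sym, unsigned(Pool.size())});
//     return IterBool.first->second;
//   }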
+ unsigned getIndex(const MCSymbol *Sym, bool TLS = false); + + void emit(AsmPrinter &Asm, const MCSection *AddrSection); + + bool isEmpty() { return Pool.empty(); } + + bool hasBeenUsed() const { return HasBeenUsed; } + + void resetUsedFlag() { HasBeenUsed = false; } +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 060e010..424e759 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -11,14 +11,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "WinCodeViewLineTables.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Assembly/Writer.h" +#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -27,8 +27,9 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCAsmInfo.h" @@ -42,27 +43,29 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + static const char *const DWARFGroupName = "DWARF Emission"; -static const char *const DbgTimerName = "DWARF Debug Writer"; +static const char *const DbgTimerName = "Debug Info Emission"; static const char *const EHTimerName = "DWARF Exception Writer"; +static const char *const CodeViewLineTablesGroupName = "CodeView Line Tables"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; -typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type; +typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type; static gcp_map_type &getGCMap(void *&P) { - if (P == 0) + if (!P) P = new gcp_map_type(); return *(gcp_map_type*)P; } @@ -99,23 +102,21 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()), OutContext(Streamer.getContext()), OutStreamer(Streamer), - LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { - DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0; - CurrentFnSym = CurrentFnSymForSize = 0; - GCMetadataPrinters = 0; + LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) { + DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr; + CurrentFnSym = CurrentFnSymForSize = nullptr; + GCMetadataPrinters = nullptr; VerboseAsm = Streamer.isVerboseAsm(); } AsmPrinter::~AsmPrinter() { - assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized"); + assert(!DD && Handlers.empty() && "Debug/EH info didn't get finalized"); - if (GCMetadataPrinters != 0) { + 
if (GCMetadataPrinters) { gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); - for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I) - delete I->second; delete &GCMap; - GCMetadataPrinters = 0; + GCMetadataPrinters = nullptr; } delete &OutStreamer; @@ -136,6 +137,14 @@ const DataLayout &AsmPrinter::getDataLayout() const { return *TM.getDataLayout(); } +const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { + return TM.getSubtarget<MCSubtargetInfo>(); +} + +void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { + S.EmitInstruction(Inst, getSubtargetInfo()); +} + StringRef AsmPrinter::getTargetTriple() const { return TM.getTargetTriple(); } @@ -164,9 +173,28 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - OutStreamer.InitStreamer(); + OutStreamer.InitSections(); - Mang = new Mangler(&TM); + Mang = new Mangler(TM.getDataLayout()); + + // Emit the version-min deplyment target directive if needed. + // + // FIXME: If we end up with a collection of these sorts of Darwin-specific + // or ELF-specific things, it may make sense to have a platform helper class + // that will work with the target helper class. For now keep it here, as the + // alternative is duplicated code in each of the target asm printers that + // use the directive, where it would need the same conditionalization + // anyway. + Triple TT(getTargetTriple()); + if (TT.isOSDarwin()) { + unsigned Major, Minor, Update; + TT.getOSVersion(Major, Minor, Update); + // If there is a version specified, Major will be non-zero. + if (Major) + OutStreamer.EmitVersionMin((TT.isMacOSX() ? + MCVM_OSXVersionMin : MCVM_IOSVersionMin), + Major, Minor, Update); + } // Allow the target to emit any magic that it wants at the start of the file. 
EmitStartOfAsmFile(M); @@ -180,7 +208,7 @@ bool AsmPrinter::doInitialization(Module &M) { GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); - for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + for (auto &I : *MI) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) MP->beginAssembly(*this); @@ -193,25 +221,65 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer.AddBlankLine(); } - if (MAI->doesSupportDebugInformation()) - DD = new DwarfDebug(this, &M); + if (MAI->doesSupportDebugInformation()) { + if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) { + Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this), + DbgTimerName, + CodeViewLineTablesGroupName)); + } else { + DD = new DwarfDebug(this, &M); + Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); + } + } + EHStreamer *ES = nullptr; switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: - return false; + break; case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: - DE = new DwarfCFIException(this); - return false; + ES = new DwarfCFIException(this); + break; case ExceptionHandling::ARM: - DE = new ARMException(this); + ES = new ARMException(this); + break; + case ExceptionHandling::WinEH: + ES = new Win64Exception(this); + break; + } + if (ES) + Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName)); + return false; +} + +static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) { + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + if (Linkage != GlobalValue::LinkOnceODRLinkage) + return false; + + if (!MAI.hasWeakDefCanBeHiddenDirective()) return false; - case ExceptionHandling::Win64: - DE = new Win64Exception(this); + + if (GV->hasUnnamedAddr()) + return true; + + // This is only used for MachO, so right now it doesn't really matter how + // we handle alias. Revisit this once the MachO linker implements aliases. + if (isa<GlobalAlias>(GV)) return false; + + // If it is a non constant variable, it needs to be uniqued across shared + // objects. 
+ if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) { + if (!Var->isConstant()) + return false; } - llvm_unreachable("Unknown exception type."); + GlobalStatus GS; + if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared) + return true; + + return false; } void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { @@ -222,25 +290,11 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: - case GlobalValue::LinkerPrivateWeakLinkage: if (MAI->hasWeakDefDirective()) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - bool CanBeHidden = false; - - if (Linkage == GlobalValue::LinkOnceODRLinkage && - MAI->hasWeakDefCanBeHiddenDirective()) { - if (GV->hasUnnamedAddr()) { - CanBeHidden = true; - } else { - GlobalStatus GS; - if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared) - CanBeHidden = true; - } - } - - if (!CanBeHidden) + if (!canBeHidden(GV, *MAI)) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else @@ -254,7 +308,6 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); } return; - case GlobalValue::DLLExportLinkage: case GlobalValue::AppendingLinkage: // FIXME: appending linkage variables should go into a section of // their name or something. For now, just emit them as external. @@ -265,19 +318,22 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { return; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: - case GlobalValue::LinkerPrivateLinkage: return; case GlobalValue::AvailableExternallyLinkage: llvm_unreachable("Should never emit this"); - case GlobalValue::DLLImportLinkage: case GlobalValue::ExternalWeakLinkage: llvm_unreachable("Don't know how to emit these"); } llvm_unreachable("Unknown linkage type!"); } +void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name, + const GlobalValue *GV) const { + TM.getNameWithPrefix(Name, GV, *Mang); +} + MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { - return getObjFileLowering().getSymbol(*Mang, GV); + return TM.getSymbol(GV, *Mang); } /// EmitGlobalVariable - Emit the specified global variable to the .s file. @@ -288,7 +344,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, + GV->printAsOperand(OutStreamer.GetCommentOS(), /*PrintType=*/false, GV->getParent()); OutStreamer.GetCommentOS() << '\n'; } @@ -313,8 +369,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // sections and expected to be contiguous (e.g. ObjC metadata). unsigned AlignLog = getGVAlignmentLog2(GV, *DL); - if (DD) - DD->setSymbolSize(GVSym, Size); + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->setSymbolSize(GVSym, Size); + } // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { @@ -334,7 +392,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle local BSS symbols. 
if (MAI->hasMachoZeroFillDirective()) { const MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // .zerofill __DATA, __bss, _foo, 400, 5 OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align); return; @@ -363,7 +421,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } const MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // Handle the zerofill directive on darwin, which is a special form of BSS // emission. @@ -454,7 +512,8 @@ void AsmPrinter::EmitFunctionHeader() { // Print the 'header' of function. const Function *F = MF->getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + OutStreamer.SwitchSection( + getObjFileLowering().SectionForGlobal(F, *Mang, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); EmitLinkage(F, CurrentFnSym); @@ -464,7 +523,7 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), F, + F->printAsOperand(OutStreamer.GetCommentOS(), /*PrintType=*/false, F->getParent()); OutStreamer.GetCommentOS() << '\n'; } @@ -484,13 +543,9 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit pre-function debug and/or EH information. - if (DE) { - NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); - DE->BeginFunction(MF); - } - if (DD) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->beginFunction(MF); + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->beginFunction(MF); } // Emit the prefix data. @@ -578,10 +633,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { SmallString<128> Str; raw_svector_ostream OS(Str); - OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: "; + OS << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); + DIVariable V(MI->getOperand(2).getMetadata()); if (V.getContext().isSubprogram()) { StringRef Name = DISubprogram(V.getContext()).getDisplayName(); if (!Name.empty()) @@ -627,7 +681,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // Suppress offset, it is not meaningful here. OS << "undef"; // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer.EmitRawText(OS.str()); + AP.OutStreamer.emitRawComment(OS.str()); return true; } if (Deref) @@ -639,7 +693,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << '+' << Offset << ']'; // NOTE: Want this comment at start of line, don't emit with AddComment. 
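// The change just below swaps EmitRawText for emitRawComment: rather than
// hand-assembling "\t" plus the target's comment string into OS, the
// caller now emits plain text and lets the streamer prepend the proper
// comment marker for the target. The before/after shape, as a sketch:
//
//   // before: comment syntax built by hand
//   //   OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ...";
//   //   OutStreamer.EmitRawText(OS.str());
//   // after: streamer supplies the "\t# " / "; " / "// " prefix itself
//   //   OS << "DEBUG_VALUE: ...";
//   //   OutStreamer.emitRawComment(OS.str());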
- AP.OutStreamer.EmitRawText(OS.str()); + AP.OutStreamer.emitRawComment(OS.str()); return true; } @@ -655,18 +709,14 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { } bool AsmPrinter::needsSEHMoves() { - return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 && + return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH && MF->getFunction()->needsUnwindTableEntry(); } -bool AsmPrinter::needsRelocationsForDwarfStringPool() const { - return MAI->doesDwarfUseRelocationsAcrossSections(); -} - -void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { - const MCSymbol *Label = MI.getOperand(0).getMCSymbol(); - - if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) +void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { + ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType(); + if (ExceptionHandlingType != ExceptionHandling::DwarfCFI && + ExceptionHandlingType != ExceptionHandling::ARM) return; if (needsCFIMoves() == CFI_M_None) @@ -677,16 +727,9 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { const MachineModuleInfo &MMI = MF->getMMI(); const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions(); - bool FoundOne = false; - (void)FoundOne; - for (std::vector<MCCFIInstruction>::const_iterator I = Instrs.begin(), - E = Instrs.end(); I != E; ++I) { - if (I->getLabel() == Label) { - emitCFIInstruction(*I); - FoundOne = true; - } - } - assert(FoundOne); + unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); + const MCCFIInstruction &CFI = Instrs[CFIIndex]; + emitCFIInstruction(CFI); } /// EmitFunctionBody - This method emits the body and trailer for a @@ -695,69 +738,70 @@ void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); - bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo(); + bool ShouldPrintDebugScopes = MMI->hasDebugInfo(); // Print out code for the function. bool HasAnyRealCode = false; - const MachineInstr *LastMI = 0; - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { + const MachineInstr *LastMI = nullptr; + for (auto &MBB : *MF) { // Print a label for the basic block. - EmitBasicBlockStart(I); - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - LastMI = II; + EmitBasicBlockStart(MBB); + for (auto &MI : MBB) { + LastMI = &MI; // Print the assembly for the instruction. 
- if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() && - !II->isDebugValue()) { + if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && + !MI.isDebugValue()) { HasAnyRealCode = true; ++EmittedInsts; } if (ShouldPrintDebugScopes) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->beginInstruction(II); + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + TimePassesIsEnabled); + HI.Handler->beginInstruction(&MI); + } } if (isVerbose()) - emitComments(*II, OutStreamer.GetCommentOS()); + emitComments(MI, OutStreamer.GetCommentOS()); - switch (II->getOpcode()) { - case TargetOpcode::PROLOG_LABEL: - emitPrologLabel(*II); + switch (MI.getOpcode()) { + case TargetOpcode::CFI_INSTRUCTION: + emitCFIInstruction(MI); break; case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: - OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); + OutStreamer.EmitLabel(MI.getOperand(0).getMCSymbol()); break; case TargetOpcode::INLINEASM: - EmitInlineAsm(II); + EmitInlineAsm(&MI); break; case TargetOpcode::DBG_VALUE: if (isVerbose()) { - if (!emitDebugValueComment(II, *this)) - EmitInstruction(II); + if (!emitDebugValueComment(&MI, *this)) + EmitInstruction(&MI); } break; case TargetOpcode::IMPLICIT_DEF: - if (isVerbose()) emitImplicitDef(II); + if (isVerbose()) emitImplicitDef(&MI); break; case TargetOpcode::KILL: - if (isVerbose()) emitKill(II, *this); + if (isVerbose()) emitKill(&MI, *this); break; default: - if (!TM.hasMCUseLoc()) - MCLineEntry::Make(&OutStreamer, getCurrentSection()); - - EmitInstruction(II); + EmitInstruction(&MI); break; } if (ShouldPrintDebugScopes) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endInstruction(II); + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + TimePassesIsEnabled); + HI.Handler->endInstruction(); + } } } } @@ -767,7 +811,7 @@ void AsmPrinter::EmitFunctionBody() { // label equaling the end of function label and an invalid "row" in the // FDE. We need to emit a noop in this situation so that the FDE's rows are // valid. - bool RequiresNoop = LastMI && LastMI->isPrologLabel(); + bool RequiresNoop = LastMI && LastMI->isCFIInstruction(); // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the @@ -777,17 +821,16 @@ void AsmPrinter::EmitFunctionBody() { TM.getInstrInfo()->getNoopForMachoTarget(Noop); if (Noop.getOpcode()) { OutStreamer.AddComment("avoids zero-length function"); - OutStreamer.EmitInstruction(Noop); + OutStreamer.EmitInstruction(Noop, getSubtargetInfo()); } else // Target not mc-ized yet. OutStreamer.EmitRawText(StringRef("\tnop\n")); } const Function *F = MF->getFunction(); - for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) { - const BasicBlock *BB = i; - if (!BB->hasAddressTaken()) + for (const auto &BB : *F) { + if (!BB.hasAddressTaken()) continue; - MCSymbol *Sym = GetBlockAddressSymbol(BB); + MCSymbol *Sym = GetBlockAddressSymbol(&BB); if (Sym->isDefined()) continue; OutStreamer.AddComment("Address of block that was removed by CodeGen"); @@ -813,14 +856,10 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } - // Emit post-function debug information. 
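// This commit retires the dedicated DD/DE (DwarfDebug / DwarfException)
// members in favor of a generic Handlers list, so begin/end events fan out
// to every registered handler under its own named timer; the hunk below is
// one instance. The shape of the pattern, reduced to a sketch:
//
//   #include <vector>
//   struct Handler {
//     virtual void endFunction() = 0;
//     virtual ~Handler() {}
//   };
//   struct HandlerList {
//     std::vector<Handler *> Handlers;
//     void broadcastEndFunction() {
//       // One loop replaces a chain of "if (DD) ... if (DE) ..." blocks;
//       // a new consumer (e.g. CodeView line tables) is just a push_back.
//       for (Handler *H : Handlers)
//         H->endFunction();
//     }
//   };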
- if (DD) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endFunction(MF); - } - if (DE) { - NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); - DE->EndFunction(); + // Emit post-function debug and/or EH information. + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->endFunction(MF); } MMI->EndFunction(); @@ -830,65 +869,15 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } -/// EmitDwarfRegOp - Emit dwarf register operation. -void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, - bool Indirect) const { - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); - - for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0; - ++SR) { - Reg = TRI->getDwarfRegNum(*SR, false); - // FIXME: Get the bit range this register uses of the superregister - // so that we can produce a DW_OP_bit_piece - } - - // FIXME: Handle cases like a super register being encoded as - // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33 - - // FIXME: We have no reasonable way of handling errors in here. The - // caller might be in the middle of an dwarf expression. We should - // probably assert that Reg >= 0 once debug info generation is more mature. - - if (MLoc.isIndirect() || Indirect) { - if (Reg < 32) { - OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); - EmitInt8(dwarf::DW_OP_breg0 + Reg); - } else { - OutStreamer.AddComment("DW_OP_bregx"); - EmitInt8(dwarf::DW_OP_bregx); - OutStreamer.AddComment(Twine(Reg)); - EmitULEB128(Reg); - } - EmitSLEB128(!MLoc.isIndirect() ? 0 : MLoc.getOffset()); - if (MLoc.isIndirect() && Indirect) - EmitInt8(dwarf::DW_OP_deref); - } else { - if (Reg < 32) { - OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); - EmitInt8(dwarf::DW_OP_reg0 + Reg); - } else { - OutStreamer.AddComment("DW_OP_regx"); - EmitInt8(dwarf::DW_OP_regx); - OutStreamer.AddComment(Twine(Reg)); - EmitULEB128(Reg); - } - } - - // FIXME: Produce a DW_OP_bit_piece if we used a superregister -} +static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP); bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - EmitGlobalVariable(I); + for (const auto &G : M.globals()) + EmitGlobalVariable(&G); // Emit visibility info for declarations - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { - const Function &F = *I; + for (const Function &F : M) { if (!F.isDeclaration()) continue; GlobalValue::VisibilityTypes V = F.getVisibility(); @@ -899,30 +888,72 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } + // Get information about jump-instruction tables to print. + JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>(); + + if (JITI && !JITI->getTables().empty()) { + unsigned Arch = Triple(getTargetTriple()).getArch(); + bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb); + MCInst TrapInst; + TM.getInstrInfo()->getTrap(TrapInst); + for (const auto &KV : JITI->getTables()) { + uint64_t Count = 0; + for (const auto &FunPair : KV.second) { + // Emit the function labels to make this be a function entry point. 
+ MCSymbol *FunSym = + OutContext.GetOrCreateSymbol(FunPair.second->getName()); + OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global); + // FIXME: JumpTableInstrInfo should store information about the required + // alignment of table entries and the size of the padding instruction. + EmitAlignment(3); + if (IsThumb) + OutStreamer.EmitThumbFunc(FunSym); + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction); + OutStreamer.EmitLabel(FunSym); + + // Emit the jump instruction to transfer control to the original + // function. + MCInst JumpToFun; + MCSymbol *TargetSymbol = + OutContext.GetOrCreateSymbol(FunPair.first->getName()); + const MCSymbolRefExpr *TargetSymRef = + MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT, + OutContext); + TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef); + OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo()); + ++Count; + } + + // Emit enough padding instructions to fill up to the next power of two. + // This assumes that the trap instruction takes 8 bytes or fewer. + uint64_t Remaining = NextPowerOf2(Count) - Count; + for (uint64_t C = 0; C < Remaining; ++C) { + EmitAlignment(3); + OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo()); + } + + } + } + // Emit module flags. SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); if (!ModuleFlags.empty()) - getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM); + getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM); // Make sure we wrote out everything we need. OutStreamer.Flush(); // Finalize debug and EH information. - if (DE) { - { - NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); - DE->EndModule(); - } - delete DE; DE = 0; - } - if (DD) { - { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endModule(); - } - delete DD; DD = 0; + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + TimePassesIsEnabled); + HI.Handler->endModule(); + delete HI.Handler; } + Handlers.clear(); + DD = nullptr; // If the target wants to know about weak references, print them all. if (MAI->getWeakRefDirective()) { @@ -932,51 +963,43 @@ bool AsmPrinter::doFinalization(Module &M) { // happen with the MC stuff eventually. // Print out module-level global variables here. 
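// The jump-instruction tables emitted above are padded with trap
// instructions out to a power-of-two entry count: Remaining =
// NextPowerOf2(Count) - Count. A standalone check of that arithmetic; this
// nextPow2 mirrors llvm::NextPowerOf2, which returns the power of two
// strictly greater than its argument:
//
//   #include <cassert>
//   #include <cstdint>
//   uint64_t nextPow2(uint64_t A) {
//     A |= (A >> 1); A |= (A >> 2);  A |= (A >> 4);
//     A |= (A >> 8); A |= (A >> 16); A |= (A >> 32);
//     return A + 1;
//   }
//   void check() {
//     assert(nextPow2(5) - 5 == 3);  // 5 entries: pad with 3 traps to 8
//     assert(nextPow2(8) - 8 == 8);  // strictly greater: 8 pads to 16
//   }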
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); + for (const auto &G : M.globals()) { + if (!G.hasExternalWeakLinkage()) + continue; + OutStreamer.EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference); } - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); + for (const auto &F : M) { + if (!F.hasExternalWeakLinkage()) + continue; + OutStreamer.EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference); } } if (MAI->hasSetDirective()) { OutStreamer.AddBlankLine(); - for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E; ++I) { - MCSymbol *Name = getSymbol(I); - - const GlobalValue *GV = I->getAliasedGlobal(); - if (GV->isDeclaration()) { - report_fatal_error(Name->getName() + - ": Target doesn't support aliases to declarations"); - } - - MCSymbol *Target = getSymbol(GV); + for (const auto &Alias : M.aliases()) { + MCSymbol *Name = getSymbol(&Alias); - if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) + if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective()) OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); - else if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) + else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage()) OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference); else - assert(I->hasLocalLinkage() && "Invalid alias linkage"); + assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); - EmitVisibility(Name, I->getVisibility()); + EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: OutStreamer.EmitAssignment(Name, - MCSymbolRefExpr::Create(Target, OutContext)); + lowerConstant(Alias.getAliasee(), *this)); } } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) - if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I)) MP->finishAssembly(*this); // Emit llvm.ident metadata in an '.ident' directive. @@ -993,8 +1016,8 @@ bool AsmPrinter::doFinalization(Module &M) { // after everything else has gone out. 
EmitEndOfAsmFile(M); - delete Mang; Mang = 0; - MMI = 0; + delete Mang; Mang = nullptr; + MMI = nullptr; OutStreamer.Finish(); OutStreamer.reset(); @@ -1039,23 +1062,13 @@ void AsmPrinter::EmitConstantPool() { const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - SectionKind Kind; - switch (CPE.getRelocationInfo()) { - default: llvm_unreachable("Unknown section kind"); - case 2: Kind = SectionKind::getReadOnlyWithRel(); break; - case 1: - Kind = SectionKind::getReadOnlyWithRelLocal(); - break; - case 0: - switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) { - case 4: Kind = SectionKind::getMergeableConst4(); break; - case 8: Kind = SectionKind::getMergeableConst8(); break; - case 16: Kind = SectionKind::getMergeableConst16();break; - default: Kind = SectionKind::getMergeableConst(); break; - } - } + SectionKind Kind = CPE.getSectionKind(TM.getDataLayout()); - const MCSection *S = getObjFileLowering().getSectionForConstant(Kind); + const Constant *C = nullptr; + if (!CPE.isMachineConstantPoolEntry()) + C = CPE.Val.ConstVal; + + const MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C); // The number of sections are small, just do a linear search from the // last section to the first. @@ -1078,13 +1091,22 @@ void AsmPrinter::EmitConstantPool() { } // Now print stuff into the calculated sections. + const MCSection *CurSection = nullptr; + unsigned Offset = 0; for (unsigned i = 0, e = CPSections.size(); i != e; ++i) { - OutStreamer.SwitchSection(CPSections[i].S); - EmitAlignment(Log2_32(CPSections[i].Alignment)); - - unsigned Offset = 0; for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) { unsigned CPI = CPSections[i].CPEs[j]; + MCSymbol *Sym = GetCPISymbol(CPI); + if (!Sym->isUndefined()) + continue; + + if (CurSection != CPSections[i].S) { + OutStreamer.SwitchSection(CPSections[i].S); + EmitAlignment(Log2_32(CPSections[i].Alignment)); + CurSection = CPSections[i].S; + Offset = 0; + } + MachineConstantPoolEntry CPE = CP[CPI]; // Emit inter-object padding for alignment. @@ -1094,8 +1116,8 @@ void AsmPrinter::EmitConstantPool() { Type *Ty = CPE.getType(); Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty); - OutStreamer.EmitLabel(GetCPISymbol(CPI)); + OutStreamer.EmitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); else @@ -1108,8 +1130,9 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { + const DataLayout *DL = MF->getTarget().getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - if (MJTI == 0) return; + if (!MJTI) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; @@ -1127,11 +1150,13 @@ void AsmPrinter::EmitJumpTableInfo() { // FIXME: this isn't the right predicate, should be based on the MCSection // for the function. F->isWeakForLinker()) { - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM)); + OutStreamer.SwitchSection( + getObjFileLowering().SectionForGlobal(F, *Mang, TM)); } else { // Otherwise, drop it in the readonly section. 
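// In EmitConstantPool above, entries whose label is already defined are
// now skipped ("if (!Sym->isUndefined()) continue") and the section switch
// plus alignment are deferred until a section's first real entry. The
// control flow, reduced to a sketch:
//
//   #include <vector>
//   struct Entry { int Section; bool AlreadyDefined; };
//   void emitAll(const std::vector<Entry> &Entries) {
//     int CurSection = -1;
//     for (const Entry &E : Entries) {
//       if (E.AlreadyDefined)
//         continue;              // duplicate; nothing to emit for it
//       if (E.Section != CurSection) {
//         // switch + align happen here, lazily, so a section containing
//         // only duplicates is never switched to at all.
//         CurSection = E.Section;
//       }
//       // ... emit the entry and advance the running offset ...
//     }
//   }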
const MCSection *ReadOnlySection = - getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly()); + getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), + /*C=*/nullptr); OutStreamer.SwitchSection(ReadOnlySection); JTInDiffSection = true; } @@ -1173,7 +1198,7 @@ void AsmPrinter::EmitJumpTableInfo() { // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0]) + if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly // named and numbered 'l' label would work. Simplify GetJTISymbol. OutStreamer.EmitLabel(GetJTISymbol(JTI, true)); @@ -1193,7 +1218,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned UID) const { assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block"); - const MCExpr *Value = 0; + const MCExpr *Value = nullptr; switch (MJTI->getEntryKind()) { case MachineJumpTableInfo::EK_Inline: llvm_unreachable("Cannot emit EK_Inline jump table entry"); @@ -1268,7 +1293,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } // Ignore debug and non-emitted data. This handles llvm.compiler.used. - if (GV->getSection() == "llvm.metadata" || + if (StringRef(GV->getSection()) == "llvm.metadata" || GV->hasAvailableExternallyLinkage()) return true; @@ -1311,11 +1336,20 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); - if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) + if (GV) OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); } } +namespace { +struct Structor { + Structor() : Priority(0), Func(nullptr), ComdatKey(nullptr) {} + int Priority; + llvm::Constant *Func; + llvm::GlobalValue *ComdatKey; +}; +} // end namespace + /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { @@ -1327,37 +1361,55 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { const ConstantArray *InitList = dyn_cast<ConstantArray>(List); if (!InitList) return; // Not an array! StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType()); - if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs! + // FIXME: Only allow the 3-field form in LLVM 4.0. + if (!ETy || ETy->getNumElements() < 2 || ETy->getNumElements() > 3) + return; // Not an array of two or three elements! if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) || !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr). + if (ETy->getNumElements() == 3 && !isa<PointerType>(ETy->getTypeAtIndex(2U))) + return; // Not (int, ptr, ptr). // Gather the structors in a form that's convenient for sorting by priority. - typedef std::pair<unsigned, Constant *> Structor; SmallVector<Structor, 8> Structors; - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i)); + for (Value *O : InitList->operands()) { + ConstantStruct *CS = dyn_cast<ConstantStruct>(O); if (!CS) continue; // Malformed. 
if (CS->getOperand(1)->isNullValue()) break; // Found a null terminator, skip the rest. ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); if (!Priority) continue; // Malformed. - Structors.push_back(std::make_pair(Priority->getLimitedValue(65535), - CS->getOperand(1))); + Structors.push_back(Structor()); + Structor &S = Structors.back(); + S.Priority = Priority->getLimitedValue(65535); + S.Func = CS->getOperand(1); + if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue()) + S.ComdatKey = dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts()); } // Emit the function pointers in the target-specific order const DataLayout *DL = TM.getDataLayout(); unsigned Align = Log2_32(DL->getPointerPrefAlignment()); - std::stable_sort(Structors.begin(), Structors.end(), less_first()); - for (unsigned i = 0, e = Structors.size(); i != e; ++i) { + std::stable_sort(Structors.begin(), Structors.end(), + [](const Structor &L, + const Structor &R) { return L.Priority < R.Priority; }); + for (Structor &S : Structors) { + const TargetLoweringObjectFile &Obj = getObjFileLowering(); + const MCSymbol *KeySym = nullptr; + if (GlobalValue *GV = S.ComdatKey) { + if (GV->hasAvailableExternallyLinkage()) + // If the associated variable is available_externally, some other TU + // will provide its dynamic initializer. + continue; + + KeySym = getSymbol(GV); + } const MCSection *OutputSection = - (isCtor ? - getObjFileLowering().getStaticCtorSection(Structors[i].first) : - getObjFileLowering().getStaticDtorSection(Structors[i].first)); + (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym) + : Obj.getStaticDtorSection(S.Priority, KeySym)); OutStreamer.SwitchSection(OutputSection); if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) EmitAlignment(Align); - EmitXXStructor(Structors[i].second); + EmitXXStructor(S.Func); } } @@ -1368,7 +1420,7 @@ void AsmPrinter::EmitModuleIdents(Module &M) { if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { const MDNode *N = NMD->getOperand(i); - assert(N->getNumOperands() == 1 && + assert(N->getNumOperands() == 1 && "llvm.ident metadata entry can have only one operand"); const MDString *S = cast<MDString>(N->getOperand(0)); OutStreamer.EmitIdent(S->getString()); @@ -1424,8 +1476,8 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, /// where the size in bytes of the directive is specified by Size and Hi/Lo /// specify the labels. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, - const MCSymbol *Lo, unsigned Size) - const { + const MCSymbol *Lo, + unsigned Size) const { // Emit Hi+Offset - Lo // Get the Hi+Offset expression. @@ -1454,8 +1506,8 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. 
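// Minimal sketch of the priority sort performed in EmitXXStructorList above:
// std::stable_sort keeps the source order of structors that share a priority,
// which the original less_first() comparison also guaranteed.
#include <algorithm>
#include <cstdio>
#include <vector>
struct Structor { int Priority; const char *Name; };
int main() {
  std::vector<Structor> S = {{65535, "b"}, {101, "c"}, {65535, "a"}};
  std::stable_sort(S.begin(), S.end(),
                   [](const Structor &L, const Structor &R) {
                     return L.Priority < R.Priority;
                   });
  for (const Structor &X : S)
    printf("%d %s\n", X.Priority, X.Name); // 101 c, 65535 b, 65535 a
  return 0;
}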
void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, - unsigned Size, bool IsSectionRelative) - const { + unsigned Size, + bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { OutStreamer.EmitCOFFSecRel32(Label); return; @@ -1464,14 +1516,12 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // Emit Label+Offset (or just Label if Offset is zero) const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); if (Offset) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(Offset, OutContext), - OutContext); + Expr = MCBinaryExpr::CreateAdd( + Expr, MCConstantExpr::Create(Offset, OutContext), OutContext); OutStreamer.EmitValue(Expr, Size); } - //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of @@ -1480,7 +1530,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // an explicit alignment requested, it will override the alignment request // if required for correctness. // -void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1488,7 +1538,7 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { if (getCurrentSection()->getKind().isText()) OutStreamer.EmitCodeAlignment(1 << NumBits); else - OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0); + OutStreamer.EmitValueToAlignment(1 << NumBits); } //===----------------------------------------------------------------------===// @@ -1513,10 +1563,15 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); - if (CE == 0) { + if (!CE) { llvm_unreachable("Unknown constant value to lower!"); } + if (const MCExpr *RelocExpr = + AP.getObjFileLowering().getExecutableRelativeSymbol(CE, *AP.Mang, + AP.TM)) + return RelocExpr; + switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding @@ -1532,8 +1587,8 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { std::string S; raw_string_ostream OS(S); OS << "Unsupported expression in static initializer: "; - WriteAsOperand(OS, CE, /*PrintType=*/false, - !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + CE->printAsOperand(OS, /*PrintType=*/false, + !AP.MF ? nullptr : AP.MF->getFunction()->getParent()); report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { @@ -1810,7 +1865,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { SmallString<8> StrVal; CFP->getValueAPF().toString(StrVal); - CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + if (CFP->getType()) + CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + else + AP.OutStreamer.GetCommentOS() << "Printing <null> Type"; AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n'; } @@ -1823,7 +1881,8 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. 
- if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) { + if (AP.TM.getDataLayout()->isBigEndian() && + !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) @@ -1996,15 +2055,17 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { /// GetTempSymbol - Return the MCSymbol corresponding to the assembler /// temporary label with the specified stem and unique ID. -MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const { - return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + +MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const { + const DataLayout *DL = TM.getDataLayout(); + return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + Name + Twine(ID)); } /// GetTempSymbol - Return an assembler temporary label with the specified /// stem. -MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const { - return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+ +MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const { + const DataLayout *DL = TM.getDataLayout(); + return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Name); } @@ -2019,8 +2080,9 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol - (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + Twine(CPID)); } @@ -2032,21 +2094,16 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol - (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + + (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + Twine(UID) + "_set_" + Twine(MBBID)); } -/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with -/// global value name as its base, with the specified suffix, and where the -/// symbol is forced to have private linkage if ForcePrivate is true. -MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV, - StringRef Suffix, - bool ForcePrivate) const { - SmallString<60> NameStr; - Mang->getNameWithPrefix(NameStr, GV, ForcePrivate); - NameStr.append(Suffix.begin(), Suffix.end()); - return OutContext.GetOrCreateSymbol(NameStr.str()); +MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix) const { + return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, *Mang, + TM); } /// GetExternalSymbolSymbol - Return the MCSymbol for the specified @@ -2062,7 +2119,7 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { /// PrintParentLoopComment - Print comments about parent loops of this one. 
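// Sketch of the constant-pool symbol naming built by GetCPISymbol above,
// assuming a ".L" private global prefix (a common ELF choice; the prefix now
// comes from DataLayout rather than MCAsmInfo). cpiSymbolName is a
// hypothetical stand-in for the Twine concatenation in the real code.
#include <cstdio>
#include <string>
static std::string cpiSymbolName(const std::string &Prefix, unsigned FnNum,
                                 unsigned CPID) {
  return Prefix + "CPI" + std::to_string(FnNum) + "_" + std::to_string(CPID);
}
int main() {
  printf("%s\n", cpiSymbolName(".L", 0, 2).c_str()); // prints: .LCPI0_2
  return 0;
}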
static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, unsigned FunctionNumber) { - if (Loop == 0) return; + if (!Loop) return; PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber); OS.indent(Loop->getLoopDepth()*2) << "Parent Loop BB" << FunctionNumber << "_" @@ -2076,12 +2133,12 @@ static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, unsigned FunctionNumber) { // Add child loop information - for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){ - OS.indent((*CL)->getLoopDepth()*2) + for (const MachineLoop *CL : *Loop) { + OS.indent(CL->getLoopDepth()*2) << "Child Loop BB" << FunctionNumber << "_" - << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth() + << CL->getHeader()->getNumber() << " Depth " << CL->getLoopDepth() << '\n'; - PrintChildLoopComment(OS, *CL, FunctionNumber); + PrintChildLoopComment(OS, CL, FunctionNumber); } } @@ -2091,7 +2148,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, const AsmPrinter &AP) { // Add loop depth information const MachineLoop *Loop = LI->getLoopFor(&MBB); - if (Loop == 0) return; + if (!Loop) return; MachineBasicBlock *Header = Loop->getHeader(); assert(Header && "No header for loop"); @@ -2127,43 +2184,41 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. -void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { +void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { // Emit an alignment directive for this block, if needed. - if (unsigned Align = MBB->getAlignment()) + if (unsigned Align = MBB.getAlignment()) EmitAlignment(Align); // If the block has its address taken, emit any labels that were used to // reference the block. It is possible that there is more than one label // here, because multiple LLVM BB's may have been RAUW'd to this block after // the references were generated. - if (MBB->hasAddressTaken()) { - const BasicBlock *BB = MBB->getBasicBlock(); + if (MBB.hasAddressTaken()) { + const BasicBlock *BB = MBB.getBasicBlock(); if (isVerbose()) OutStreamer.AddComment("Block address taken"); - std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB); - - for (unsigned i = 0, e = Syms.size(); i != e; ++i) - OutStreamer.EmitLabel(Syms[i]); + std::vector<MCSymbol*> Symbols = MMI->getAddrLabelSymbolToEmit(BB); + for (auto *Sym : Symbols) + OutStreamer.EmitLabel(Sym); } // Print some verbose block comments. if (isVerbose()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) + if (const BasicBlock *BB = MBB.getBasicBlock()) if (BB->hasName()) OutStreamer.AddComment("%" + BB->getName()); - emitBasicBlockLoopComments(*MBB, LI, *this); + emitBasicBlockLoopComments(MBB, LI, *this); } // Print the main label for the block. - if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { + if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) { + if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. 
- OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + - Twine(MBB->getNumber()) + ":"); + OutStreamer.emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); } } else { - OutStreamer.EmitLabel(MBB->getSymbol()); + OutStreamer.EmitLabel(MBB.getSymbol()); } } @@ -2199,14 +2254,11 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return false; // If there isn't exactly one predecessor, it can't be a fall through. - MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; - ++PI2; - if (PI2 != MBB->pred_end()) + if (MBB->pred_size() > 1) return false; // The predecessor has to be immediately before this block. - MachineBasicBlock *Pred = *PI; - + MachineBasicBlock *Pred = *MBB->pred_begin(); if (!Pred->isLayoutSuccessor(MBB)) return false; @@ -2215,10 +2267,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return true; // Check the terminators in the previous blocks - for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(), - IE = Pred->end(); II != IE; ++II) { - MachineInstr &MI = *II; - + for (const auto &MI : Pred->terminators()) { // If it is not a simple branch, we are in a table somewhere. if (!MI.isBranch() || MI.isIndirectBranch()) return false; @@ -2239,26 +2288,29 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { -GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { - if (!S->usesMetadata()) - return 0; +GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { + if (!S.usesMetadata()) + return nullptr; gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); - gcp_map_type::iterator GCPI = GCMap.find(S); + gcp_map_type::iterator GCPI = GCMap.find(&S); if (GCPI != GCMap.end()) - return GCPI->second; + return GCPI->second.get(); - const char *Name = S->getName().c_str(); + const char *Name = S.getName().c_str(); for (GCMetadataPrinterRegistry::iterator I = GCMetadataPrinterRegistry::begin(), E = GCMetadataPrinterRegistry::end(); I != E; ++I) if (strcmp(Name, I->getName()) == 0) { - GCMetadataPrinter *GMP = I->instantiate(); - GMP->S = S; - GCMap.insert(std::make_pair(S, GMP)); - return GMP; + std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate(); + GMP->S = &S; + auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP))); + return IterBool.first->second.get(); } report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); } + +/// Pin vtable to this file. 
+AsmPrinterHandler::~AsmPrinterHandler() {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index b92f49c..02cd12b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -11,8 +11,9 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" +#include "ByteStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" @@ -28,6 +29,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + //===----------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===----------------------------------------------------------------------===// @@ -52,9 +55,9 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. void AsmPrinter::EmitCFAByte(unsigned Val) const { if (isVerbose()) { - if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64) + if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64) OutStreamer.AddComment("DW_CFA_offset + Reg (" + - Twine(Val-dwarf::DW_CFA_offset) + ")"); + Twine(Val - dwarf::DW_CFA_offset) + ")"); else OutStreamer.AddComment(dwarf::CallFrameString(Val)); } @@ -63,43 +66,56 @@ void AsmPrinter::EmitCFAByte(unsigned Val) const { static const char *DecodeDWARFEncoding(unsigned Encoding) { switch (Encoding) { - case dwarf::DW_EH_PE_absptr: return "absptr"; - case dwarf::DW_EH_PE_omit: return "omit"; - case dwarf::DW_EH_PE_pcrel: return "pcrel"; - case dwarf::DW_EH_PE_udata4: return "udata4"; - case dwarf::DW_EH_PE_udata8: return "udata8"; - case dwarf::DW_EH_PE_sdata4: return "sdata4"; - case dwarf::DW_EH_PE_sdata8: return "sdata8"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: + case dwarf::DW_EH_PE_absptr: + return "absptr"; + case dwarf::DW_EH_PE_omit: + return "omit"; + case dwarf::DW_EH_PE_pcrel: + return "pcrel"; + case dwarf::DW_EH_PE_udata4: + return "udata4"; + case dwarf::DW_EH_PE_udata8: + return "udata8"; + case dwarf::DW_EH_PE_sdata4: + return "sdata4"; + case dwarf::DW_EH_PE_sdata8: + return "sdata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: + return "pcrel udata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: + return "pcrel sdata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: + return "pcrel udata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: + return "pcrel sdata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4 + : return "indirect pcrel udata4"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : return "indirect pcrel sdata4"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8 + : return 
"indirect pcrel udata8"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8 + : return "indirect pcrel sdata8"; } return "<unknown encoding>"; } - /// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an /// encoding. If verbose assembly output is enabled, we output comments /// describing the encoding. Desc is an optional string saying what the /// encoding is specifying (e.g. "LSDA"). void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { if (isVerbose()) { - if (Desc != 0) - OutStreamer.AddComment(Twine(Desc)+" Encoding = " + + if (Desc) + OutStreamer.AddComment(Twine(Desc) + " Encoding = " + Twine(DecodeDWARFEncoding(Val))); else - OutStreamer.AddComment(Twine("Encoding = ") + - DecodeDWARFEncoding(Val)); + OutStreamer.AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); } OutStreamer.EmitIntValue(Val, 1); @@ -111,11 +127,16 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { return 0; switch (Encoding & 0x07) { - default: llvm_unreachable("Invalid encoded value."); - case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(); - case dwarf::DW_EH_PE_udata2: return 2; - case dwarf::DW_EH_PE_udata4: return 4; - case dwarf::DW_EH_PE_udata8: return 8; + default: + llvm_unreachable("Invalid encoded value."); + case dwarf::DW_EH_PE_absptr: + return TM.getDataLayout()->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; } } @@ -125,7 +146,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer); + TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, OutStreamer); OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); } else OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); @@ -165,6 +186,177 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, EmitLabelDifference(Label, SectionLabel, 4); } +/// Emit a dwarf register operation. +static void emitDwarfRegOp(ByteStreamer &Streamer, int Reg) { + assert(Reg >= 0); + if (Reg < 32) { + Streamer.EmitInt8(dwarf::DW_OP_reg0 + Reg, + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + } else { + Streamer.EmitInt8(dwarf::DW_OP_regx, "DW_OP_regx"); + Streamer.EmitULEB128(Reg, Twine(Reg)); + } +} + +/// Emit an (double-)indirect dwarf register operation. +static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset, + bool Deref) { + assert(Reg >= 0); + if (Reg < 32) { + Streamer.EmitInt8(dwarf::DW_OP_breg0 + Reg, + dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); + } else { + Streamer.EmitInt8(dwarf::DW_OP_bregx, "DW_OP_bregx"); + Streamer.EmitULEB128(Reg, Twine(Reg)); + } + Streamer.EmitSLEB128(Offset); + if (Deref) + Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); +} + +/// Emit a dwarf register operation for describing +/// - a small value occupying only part of a register or +/// - a small register representing only part of a value. 
+static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits, + unsigned OffsetInBits) { + assert(SizeInBits > 0 && "zero-sized piece"); + unsigned SizeOfByte = 8; + if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { + Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece"); + Streamer.EmitULEB128(SizeInBits, Twine(SizeInBits)); + Streamer.EmitULEB128(OffsetInBits, Twine(OffsetInBits)); + } else { + Streamer.EmitInt8(dwarf::DW_OP_piece, "DW_OP_piece"); + unsigned ByteSize = SizeInBits / SizeOfByte; + Streamer.EmitULEB128(ByteSize, Twine(ByteSize)); + } +} + +/// Emit a shift-right dwarf expression. +static void emitDwarfOpShr(ByteStreamer &Streamer, + unsigned ShiftBy) { + Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu"); + Streamer.EmitULEB128(ShiftBy); + Streamer.EmitInt8(dwarf::DW_OP_shr, "DW_OP_shr"); +} + +// Some targets do not provide a DWARF register number for every +// register. This function attempts to emit a DWARF register by +// emitting a piece of a super-register or by piecing together +// multiple subregisters that alias the register. +void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, + const MachineLocation &MLoc, + unsigned PieceSizeInBits, + unsigned PieceOffsetInBits) const { + assert(MLoc.isReg() && "MLoc must be a register"); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + + // If this is a valid register number, emit it. + if (Reg >= 0) { + emitDwarfRegOp(Streamer, Reg); + emitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits); + return; + } + + // Walk up the super-register chain until we find a valid number. + // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. + for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) { + Reg = TRI->getDwarfRegNum(*SR, false); + if (Reg >= 0) { + unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg()); + unsigned Size = TRI->getSubRegIdxSize(Idx); + unsigned Offset = TRI->getSubRegIdxOffset(Idx); + OutStreamer.AddComment("super-register"); + emitDwarfRegOp(Streamer, Reg); + if (PieceOffsetInBits == Offset) { + emitDwarfOpPiece(Streamer, Size, Offset); + } else { + // If this is part of a variable in a sub-register at a + // non-zero offset, we need to manually shift the value into + // place, since the DW_OP_piece describes the part of the + // variable, not the position of the subregister. + emitDwarfOpPiece(Streamer, Size, PieceOffsetInBits); + if (Offset) + emitDwarfOpShr(Streamer, Offset); + } + return; + } + } + + // Otherwise, attempt to find a covering set of sub-register numbers. + // For example, Q0 on ARM is a composition of D0+D1. + // + // Keep track of the current position so we can emit the more + // efficient DW_OP_piece. + unsigned CurPos = PieceOffsetInBits; + // The size of the register in bits, assuming 8 bits per byte. + unsigned RegSize = TRI->getMinimalPhysRegClass(MLoc.getReg())->getSize() * 8; + // Keep track of the bits in the register we already emitted, so we + // can avoid emitting redundant aliasing subregs. + SmallBitVector Coverage(RegSize, false); + for (MCSubRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) { + unsigned Idx = TRI->getSubRegIndex(MLoc.getReg(), *SR); + unsigned Size = TRI->getSubRegIdxSize(Idx); + unsigned Offset = TRI->getSubRegIdxOffset(Idx); + Reg = TRI->getDwarfRegNum(*SR, false); + + // Intersection between the bits we already emitted and the bits + // covered by this subregister. 
+ SmallBitVector Intersection(RegSize, false); + Intersection.set(Offset, Offset + Size); + Intersection ^= Coverage; + + // If this sub-register has a DWARF number and we haven't covered + // its range, emit a DWARF piece for it. + if (Reg >= 0 && Intersection.any()) { + OutStreamer.AddComment("sub-register"); + emitDwarfRegOp(Streamer, Reg); + emitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset); + CurPos = Offset + Size; + + // Mark it as emitted. + Coverage.set(Offset, Offset + Size); + } + } + + if (CurPos == PieceOffsetInBits) { + // FIXME: We have no reasonable way of handling errors in here. + Streamer.EmitInt8(dwarf::DW_OP_nop, + "nop (could not find a dwarf register number)"); + } +} + +/// EmitDwarfRegOp - Emit dwarf register operation. +void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, + const MachineLocation &MLoc, + bool Indirect) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + if (Reg < 0) { + // We assume that pointers are always in an addressable register. + if (Indirect || MLoc.isIndirect()) { + // FIXME: We have no reasonable way of handling errors in here. The + // caller might be in the middle of a dwarf expression. We should + // probably assert that Reg >= 0 once debug info generation is more + // mature. + Streamer.EmitInt8(dwarf::DW_OP_nop, + "nop (invalid dwarf register number for indirect loc)"); + return; + } + + // Attempt to find a valid super- or sub-register. + return EmitDwarfRegOpPiece(Streamer, MLoc); + } + + if (MLoc.isIndirect()) + emitDwarfRegOpIndirect(Streamer, Reg, MLoc.getOffset(), Indirect); + else if (Indirect) + emitDwarfRegOpIndirect(Streamer, Reg, 0, false); + else + emitDwarfRegOp(Streamer, Reg); +} + //===----------------------------------------------------------------------===// // Dwarf Lowering Routines //===----------------------------------------------------------------------===// @@ -191,5 +383,8 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpWindowSave: OutStreamer.EmitCFIWindowSave(); break; + case MCCFIInstruction::OpSameValue: + OutStreamer.EmitCFISameValue(Inst.getRegister()); + break; } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h new file mode 100644 index 0000000..2825367 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -0,0 +1,57 @@ +//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a generic interface for AsmPrinter handlers, +// like debug and EH info emitters. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ +#define CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MachineFunction; +class MachineInstr; +class MCSymbol; + +/// \brief Collects and handles AsmPrinter objects required to build debug +/// or EH information. +class AsmPrinterHandler { +public: + virtual ~AsmPrinterHandler(); + + /// \brief For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. 
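// Worked example (illustrative, not upstream code) of the sub-register
// composition performed by EmitDwarfRegOpPiece above: assuming ARM, where Q0
// has no DWARF register number but its 64-bit halves D0 and D1 do (256 and
// 257 in the ARM ABI numbering), a value in Q0 is described as two
// consecutive 8-byte pieces that a DWARF consumer reassembles in order.
#include <cstdio>
struct Piece { unsigned DwarfReg; unsigned SizeInBytes; };
int main() {
  const Piece Q0[] = {{256, 8}, {257, 8}}; // D0 + D1 cover Q0
  for (const Piece &P : Q0)
    printf("DW_OP_regx %u; DW_OP_piece %u\n", P.DwarfReg, P.SizeInBytes);
  return 0;
}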
+ virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0; + + /// \brief Emit all sections that should come after the content. + virtual void endModule() = 0; + + /// \brief Gather pre-function debug information. + /// Every beginFunction(MF) call should be followed by an endFunction(MF) + /// call. + virtual void beginFunction(const MachineFunction *MF) = 0; + + /// \brief Gather post-function debug information. + /// Please note that some AsmPrinter implementations may not call + /// beginFunction at all. + virtual void endFunction(const MachineFunction *MF) = 0; + + /// \brief Process beginning of an instruction. + virtual void beginInstruction(const MachineInstr *MI) = 0; + + /// \brief Process end of an instruction. + virtual void endInstruction() = 0; +}; +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 4f927f6..46ee0c8 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -11,14 +11,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -33,8 +33,11 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + namespace { struct SrcMgrDiagInfo { const MDNode *LocInfo; @@ -77,11 +80,17 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, if (isNullTerminated) Str = Str.substr(0, Str.size()-1); - // If the output streamer is actually a .s file, just emit the blob textually. + // If the output streamer does not have mature MC support or the integrated + // assembler has been disabled, just emit the blob textually. + // Otherwise parse the asm and emit it via MC support. // This is useful in case the asm parser doesn't handle something but the // system assembler does. - if (OutStreamer.hasRawTextSupport()) { + const MCAsmInfo *MCAI = TM.getMCAsmInfo(); + assert(MCAI && "No MCAsmInfo"); + if (!MCAI->useIntegratedAssembler() && + !OutStreamer.isIntegratedAssemblerRequired()) { OutStreamer.EmitRawText(Str); + emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), nullptr); return; } @@ -91,7 +100,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // If the current LLVMContext has an inline asm handler, set it in SourceMgr. LLVMContext &LLVMCtx = MMI->getModule()->getContext(); bool HasDiagHandler = false; - if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) { + if (LLVMCtx.getInlineAsmDiagnosticHandler() != nullptr) { // If the source manager has an issue, we arrange for srcMgrDiagHandler // to be invoked, getting DiagInfo passed into it. DiagInfo.LocInfo = LocMDNode; @@ -110,20 +119,28 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // Tell SrcMgr about this buffer, it takes ownership of the buffer. 
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr, - OutContext, OutStreamer, - *MAI)); - - // FIXME: It would be nice if we can avoid createing a new instance of - // MCSubtargetInfo here given TargetSubtargetInfo is available. However, - // we have to watch out for asm directives which can change subtarget - // state. e.g. .code 16, .code 32. - OwningPtr<MCSubtargetInfo> - STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(), - TM.getTargetCPU(), - TM.getTargetFeatureString())); - OwningPtr<MCTargetAsmParser> - TAP(TM.getTarget().createMCAsmParser(*STI, *Parser, *MII)); + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI)); + + // Initialize the parser with a fresh subtarget info. It is better to use a + // new STI here because the parser may modify it and we do not want those + // modifications to persist after parsing the inlineasm. The modifications + // made by the parser will be seen by the code emitters because it passes + // the current STI down to the EncodeInstruction() method. + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); + + // Preserve a copy of the original STI because the parser may modify it. For + // example, when switching between arm and thumb mode. If the target needs to + // emit code to return to the original state it can do so in + // emitInlineAsmEnd(). + MCSubtargetInfo STIOrig = *STI; + + MCTargetOptions MCOptions; + if (MF) + MCOptions = MF->getTarget().Options.MCOptions; + std::unique_ptr<MCTargetAsmParser> TAP( + TM.getTarget().createMCAsmParser(*STI, *Parser, *MII, MCOptions)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -133,6 +150,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); + emitInlineAsmEnd(STIOrig, STI.get()); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); } @@ -216,10 +234,10 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, - /*Modifier*/ 0, OS); + /*Modifier*/ nullptr, OS); } else { Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, - /*Modifier*/ 0, OS); + /*Modifier*/ nullptr, OS); } } if (Error) { @@ -311,7 +329,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, ++LastEmitted; const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); - if (StrEnd == 0) + if (!StrEnd) report_fatal_error("Unterminated ${:foo} operand in inline asm" " string: '" + Twine(AsmStr) + "'"); @@ -386,11 +404,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, else { if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, - Modifier[0] ? Modifier : 0, + Modifier[0] ? Modifier : nullptr, OS); } else { Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, - Modifier[0] ? Modifier : 0, OS); + Modifier[0] ? Modifier : nullptr, OS); } } } @@ -427,26 +445,19 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // If this asmstr is empty, just print the #APP/#NOAPP markers. 
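// The save/restore pattern introduced above, in miniature: copy the
// subtarget state before the parser can mutate it, then hand both the
// original and the (possibly modified) final state to a target hook.
// SubtargetState, runParser, and this emitInlineAsmEnd are hypothetical
// stand-ins for MCSubtargetInfo, MCAsmParser::Run, and the real hook.
#include <cstdio>
struct SubtargetState { bool ThumbMode; };
static void runParser(SubtargetState &S) { S.ThumbMode = !S.ThumbMode; }
static void emitInlineAsmEnd(const SubtargetState &Orig,
                             const SubtargetState *End) {
  if (End && End->ThumbMode != Orig.ThumbMode)
    printf("emit mode-switch back to original state\n");
}
int main() {
  SubtargetState STI{false};
  SubtargetState Orig = STI; // preserve a copy before parsing
  runParser(STI);            // the parser may flip arm/thumb mode
  emitInlineAsmEnd(Orig, &STI);
  return 0;
}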
// These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { - // Don't emit the comments if writing to a .o file. - if (!OutStreamer.hasRawTextSupport()) return; - - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmEnd()); + OutStreamer.emitRawComment(MAI->getInlineAsmStart()); + OutStreamer.emitRawComment(MAI->getInlineAsmEnd()); return; } // Emit the #APP start marker. This has to happen even if verbose-asm isn't - // enabled, so we use EmitRawText. - if (OutStreamer.hasRawTextSupport()) - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); + // enabled, so we use emitRawComment. + OutStreamer.emitRawComment(MAI->getInlineAsmStart()); // Get the !srcloc metadata node if we have it, and decode the loc cookie from // it. unsigned LocCookie = 0; - const MDNode *LocMD = 0; + const MDNode *LocMD = nullptr; for (unsigned i = MI->getNumOperands(); i != 0; --i) { if (MI->getOperand(i-1).isMetadata() && (LocMD = MI->getOperand(i-1).getMetadata()) && @@ -476,10 +487,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't - // enabled, so we use EmitRawText. - if (OutStreamer.hasRawTextSupport()) - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmEnd()); + // enabled, so we use emitRawComment. + OutStreamer.emitRawComment(MAI->getInlineAsmEnd()); } @@ -491,8 +500,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { + const DataLayout *DL = TM.getDataLayout(); if (!strcmp(Code, "private")) { - OS << MAI->getPrivateGlobalPrefix(); + OS << DL->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { OS << MAI->getCommentString(); } else if (!strcmp(Code, "uid")) { @@ -551,3 +561,5 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } +void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h new file mode 100644 index 0000000..6c01d65 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -0,0 +1,71 @@ +//===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a class that can take bytes that would normally be +// streamed via the AsmPrinter. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BYTESTREAMER_H +#define LLVM_CODEGEN_BYTESTREAMER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "DIEHash.h" + +namespace llvm { +class ByteStreamer { + public: + virtual ~ByteStreamer() {} + + // For now we're just handling the calls we need for dwarf emission/hashing. 
+ virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0; + virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; + virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0; +}; + +class APByteStreamer : public ByteStreamer { +private: + AsmPrinter &AP; + +public: + APByteStreamer(AsmPrinter &Asm) : AP(Asm) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + AP.OutStreamer.AddComment(Comment); + AP.EmitInt8(Byte); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + AP.OutStreamer.AddComment(Comment); + AP.EmitSLEB128(DWord); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + AP.OutStreamer.AddComment(Comment); + AP.EmitULEB128(DWord); + } +}; + +class HashingByteStreamer : public ByteStreamer { + private: + DIEHash &Hash; + public: + HashingByteStreamer(DIEHash &H) : Hash(H) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + Hash.update(Byte); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + Hash.addSLEB128(DWord); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + Hash.addULEB128(DWord); + } +}; +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index e39b374..c3dcd9c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -13,17 +13,18 @@ #include "DIE.h" #include "DwarfDebug.h" +#include "DwarfUnit.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" using namespace llvm; @@ -48,7 +49,7 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { /// void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(unsigned(Tag)); - ID.AddInteger(ChildrenFlag); + ID.AddInteger(unsigned(Children)); // For each attribute description. for (unsigned i = 0, N = Data.size(); i < N; ++i) @@ -62,7 +63,7 @@ void DIEAbbrev::Emit(AsmPrinter *AP) const { AP->EmitULEB128(Tag, dwarf::TagString(Tag)); // Emit whether it has children DIEs. - AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag)); + AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children)); // For each attribute description. for (unsigned i = 0, N = Data.size(); i < N; ++i) { @@ -89,7 +90,7 @@ void DIEAbbrev::print(raw_ostream &O) { << " " << dwarf::TagString(Tag) << " " - << dwarf::ChildrenString(ChildrenFlag) + << dwarf::ChildrenString(Children) << '\n'; for (unsigned i = 0, N = Data.size(); i < N; ++i) { @@ -103,36 +104,28 @@ void DIEAbbrev::print(raw_ostream &O) { void DIEAbbrev::dump() { print(dbgs()); } #endif -//===----------------------------------------------------------------------===// -// DIE Implementation -//===----------------------------------------------------------------------===// - -DIE::~DIE() { - for (unsigned i = 0, N = Children.size(); i < N; ++i) - delete Children[i]; -} - -/// Climb up the parent chain to get the compile unit DIE to which this DIE +/// Climb up the parent chain to get the unit DIE to which this DIE /// belongs. 
-const DIE *DIE::getCompileUnit() const { - const DIE *Cu = getCompileUnitOrNull(); +const DIE *DIE::getUnit() const { + const DIE *Cu = getUnitOrNull(); assert(Cu && "We should not have orphaned DIEs."); return Cu; } -/// Climb up the parent chain to get the compile unit DIE this DIE belongs +/// Climb up the parent chain to get the unit DIE this DIE belongs /// to. Return NULL if DIE is not added to an owner yet. -const DIE *DIE::getCompileUnitOrNull() const { +const DIE *DIE::getUnitOrNull() const { const DIE *p = this; while (p) { - if (p->getTag() == dwarf::DW_TAG_compile_unit) + if (p->getTag() == dwarf::DW_TAG_compile_unit || + p->getTag() == dwarf::DW_TAG_type_unit) return p; p = p->getParent(); } - return NULL; + return nullptr; } -DIEValue *DIE::findAttribute(uint16_t Attribute) { +DIEValue *DIE::findAttribute(dwarf::Attribute Attribute) const { const SmallVectorImpl<DIEValue *> &Values = getValues(); const DIEAbbrev &Abbrevs = getAbbrev(); @@ -141,7 +134,7 @@ DIEValue *DIE::findAttribute(uint16_t Attribute) { for (size_t i = 0; i < Values.size(); ++i) if (Abbrevs.getData()[i].getAttribute() == Attribute) return Values[i]; - return NULL; + return nullptr; } #ifndef NDEBUG @@ -159,7 +152,7 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const { O << Indent << dwarf::TagString(Abbrev.getTag()) << " " - << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n"; + << dwarf::ChildrenString(Abbrev.hasChildren()) << "\n"; } else { O << "Size: " << Size << "\n"; } @@ -215,8 +208,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. // FIXME: Is there a better way to do this? - if (Asm->OutStreamer.hasRawTextSupport()) - Asm->OutStreamer.EmitRawText(""); + Asm->OutStreamer.AddBlankLine(); return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru @@ -227,6 +219,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: Size = 4; break; case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_ref_sig8: // Fall thru case dwarf::DW_FORM_data8: Size = 8; break; case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return; @@ -253,11 +246,12 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: return sizeof(int32_t); case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_ref_sig8: // Fall thru case dwarf::DW_FORM_data8: return sizeof(int64_t); - case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer); - case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer); - case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); - case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + case dwarf::DW_FORM_GNU_str_index: return getULEB128Size(Integer); + case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer); + case dwarf::DW_FORM_udata: return getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); } @@ -379,7 +373,26 @@ void DIEString::print(raw_ostream &O) const { /// EmitValue - Emit debug information entry offset. 
/// void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { - AP->EmitInt32(Entry->getOffset()); + + if (Form == dwarf::DW_FORM_ref_addr) { + const DwarfDebug *DD = AP->getDwarfDebug(); + unsigned Addr = Entry.getOffset(); + assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations."); + // For DW_FORM_ref_addr, output the offset from beginning of debug info + // section. Entry->getOffset() returns the offset from start of the + // compile unit. + DwarfCompileUnit *CU = DD->lookupUnit(Entry.getUnit()); + assert(CU && "CUDie should belong to a CU."); + Addr += CU->getDebugInfoOffset(); + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, + DIEEntry::getRefAddrSize(AP)); + else + AP->EmitLabelOffsetDifference(CU->getSectionSym(), Addr, + CU->getSectionSym(), + DIEEntry::getRefAddrSize(AP)); + } else + AP->EmitInt32(Entry.getOffset()); } unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { @@ -387,14 +400,87 @@ unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { // specified to be four bytes in the DWARF 32-bit format and eight bytes // in the DWARF 64-bit format, while DWARF Version 2 specifies that such // references have the same size as an address on the target system. - if (AP->getDwarfDebug()->getDwarfVersion() == 2) + const DwarfDebug *DD = AP->getDwarfDebug(); + assert(DD && "Expected Dwarf Debug info to be available"); + if (DD->getDwarfVersion() == 2) return AP->getDataLayout().getPointerSize(); return sizeof(int32_t); } #ifndef NDEBUG void DIEEntry::print(raw_ostream &O) const { - O << format("Die: 0x%lx", (long)(intptr_t)Entry); + O << format("Die: 0x%lx", (long)(intptr_t)&Entry); +} +#endif + +//===----------------------------------------------------------------------===// +// DIETypeSignature Implementation +//===----------------------------------------------------------------------===// +void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { + assert(Form == dwarf::DW_FORM_ref_sig8); + Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8); +} + +#ifndef NDEBUG +void DIETypeSignature::print(raw_ostream &O) const { + O << format("Type Unit: 0x%lx", Unit.getTypeSignature()); +} + +void DIETypeSignature::dump() const { print(dbgs()); } +#endif + +//===----------------------------------------------------------------------===// +// DIELoc Implementation +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the location expression. +/// +unsigned DIELoc::ComputeSize(AsmPrinter *AP) const { + if (!Size) { + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) + Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm()); + } + + return Size; +} + +/// EmitValue - Emit location data. +/// +void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { + switch (Form) { + default: llvm_unreachable("Improper form for block"); + case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; + case dwarf::DW_FORM_block: + case dwarf::DW_FORM_exprloc: + Asm->EmitULEB128(Size); break; + } + + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) + Values[i]->EmitValue(Asm, AbbrevData[i].getForm()); +} + +/// SizeOf - Determine size of location data in bytes. 
+/// +unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); + case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); + case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_block: + case dwarf::DW_FORM_exprloc: + return Size + getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); + } +} + +#ifndef NDEBUG +void DIELoc::print(raw_ostream &O) const { + O << "ExprLoc: "; + DIE::print(O, 5); } #endif @@ -404,7 +490,7 @@ void DIEEntry::print(raw_ostream &O) const { /// ComputeSize - calculate the size of the block. /// -unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { +unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const { if (!Size) { const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -437,7 +523,7 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); - case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); + case dwarf::DW_FORM_block: return Size + getULEB128Size(Size); default: llvm_unreachable("Improper form for block"); } } @@ -448,3 +534,34 @@ void DIEBlock::print(raw_ostream &O) const { DIE::print(O, 5); } #endif + +//===----------------------------------------------------------------------===// +// DIELocList Implementation +//===----------------------------------------------------------------------===// + +unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) + return 4; + if (Form == dwarf::DW_FORM_sec_offset) + return 4; + return AP->getDataLayout().getPointerSize(); +} + +/// EmitValue - Emit label value. +/// +void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { + DwarfDebug *DD = AP->getDwarfDebug(); + MCSymbol *Label = DD->getDebugLocEntries()[Index].Label; + + if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf()) + AP->EmitSectionOffset(Label, DD->getDebugLocSym()); + else + AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4); +} + +#ifndef NDEBUG +void DIELocList::print(raw_ostream &O) const { + O << "LocList: " << Index; + +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h index f4fa326..ef05f17 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h @@ -16,437 +16,571 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" -#include "llvm/MC/MCExpr.h" #include <vector> namespace llvm { - class AsmPrinter; - class MCSymbol; - class MCSymbolRefExpr; - class raw_ostream; - - //===--------------------------------------------------------------------===// - /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a - /// Dwarf abbreviation. - class DIEAbbrevData { - /// Attribute - Dwarf attribute code. - /// - dwarf::Attribute Attribute; - - /// Form - Dwarf form code. - /// - dwarf::Form Form; - public: - DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {} - - // Accessors. 
- dwarf::Attribute getAttribute() const { return Attribute; } - dwarf::Form getForm() const { return Form; } - - /// Profile - Used to gather unique data for the abbreviation folding set. - /// - void Profile(FoldingSetNodeID &ID) const; - }; +class AsmPrinter; +class MCExpr; +class MCSymbol; +class raw_ostream; +class DwarfTypeUnit; + +//===--------------------------------------------------------------------===// +/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a +/// Dwarf abbreviation. +class DIEAbbrevData { + /// Attribute - Dwarf attribute code. + /// + dwarf::Attribute Attribute; - //===--------------------------------------------------------------------===// - /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug - /// information object. - class DIEAbbrev : public FoldingSetNode { - /// Tag - Dwarf tag code. - /// - dwarf::Tag Tag; - - /// ChildrenFlag - Dwarf children flag. - /// - uint16_t ChildrenFlag; - - /// Unique number for node. - /// - unsigned Number; - - /// Data - Raw data bytes for abbreviation. - /// - SmallVector<DIEAbbrevData, 12> Data; - - public: - DIEAbbrev(dwarf::Tag T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} - - // Accessors. - dwarf::Tag getTag() const { return Tag; } - unsigned getNumber() const { return Number; } - uint16_t getChildrenFlag() const { return ChildrenFlag; } - const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } - void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } - void setNumber(unsigned N) { Number = N; } - - /// AddAttribute - Adds another set of attribute information to the - /// abbreviation. - void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) { - Data.push_back(DIEAbbrevData(Attribute, Form)); - } + /// Form - Dwarf form code. + /// + dwarf::Form Form; + +public: + DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {} + + // Accessors. + dwarf::Attribute getAttribute() const { return Attribute; } + dwarf::Form getForm() const { return Form; } + + /// Profile - Used to gather unique data for the abbreviation folding set. + /// + void Profile(FoldingSetNodeID &ID) const; +}; + +//===--------------------------------------------------------------------===// +/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug +/// information object. +class DIEAbbrev : public FoldingSetNode { + /// Unique number for node. + /// + unsigned Number; - /// Profile - Used to gather unique data for the abbreviation folding set. - /// - void Profile(FoldingSetNodeID &ID) const; + /// Tag - Dwarf tag code. + /// + dwarf::Tag Tag; - /// Emit - Print the abbreviation using the specified asm printer. - /// - void Emit(AsmPrinter *AP) const; + /// Children - Whether or not this node has children. + /// + // This cheats a bit in all of the uses since the values in the standard + // are 0 and 1 for no children and children respectively. + bool Children; + + /// Data - Raw data bytes for abbreviation. + /// + SmallVector<DIEAbbrevData, 12> Data; + +public: + DIEAbbrev(dwarf::Tag T, bool C) : Tag(T), Children(C), Data() {} + + // Accessors. 
+ dwarf::Tag getTag() const { return Tag; } + unsigned getNumber() const { return Number; } + bool hasChildren() const { return Children; } + const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } + void setChildrenFlag(bool hasChild) { Children = hasChild; } + void setNumber(unsigned N) { Number = N; } + + /// AddAttribute - Adds another set of attribute information to the + /// abbreviation. + void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) { + Data.push_back(DIEAbbrevData(Attribute, Form)); + } + + /// Profile - Used to gather unique data for the abbreviation folding set. + /// + void Profile(FoldingSetNodeID &ID) const; + + /// Emit - Print the abbreviation using the specified asm printer. + /// + void Emit(AsmPrinter *AP) const; #ifndef NDEBUG - void print(raw_ostream &O); - void dump(); + void print(raw_ostream &O); + void dump(); #endif - }; +}; - //===--------------------------------------------------------------------===// - /// DIE - A structured debug information entry. Has an abbreviation which - /// describes its organization. - class DIEValue; - - class DIE { - protected: - /// Offset - Offset in debug info section. - /// - unsigned Offset; - - /// Size - Size of instance + children. - /// - unsigned Size; - - /// Abbrev - Buffer for constructing abbreviation. - /// - DIEAbbrev Abbrev; - - /// Children DIEs. - /// - std::vector<DIE *> Children; - - DIE *Parent; - - /// Attribute values. - /// - SmallVector<DIEValue*, 12> Values; - - public: - explicit DIE(unsigned Tag) - : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no), - Parent(0) {} - virtual ~DIE(); - - // Accessors. - DIEAbbrev &getAbbrev() { return Abbrev; } - const DIEAbbrev &getAbbrev() const { return Abbrev; } - unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } - dwarf::Tag getTag() const { return Abbrev.getTag(); } - unsigned getOffset() const { return Offset; } - unsigned getSize() const { return Size; } - const std::vector<DIE *> &getChildren() const { return Children; } - const SmallVectorImpl<DIEValue*> &getValues() const { return Values; } - DIE *getParent() const { return Parent; } - /// Climb up the parent chain to get the compile unit DIE this DIE belongs - /// to. - const DIE *getCompileUnit() const; - /// Similar to getCompileUnit, returns null when DIE is not added to an - /// owner yet. - const DIE *getCompileUnitOrNull() const; - void setOffset(unsigned O) { Offset = O; } - void setSize(unsigned S) { Size = S; } - - /// addValue - Add a value and attributes to a DIE. - /// - void addValue(dwarf::Attribute Attribute, dwarf::Form Form, - DIEValue *Value) { - Abbrev.AddAttribute(Attribute, Form); - Values.push_back(Value); - } +//===--------------------------------------------------------------------===// +/// DIE - A structured debug information entry. Has an abbreviation which +/// describes its organization. +class DIEValue; + +class DIE { +protected: + /// Offset - Offset in debug info section. + /// + unsigned Offset; + + /// Size - Size of instance + children. + /// + unsigned Size; + + /// Abbrev - Buffer for constructing abbreviation. + /// + DIEAbbrev Abbrev; + + /// Children DIEs. + /// + // This can't be a vector<DIE> because pointer validity is required for the + // Parent pointer and DIEEntry.
+ // It can't be a list<DIE> because some clients need pointer validity before + // the object has been added to any child list + // (eg: DwarfUnit::constructVariableDIE). These aren't insurmountable, but may + // be more convoluted than beneficial. + std::vector<std::unique_ptr<DIE>> Children; + + DIE *Parent; - /// findAttribute - Find a value in the DIE with the attribute given, returns NULL - /// if no such attribute exists. - DIEValue *findAttribute(uint16_t Attribute); + /// Attribute values. + /// + SmallVector<DIEValue *, 12> Values; + +protected: + DIE() + : Offset(0), Size(0), Abbrev((dwarf::Tag)0, dwarf::DW_CHILDREN_no), + Parent(nullptr) {} + +public: + explicit DIE(dwarf::Tag Tag) + : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no), + Parent(nullptr) {} + + // Accessors. + DIEAbbrev &getAbbrev() { return Abbrev; } + const DIEAbbrev &getAbbrev() const { return Abbrev; } + unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } + dwarf::Tag getTag() const { return Abbrev.getTag(); } + unsigned getOffset() const { return Offset; } + unsigned getSize() const { return Size; } + const std::vector<std::unique_ptr<DIE>> &getChildren() const { + return Children; + } + const SmallVectorImpl<DIEValue *> &getValues() const { return Values; } + DIE *getParent() const { return Parent; } + /// Climb up the parent chain to get the compile or type unit DIE this DIE + /// belongs to. + const DIE *getUnit() const; + /// Similar to getUnit, returns null when DIE is not added to an + /// owner yet. + const DIE *getUnitOrNull() const; + void setOffset(unsigned O) { Offset = O; } + void setSize(unsigned S) { Size = S; } + + /// addValue - Add a value and attributes to a DIE. + /// + void addValue(dwarf::Attribute Attribute, dwarf::Form Form, DIEValue *Value) { + Abbrev.AddAttribute(Attribute, Form); + Values.push_back(Value); + } + + /// addChild - Add a child to the DIE. + /// + void addChild(std::unique_ptr<DIE> Child) { + assert(!Child->getParent()); + Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); + Child->Parent = this; + Children.push_back(std::move(Child)); + } + + /// findAttribute - Find a value in the DIE with the attribute given, + /// returns NULL if no such attribute exists. + DIEValue *findAttribute(dwarf::Attribute Attribute) const; #ifndef NDEBUG - void print(raw_ostream &O, unsigned IndentCount = 0) const; - void dump(); + void print(raw_ostream &O, unsigned IndentCount = 0) const; + void dump(); #endif +}; + +//===--------------------------------------------------------------------===// +/// DIEValue - A debug information entry value. Some of these roughly correlate +/// to DWARF attribute classes. +/// +class DIEValue { + virtual void anchor(); + +public: + enum Type { + isInteger, + isString, + isExpr, + isLabel, + isDelta, + isEntry, + isTypeSignature, + isBlock, + isLoc, + isLocList, }; - //===--------------------------------------------------------------------===// - /// DIEValue - A debug information entry value. - /// - class DIEValue { - virtual void anchor(); - public: - enum { - isInteger, - isString, - isExpr, - isLabel, - isDelta, - isEntry, - isBlock - }; - protected: - /// Type - Type of data stored in the value. - /// - unsigned Type; - public: - explicit DIEValue(unsigned T) : Type(T) {} - virtual ~DIEValue() {} - - // Accessors - unsigned getType() const { return Type; } - - /// EmitValue - Emit value via the Dwarf writer. 
- /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0; - - /// SizeOf - Return the size of a value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0; +protected: + /// Ty - Type of data stored in the value. + /// + Type Ty; + + explicit DIEValue(Type T) : Ty(T) {} + virtual ~DIEValue() {} + +public: + // Accessors + Type getType() const { return Ty; } + + /// EmitValue - Emit value via the Dwarf writer. + /// + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0; + + /// SizeOf - Return the size of a value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0; #ifndef NDEBUG - virtual void print(raw_ostream &O) const = 0; - void dump() const; + virtual void print(raw_ostream &O) const = 0; + void dump() const; #endif - }; +}; + +//===--------------------------------------------------------------------===// +/// DIEInteger - An integer value DIE. +/// +class DIEInteger : public DIEValue { + uint64_t Integer; - //===--------------------------------------------------------------------===// - /// DIEInteger - An integer value DIE. - /// - class DIEInteger : public DIEValue { - uint64_t Integer; - public: - explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} - - /// BestForm - Choose the best form for integer. - /// - static dwarf::Form BestForm(bool IsSigned, uint64_t Int) { - if (IsSigned) { - const int64_t SignedInt = Int; - if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; - if ((short)Int == SignedInt) return dwarf::DW_FORM_data2; - if ((int)Int == SignedInt) return dwarf::DW_FORM_data4; - } else { - if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; - if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; - if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4; - } - return dwarf::DW_FORM_data8; +public: + explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} + + /// BestForm - Choose the best form for integer. + /// + static dwarf::Form BestForm(bool IsSigned, uint64_t Int) { + if (IsSigned) { + const int64_t SignedInt = Int; + if ((char)Int == SignedInt) + return dwarf::DW_FORM_data1; + if ((short)Int == SignedInt) + return dwarf::DW_FORM_data2; + if ((int)Int == SignedInt) + return dwarf::DW_FORM_data4; + } else { + if ((unsigned char)Int == Int) + return dwarf::DW_FORM_data1; + if ((unsigned short)Int == Int) + return dwarf::DW_FORM_data2; + if ((unsigned int)Int == Int) + return dwarf::DW_FORM_data4; } + return dwarf::DW_FORM_data8; + } - /// EmitValue - Emit integer of appropriate size. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// EmitValue - Emit integer of appropriate size. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - uint64_t getValue() const { return Integer; } + uint64_t getValue() const { return Integer; } - /// SizeOf - Determine size of integer value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// SizeOf - Determine size of integer value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *I) { return I->getType() == isInteger; } + // Implement isa/cast/dyncast. 
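+ // (classof keys on the Type tag recorded by the DIEValue constructor; this + // is the usual LLVM hook that lets isa<>, cast<> and dyn_cast<> work without + // C++ RTTI.)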
+ static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; - //===--------------------------------------------------------------------===// - /// DIEExpr - An expression DIE. - // - class DIEExpr : public DIEValue { - const MCExpr *Expr; - public: - explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} +//===--------------------------------------------------------------------===// +/// DIEExpr - An expression DIE. +// +class DIEExpr : public DIEValue { + const MCExpr *Expr; - /// EmitValue - Emit expression value. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; +public: + explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} - /// getValue - Get MCExpr. - /// - const MCExpr *getValue() const { return Expr; } + /// EmitValue - Emit expression value. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - /// SizeOf - Determine size of expression value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// getValue - Get MCExpr. + /// + const MCExpr *getValue() const { return Expr; } + + /// SizeOf - Determine size of expression value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isExpr; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isExpr; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; + +//===--------------------------------------------------------------------===// +/// DIELabel - A label DIE. +// +class DIELabel : public DIEValue { + const MCSymbol *Label; - //===--------------------------------------------------------------------===// - /// DIELabel - A label DIE. - // - class DIELabel : public DIEValue { - const MCSymbol *Label; - public: - explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {} +public: + explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {} - /// EmitValue - Emit label value. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// EmitValue - Emit label value. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - /// getValue - Get MCSymbol. - /// - const MCSymbol *getValue() const { return Label; } + /// getValue - Get MCSymbol. + /// + const MCSymbol *getValue() const { return Label; } - /// SizeOf - Determine size of label value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// SizeOf - Determine size of label value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *L) { return L->getType() == isLabel; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; - //===--------------------------------------------------------------------===// - /// DIEDelta - A simple label difference DIE. 
- /// - class DIEDelta : public DIEValue { - const MCSymbol *LabelHi; - const MCSymbol *LabelLo; - public: - DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) +//===--------------------------------------------------------------------===// +/// DIEDelta - A simple label difference DIE. +/// +class DIEDelta : public DIEValue { + const MCSymbol *LabelHi; + const MCSymbol *LabelLo; + +public: + DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {} - /// EmitValue - Emit delta value. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// EmitValue - Emit delta value. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - /// SizeOf - Determine size of delta value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// SizeOf - Determine size of delta value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *D) { return D->getType() == isDelta; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; + +//===--------------------------------------------------------------------===// +/// DIEString - A container for string values. +/// +class DIEString : public DIEValue { + const DIEValue *Access; + const StringRef Str; + +public: + DIEString(const DIEValue *Acc, const StringRef S) + : DIEValue(isString), Access(Acc), Str(S) {} - //===--------------------------------------------------------------------===// - /// DIEString - A container for string values. + /// getString - Grab the string out of the object. + StringRef getString() const { return Str; } + + /// EmitValue - Emit string value. /// - class DIEString : public DIEValue { - const DIEValue *Access; - const StringRef Str; + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - public: - DIEString(const DIEValue *Acc, const StringRef S) - : DIEValue(isString), Access(Acc), Str(S) {} + /// SizeOf - Determine size of string value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - /// getString - Grab the string out of the object. - StringRef getString() const { return Str; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *D) { return D->getType() == isString; } - /// EmitValue - Emit delta value. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; +#ifndef NDEBUG + void print(raw_ostream &O) const override; +#endif +}; - /// SizeOf - Determine size of delta value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; +//===--------------------------------------------------------------------===// +/// DIEEntry - A pointer to another debug information entry. An instance of +/// this class can also be used as a proxy for a debug information entry not +/// yet defined (i.e. types). +class DIEEntry : public DIEValue { + DIE &Entry; - // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *D) { return D->getType() == isString; } +public: + explicit DIEEntry(DIE &E) : DIEValue(isEntry), Entry(E) { + } - #ifndef NDEBUG - virtual void print(raw_ostream &O) const; - #endif - }; + DIE &getEntry() const { return Entry; } - //===--------------------------------------------------------------------===// - /// DIEEntry - A pointer to another debug information entry. An instance of - /// this class can also be used as a proxy for a debug information entry not - /// yet defined (ie. types.) - class DIEEntry : public DIEValue { - DIE *const Entry; - public: - explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) { - assert(E && "Cannot construct a DIEEntry with a null DIE"); - } + /// EmitValue - Emit debug information entry offset. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - DIE *getEntry() const { return Entry; } + /// SizeOf - Determine size of debug information entry in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override { + return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) + : sizeof(int32_t); + } - /// EmitValue - Emit debug information entry offset. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// Returns size of a ref_addr entry. + static unsigned getRefAddrSize(AsmPrinter *AP); - /// SizeOf - Determine size of debug information entry in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const { - return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) - : sizeof(int32_t); - } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isEntry; } + +#ifndef NDEBUG + void print(raw_ostream &O) const override; +#endif +}; + +//===--------------------------------------------------------------------===// +/// \brief A signature reference to a type unit. +class DIETypeSignature : public DIEValue { + const DwarfTypeUnit &Unit; + +public: + explicit DIETypeSignature(const DwarfTypeUnit &Unit) + : DIEValue(isTypeSignature), Unit(Unit) {} + + /// \brief Emit type unit signature. + void EmitValue(AsmPrinter *Asm, dwarf::Form Form) const override; + + /// Returns size of a ref_sig8 entry. + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override { + assert(Form == dwarf::DW_FORM_ref_sig8); + return 8; + } + + // \brief Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { + return E->getType() == isTypeSignature; + } +#ifndef NDEBUG + void print(raw_ostream &O) const override; + void dump() const; +#endif +}; + +//===--------------------------------------------------------------------===// +/// DIELoc - Represents an expression location. +// +class DIELoc : public DIEValue, public DIE { + mutable unsigned Size; // Size in bytes excluding size header. +public: + DIELoc() : DIEValue(isLoc), Size(0) {} - /// Returns size of a ref_addr entry. - static unsigned getRefAddrSize(AsmPrinter *AP); + /// ComputeSize - Calculate the size of the location expression. + /// + unsigned ComputeSize(AsmPrinter *AP) const; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isEntry; } + /// BestForm - Choose the best form for data. + /// + dwarf::Form BestForm(unsigned DwarfVersion) const { + if (DwarfVersion > 3) + return dwarf::DW_FORM_exprloc; + // Pre-DWARF4 location expressions were blocks and not exprloc. 
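+ // (The casts below select the narrowest DW_FORM_block1/2/4 form whose + // length prefix can still represent Size without truncation.)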
+ if ((unsigned char)Size == Size) + return dwarf::DW_FORM_block1; + if ((unsigned short)Size == Size) + return dwarf::DW_FORM_block2; + if ((unsigned int)Size == Size) + return dwarf::DW_FORM_block4; + return dwarf::DW_FORM_block; + } + + /// EmitValue - Emit location data. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; + + /// SizeOf - Determine size of location data in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isLoc; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; - //===--------------------------------------------------------------------===// - /// DIEBlock - A block of values. Primarily used for location expressions. - // - class DIEBlock : public DIEValue, public DIE { - unsigned Size; // Size in bytes excluding size header. - public: - DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {} - - /// ComputeSize - calculate the size of the block. - /// - unsigned ComputeSize(AsmPrinter *AP); - - /// BestForm - Choose the best form for data. - /// - dwarf::Form BestForm() const { - if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1; - if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2; - if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4; - return dwarf::DW_FORM_block; - } +//===--------------------------------------------------------------------===// +/// DIEBlock - Represents a block of values. +// +class DIEBlock : public DIEValue, public DIE { + mutable unsigned Size; // Size in bytes excluding size header. +public: + DIEBlock() : DIEValue(isBlock), Size(0) {} - /// EmitValue - Emit block data. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// ComputeSize - Calculate the size of the block. + /// + unsigned ComputeSize(AsmPrinter *AP) const; - /// SizeOf - Determine size of block data in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// BestForm - Choose the best form for data. + /// + dwarf::Form BestForm() const { + if ((unsigned char)Size == Size) + return dwarf::DW_FORM_block1; + if ((unsigned short)Size == Size) + return dwarf::DW_FORM_block2; + if ((unsigned int)Size == Size) + return dwarf::DW_FORM_block4; + return dwarf::DW_FORM_block; + } + + /// EmitValue - Emit block data. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; + + /// SizeOf - Determine size of block data in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isBlock; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isBlock; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; + +//===--------------------------------------------------------------------===// +/// DIELocList - Represents a pointer to a location list in the debug_loc +/// section. +// +class DIELocList : public DIEValue { + // Index into the .debug_loc vector. + size_t Index; + +public: + DIELocList(size_t I) : DIEValue(isLocList), Index(I) {} + + /// getValue - Grab the current index out. + size_t getValue() const { return Index; } + + /// EmitValue - Emit location data.
+ /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; + + /// SizeOf - Determine size of location data in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isLocList; } + +#ifndef NDEBUG + void print(raw_ostream &O) const override; +#endif +}; } // end llvm namespace diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 95eca90..c2fad59 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -11,14 +11,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dwarfdebug" - +#include "ByteStreamer.h" #include "DIEHash.h" - #include "DIE.h" -#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" @@ -27,6 +26,8 @@ using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + /// \brief Grabs the string in whichever attribute is passed in and returns /// a reference to it. static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { @@ -75,7 +76,7 @@ void DIEHash::addSLEB128(int64_t Value) { do { uint8_t Byte = Value & 0x7f; Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || + More = !((((Value == 0) && ((Byte & 0x40) == 0)) || ((Value == -1) && ((Byte & 0x40) != 0)))); if (More) Byte |= 0x80; // Mark this byte to show that more bytes will follow. @@ -92,10 +93,12 @@ void DIEHash::addParentContext(const DIE &Parent) { // outermost such construct... SmallVector<const DIE *, 1> Parents; const DIE *Cur = &Parent; - while (Cur->getTag() != dwarf::DW_TAG_compile_unit) { + while (Cur->getParent()) { Parents.push_back(Cur); Cur = Cur->getParent(); } + assert(Cur->getTag() == dwarf::DW_TAG_compile_unit || + Cur->getTag() == dwarf::DW_TAG_type_unit); // Reverse iterate over our list to go from the outermost construct to the // innermost. 
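// A minimal usage sketch (assumed driver code, not part of this patch):
// DIEHash now optionally carries an AsmPrinter so that block-like values
// (DIELoc, DIEBlock, DIELocList) can be sized and streamed while hashing.
// "Asm" names the active AsmPrinter and "TypeDie" a fully constructed type
// DIE; both names are assumptions for illustration.
DIEHash Hasher(Asm);
uint64_t ODRSignature = Hasher.computeDIEODRSignature(TypeDie);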
@@ -134,55 +137,55 @@ void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute()) << " added.\n"); switch (Abbrevs.getData()[i].getAttribute()) { - COLLECT_ATTR(DW_AT_name); - COLLECT_ATTR(DW_AT_accessibility); - COLLECT_ATTR(DW_AT_address_class); - COLLECT_ATTR(DW_AT_allocated); - COLLECT_ATTR(DW_AT_artificial); - COLLECT_ATTR(DW_AT_associated); - COLLECT_ATTR(DW_AT_binary_scale); - COLLECT_ATTR(DW_AT_bit_offset); - COLLECT_ATTR(DW_AT_bit_size); - COLLECT_ATTR(DW_AT_bit_stride); - COLLECT_ATTR(DW_AT_byte_size); - COLLECT_ATTR(DW_AT_byte_stride); - COLLECT_ATTR(DW_AT_const_expr); - COLLECT_ATTR(DW_AT_const_value); - COLLECT_ATTR(DW_AT_containing_type); - COLLECT_ATTR(DW_AT_count); - COLLECT_ATTR(DW_AT_data_bit_offset); - COLLECT_ATTR(DW_AT_data_location); - COLLECT_ATTR(DW_AT_data_member_location); - COLLECT_ATTR(DW_AT_decimal_scale); - COLLECT_ATTR(DW_AT_decimal_sign); - COLLECT_ATTR(DW_AT_default_value); - COLLECT_ATTR(DW_AT_digit_count); - COLLECT_ATTR(DW_AT_discr); - COLLECT_ATTR(DW_AT_discr_list); - COLLECT_ATTR(DW_AT_discr_value); - COLLECT_ATTR(DW_AT_encoding); - COLLECT_ATTR(DW_AT_enum_class); - COLLECT_ATTR(DW_AT_endianity); - COLLECT_ATTR(DW_AT_explicit); - COLLECT_ATTR(DW_AT_is_optional); - COLLECT_ATTR(DW_AT_location); - COLLECT_ATTR(DW_AT_lower_bound); - COLLECT_ATTR(DW_AT_mutable); - COLLECT_ATTR(DW_AT_ordering); - COLLECT_ATTR(DW_AT_picture_string); - COLLECT_ATTR(DW_AT_prototyped); - COLLECT_ATTR(DW_AT_small); - COLLECT_ATTR(DW_AT_segment); - COLLECT_ATTR(DW_AT_string_length); - COLLECT_ATTR(DW_AT_threads_scaled); - COLLECT_ATTR(DW_AT_upper_bound); - COLLECT_ATTR(DW_AT_use_location); - COLLECT_ATTR(DW_AT_use_UTF8); - COLLECT_ATTR(DW_AT_variable_parameter); - COLLECT_ATTR(DW_AT_virtuality); - COLLECT_ATTR(DW_AT_visibility); - COLLECT_ATTR(DW_AT_vtable_elem_location); - COLLECT_ATTR(DW_AT_type); + COLLECT_ATTR(DW_AT_name); + COLLECT_ATTR(DW_AT_accessibility); + COLLECT_ATTR(DW_AT_address_class); + COLLECT_ATTR(DW_AT_allocated); + COLLECT_ATTR(DW_AT_artificial); + COLLECT_ATTR(DW_AT_associated); + COLLECT_ATTR(DW_AT_binary_scale); + COLLECT_ATTR(DW_AT_bit_offset); + COLLECT_ATTR(DW_AT_bit_size); + COLLECT_ATTR(DW_AT_bit_stride); + COLLECT_ATTR(DW_AT_byte_size); + COLLECT_ATTR(DW_AT_byte_stride); + COLLECT_ATTR(DW_AT_const_expr); + COLLECT_ATTR(DW_AT_const_value); + COLLECT_ATTR(DW_AT_containing_type); + COLLECT_ATTR(DW_AT_count); + COLLECT_ATTR(DW_AT_data_bit_offset); + COLLECT_ATTR(DW_AT_data_location); + COLLECT_ATTR(DW_AT_data_member_location); + COLLECT_ATTR(DW_AT_decimal_scale); + COLLECT_ATTR(DW_AT_decimal_sign); + COLLECT_ATTR(DW_AT_default_value); + COLLECT_ATTR(DW_AT_digit_count); + COLLECT_ATTR(DW_AT_discr); + COLLECT_ATTR(DW_AT_discr_list); + COLLECT_ATTR(DW_AT_discr_value); + COLLECT_ATTR(DW_AT_encoding); + COLLECT_ATTR(DW_AT_enum_class); + COLLECT_ATTR(DW_AT_endianity); + COLLECT_ATTR(DW_AT_explicit); + COLLECT_ATTR(DW_AT_is_optional); + COLLECT_ATTR(DW_AT_location); + COLLECT_ATTR(DW_AT_lower_bound); + COLLECT_ATTR(DW_AT_mutable); + COLLECT_ATTR(DW_AT_ordering); + COLLECT_ATTR(DW_AT_picture_string); + COLLECT_ATTR(DW_AT_prototyped); + COLLECT_ATTR(DW_AT_small); + COLLECT_ATTR(DW_AT_segment); + COLLECT_ATTR(DW_AT_string_length); + COLLECT_ATTR(DW_AT_threads_scaled); + COLLECT_ATTR(DW_AT_upper_bound); + COLLECT_ATTR(DW_AT_use_location); + COLLECT_ATTR(DW_AT_use_UTF8); + COLLECT_ATTR(DW_AT_variable_parameter); + COLLECT_ATTR(DW_AT_virtuality); + COLLECT_ATTR(DW_AT_visibility); + 
COLLECT_ATTR(DW_AT_vtable_elem_location); + COLLECT_ATTR(DW_AT_type); default: break; } @@ -269,6 +272,24 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, computeHash(Entry); } +// Hash all of the values in a block like set of values. This assumes that +// all of the data is going to be added as integers. +void DIEHash::hashBlockData(const SmallVectorImpl<DIEValue *> &Values) { + for (SmallVectorImpl<DIEValue *>::const_iterator I = Values.begin(), + E = Values.end(); + I != E; ++I) + Hash.update((uint64_t)cast<DIEInteger>(*I)->getValue()); +} + +// Hash the contents of a loclistptr class. +void DIEHash::hashLocList(const DIELocList &LocList) { + HashingByteStreamer Streamer(*this); + DwarfDebug &DD = *AP->getDwarfDebug(); + for (const auto &Entry : + DD.getDebugLocEntries()[LocList.getValue()].List) + DD.emitDebugLocEntry(Streamer, Entry); +} + // Hash an individual attribute \param Attr based on the type of attribute and // the form. void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { @@ -276,43 +297,76 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { const DIEAbbrevData *Desc = Attr.Desc; dwarf::Attribute Attribute = Desc->getAttribute(); - // 7.27 Step 3 - // ... An attribute that refers to another type entry T is processed as - // follows: - if (const DIEEntry *EntryAttr = dyn_cast<DIEEntry>(Value)) { - hashDIEEntry(Attribute, Tag, *EntryAttr->getEntry()); - return; + // Other attribute values use the letter 'A' as the marker, and the value + // consists of the form code (encoded as an unsigned LEB128 value) followed by + // the encoding of the value according to the form code. To ensure + // reproducibility of the signature, the set of forms used in the signature + // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag, + // DW_FORM_string, and DW_FORM_block. + + switch (Value->getType()) { + // 7.27 Step 3 + // ... An attribute that refers to another type entry T is processed as + // follows: + case DIEValue::isEntry: + hashDIEEntry(Attribute, Tag, cast<DIEEntry>(Value)->getEntry()); + break; + case DIEValue::isInteger: { + addULEB128('A'); + addULEB128(Attribute); + switch (Desc->getForm()) { + case dwarf::DW_FORM_data1: + case dwarf::DW_FORM_data2: + case dwarf::DW_FORM_data4: + case dwarf::DW_FORM_data8: + case dwarf::DW_FORM_udata: + case dwarf::DW_FORM_sdata: + addULEB128(dwarf::DW_FORM_sdata); + addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + break; + // DW_FORM_flag_present is just flag with a value of one. We still give it a + // value so just use the value. + case dwarf::DW_FORM_flag_present: + case dwarf::DW_FORM_flag: + addULEB128(dwarf::DW_FORM_flag); + addULEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + break; + default: + llvm_unreachable("Unknown integer form!"); + } + break; } - - // Other attribute values use the letter 'A' as the marker, ... - addULEB128('A'); - - addULEB128(Attribute); - - // ... and the value consists of the form code (encoded as an unsigned LEB128 - // value) followed by the encoding of the value according to the form code. To - // ensure reproducibility of the signature, the set of forms used in the - // signature computation is limited to the following: DW_FORM_sdata, - // DW_FORM_flag, DW_FORM_string, and DW_FORM_block. 
- switch (Desc->getForm()) { - case dwarf::DW_FORM_string: - llvm_unreachable( - "Add support for DW_FORM_string if we ever start emitting them again"); - case dwarf::DW_FORM_GNU_str_index: - case dwarf::DW_FORM_strp: + case DIEValue::isString: + addULEB128('A'); + addULEB128(Attribute); addULEB128(dwarf::DW_FORM_string); addString(cast<DIEString>(Value)->getString()); break; - case dwarf::DW_FORM_data1: - case dwarf::DW_FORM_data2: - case dwarf::DW_FORM_data4: - case dwarf::DW_FORM_data8: - case dwarf::DW_FORM_udata: - addULEB128(dwarf::DW_FORM_sdata); - addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + case DIEValue::isBlock: + case DIEValue::isLoc: + case DIEValue::isLocList: + addULEB128('A'); + addULEB128(Attribute); + addULEB128(dwarf::DW_FORM_block); + if (isa<DIEBlock>(Value)) { + addULEB128(cast<DIEBlock>(Value)->ComputeSize(AP)); + hashBlockData(cast<DIEBlock>(Value)->getValues()); + } else if (isa<DIELoc>(Value)) { + addULEB128(cast<DIELoc>(Value)->ComputeSize(AP)); + hashBlockData(cast<DIELoc>(Value)->getValues()); + } else { + // We could add the block length, but that would take + // a bit of work and not add a lot of uniqueness + // to the hash in some way we could test. + hashLocList(*cast<DIELocList>(Value)); + } break; - default: - llvm_unreachable("Add support for additional forms"); + // FIXME: It's uncertain whether or not we should handle this at the moment. + case DIEValue::isExpr: + case DIEValue::isLabel: + case DIEValue::isDelta: + case DIEValue::isTypeSignature: + llvm_unreachable("Add support for additional value types."); } } @@ -409,20 +463,18 @@ void DIEHash::computeHash(const DIE &Die) { addAttributes(Die); // Then hash each of the children of the DIE. - for (std::vector<DIE *>::const_iterator I = Die.getChildren().begin(), - E = Die.getChildren().end(); - I != E; ++I) { + for (auto &C : Die.getChildren()) { // 7.27 Step 7 // If C is a nested type entry or a member function entry, ... - if (isType((*I)->getTag()) || (*I)->getTag() == dwarf::DW_TAG_subprogram) { - StringRef Name = getDIEStringAttr(**I, dwarf::DW_AT_name); + if (isType(C->getTag()) || C->getTag() == dwarf::DW_TAG_subprogram) { + StringRef Name = getDIEStringAttr(*C, dwarf::DW_AT_name); // ... and has a DW_AT_name attribute if (!Name.empty()) { - hashNestedType(**I, Name); + hashNestedType(*C, Name); continue; } } - computeHash(**I); + computeHash(*C); } // Following the last (or if there are no children), append a zero byte. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index f0c4ef9..175d660 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -11,17 +11,22 @@ // //===----------------------------------------------------------------------===// +#ifndef CODEGEN_ASMPRINTER_DIEHASH_H__ +#define CODEGEN_ASMPRINTER_DIEHASH_H__ + #include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/MD5.h" namespace llvm { +class AsmPrinter; class CompileUnit; /// \brief An object containing the capability of hashing and adding hash /// attributes onto a DIE. class DIEHash { + // The entry for a particular attribute. struct AttrEntry { const DIEValue *Val; @@ -84,6 +89,8 @@ class DIEHash { }; public: + DIEHash(AsmPrinter *A = nullptr) : AP(A) {} + /// \brief Computes the ODR signature. uint64_t computeDIEODRSignature(const DIE &Die); @@ -105,13 +112,17 @@ private: void computeHash(const DIE &Die); // Routines that add DIEValues to the hash. 
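+ // (update() moves to the public section so that HashingByteStreamer can feed + // the bytes produced by DwarfDebug::emitDebugLocEntry back into this hash; + // see DIEHash::hashLocList.)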
-private: +public: + /// \brief Adds \param Value to the hash. + void update(uint8_t Value) { Hash.update(Value); } + /// \brief Encodes and adds \param Value to the hash as a ULEB128. void addULEB128(uint64_t Value); /// \brief Encodes and adds \param Value to the hash as a SLEB128. void addSLEB128(int64_t Value); +private: /// \brief Adds \param Str to the hash and includes a NULL byte. void addString(StringRef Str); @@ -122,6 +133,13 @@ private: /// \brief Hashes the attributes in \param Attrs in order. void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag); + /// \brief Hashes the data in a block-like DIEValue, e.g. DW_FORM_block or + /// DW_FORM_exprloc. + void hashBlockData(const SmallVectorImpl<DIEValue *> &Values); + + /// \brief Hashes the contents pointed to in the .debug_loc section. + void hashLocList(const DIELocList &LocList); + /// \brief Hashes an individual attribute. void hashAttribute(AttrEntry Attr, dwarf::Tag Tag); @@ -136,12 +154,16 @@ private: StringRef Name); /// \brief Hashes a reference to a previously referenced type DIE. - void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber); + void hashRepeatedTypeReference(dwarf::Attribute Attribute, + unsigned DieNumber); void hashNestedType(const DIE &Die, StringRef Name); private: MD5 Hash; + AsmPrinter *AP; DenseMap<const DIE *, unsigned> Numbering; }; } + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp new file mode 100644 index 0000000..a66d08e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -0,0 +1,206 @@ +//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DbgValueHistoryCalculator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <map> +#include <set> + +#define DEBUG_TYPE "dwarfdebug" + +namespace llvm { + +// \brief If @MI is a DBG_VALUE with debug value described by a +// defined register, returns the number of this register. +// Otherwise, returns 0. +static unsigned isDescribedByReg(const MachineInstr &MI) { + assert(MI.isDebugValue()); + assert(MI.getNumOperands() == 3); + // If the location of the variable is described using a register (directly or + // indirectly), this register is always the first operand. + return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; +} + +void DbgValueHistoryMap::startInstrRange(const MDNode *Var, + const MachineInstr &MI) { + // Instruction range should start with a DBG_VALUE instruction for the + // variable.
+ assert(MI.isDebugValue() && MI.getDebugVariable() == Var); + auto &Ranges = VarInstrRanges[Var]; + if (!Ranges.empty() && Ranges.back().second == nullptr && + Ranges.back().first->isIdenticalTo(&MI)) { + DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" + << "\t" << Ranges.back().first << "\t" << MI << "\n"); + return; + } + Ranges.push_back(std::make_pair(&MI, nullptr)); +} + +void DbgValueHistoryMap::endInstrRange(const MDNode *Var, + const MachineInstr &MI) { + auto &Ranges = VarInstrRanges[Var]; + // Verify that the current instruction range is not yet closed. + assert(!Ranges.empty() && Ranges.back().second == nullptr); + // For now, instruction ranges are not allowed to cross basic block + // boundaries. + assert(Ranges.back().first->getParent() == MI.getParent()); + Ranges.back().second = &MI; +} + +unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { + const auto &I = VarInstrRanges.find(Var); + if (I == VarInstrRanges.end()) + return 0; + const auto &Ranges = I->second; + if (Ranges.empty() || Ranges.back().second != nullptr) + return 0; + return isDescribedByReg(*Ranges.back().first); +} + +namespace { +// Maps physreg numbers to the variables they describe. +typedef std::map<unsigned, SmallVector<const MDNode *, 1>> RegDescribedVarsMap; +} + +// \brief Claim that @Var is not described by @RegNo anymore. +static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, + unsigned RegNo, const MDNode *Var) { + const auto &I = RegVars.find(RegNo); + assert(RegNo != 0U && I != RegVars.end()); + auto &VarSet = I->second; + const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var); + assert(VarPos != VarSet.end()); + VarSet.erase(VarPos); + // Don't keep empty sets in a map to keep it as small as possible. + if (VarSet.empty()) + RegVars.erase(I); +} + +// \brief Claim that @Var is now described by @RegNo. +static void addRegDescribedVar(RegDescribedVarsMap &RegVars, + unsigned RegNo, const MDNode *Var) { + assert(RegNo != 0U); + auto &VarSet = RegVars[RegNo]; + assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); + VarSet.push_back(Var); +} + +// \brief Terminate the location range for variables described by register +// @RegNo by inserting @ClobberingInstr to their history. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, + DbgValueHistoryMap &HistMap, + const MachineInstr &ClobberingInstr) { + const auto &I = RegVars.find(RegNo); + if (I == RegVars.end()) + return; + // Iterate over all variables described by this register and add this + // instruction to their history, clobbering it. + for (const auto &Var : I->second) + HistMap.endInstrRange(Var, ClobberingInstr); + RegVars.erase(I); +} + +// \brief Collect all registers clobbered by @MI and insert them to @Regs. +static void collectClobberedRegisters(const MachineInstr &MI, + const TargetRegisterInfo *TRI, + std::set<unsigned> &Regs) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg()) + continue; + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Regs.insert(*AI); + } +} + +// \brief Returns the first instruction in @MBB which corresponds to +// the function epilogue, or nullptr if @MBB doesn't contain an epilogue. 
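+// (Heuristic: walk backwards from a terminating return; the contiguous run of +// instructions sharing the return's debug location is treated as the epilogue, +// and a block that matches throughout is considered entirely epilogue.)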
+static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { + auto LastMI = MBB.getLastNonDebugInstr(); + if (LastMI == MBB.end() || !LastMI->isReturn()) + return nullptr; + // Assume that the epilogue starts with the instruction having the same debug + // location as the return instruction. + DebugLoc LastLoc = LastMI->getDebugLoc(); + auto Res = LastMI; + for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend(); + ++I) { + if (I->getDebugLoc() != LastLoc) + return Res; + Res = std::prev(I.base()); + } + // If all instructions have the same debug location, assume the whole MBB is + // an epilogue. + return MBB.begin(); +} + +// \brief Collect registers that are modified in the function body (their +// contents are changed outside of the prologue and epilogue). +static void collectChangingRegs(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + std::set<unsigned> &Regs) { + for (const auto &MBB : *MF) { + auto FirstEpilogueInst = getFirstEpilogueInst(MBB); + bool IsInEpilogue = false; + for (const auto &MI : MBB) { + IsInEpilogue |= &MI == FirstEpilogueInst; + if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue) + collectClobberedRegisters(MI, TRI, Regs); + } + } +} + +void calculateDbgValueHistory(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &Result) { + std::set<unsigned> ChangingRegs; + collectChangingRegs(MF, TRI, ChangingRegs); + + RegDescribedVarsMap RegVars; + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isDebugValue()) { + // Not a DBG_VALUE instruction. It may clobber registers which describe + // some variables. + std::set<unsigned> MIClobberedRegs; + collectClobberedRegisters(MI, TRI, MIClobberedRegs); + for (unsigned RegNo : MIClobberedRegs) { + if (ChangingRegs.count(RegNo)) + clobberRegisterUses(RegVars, RegNo, Result, MI); + } + continue; + } + + assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); + const MDNode *Var = MI.getDebugVariable(); + + if (unsigned PrevReg = Result.getRegisterForVar(Var)) + dropRegDescribedVar(RegVars, PrevReg, Var); + + Result.startInstrRange(Var, MI); + + if (unsigned NewReg = isDescribedByReg(MI)) + addRegDescribedVar(RegVars, NewReg, Var); + } + + // Make sure locations for register-described variables are valid only + // until the end of the basic block (unless it's the last basic block, in + // which case let their liveness run off to the end of the function). + if (!MBB.empty() && &MBB != &MF->back()) { + for (unsigned RegNo : ChangingRegs) + clobberRegisterUses(RegVars, RegNo, Result, MBB.back()); + } + } +} + +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h new file mode 100644 index 0000000..b9177f0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -0,0 +1,54 @@ +//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_ +#define CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_ + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +class MachineFunction; +class MachineInstr; +class MDNode; +class TargetRegisterInfo; + +// For each user variable, keep a list of instruction ranges where this variable +// is accessible. The variables are listed in order of appearance. +class DbgValueHistoryMap { + // Each instruction range starts with a DBG_VALUE instruction, specifying the + // location of a variable, which is assumed to be valid until the end of the + // range. If the end is not specified, the location is valid until the start + // instruction of the next instruction range, or until the end of the + // function. + typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange; + typedef SmallVector<InstrRange, 4> InstrRanges; + typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap; + InstrRangesMap VarInstrRanges; + +public: + void startInstrRange(const MDNode *Var, const MachineInstr &MI); + void endInstrRange(const MDNode *Var, const MachineInstr &MI); + // Returns the register currently describing @Var. If @Var is currently + // inaccessible or is not described by a register, returns 0. + unsigned getRegisterForVar(const MDNode *Var) const; + + bool empty() const { return VarInstrRanges.empty(); } + void clear() { VarInstrRanges.clear(); } + InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); } + InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); } +}; + +void calculateDbgValueHistory(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &Result); +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h new file mode 100644 index 0000000..3beb799 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -0,0 +1,123 @@ +//===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__ +#define CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__ +#include "llvm/IR/Constants.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCSymbol.h" + +namespace llvm { +class DwarfCompileUnit; +class MDNode; +/// \brief This struct describes location entries emitted in the .debug_loc +/// section. +class DebugLocEntry { + // Begin and end symbols for the address range over which this location is + // valid. + const MCSymbol *Begin; + const MCSymbol *End; + +public: + /// A single location or constant. + struct Value { + Value(const MDNode *Var, int64_t i) + : Variable(Var), EntryKind(E_Integer) { + Constant.Int = i; + } + Value(const MDNode *Var, const ConstantFP *CFP) + : Variable(Var), EntryKind(E_ConstantFP) { + Constant.CFP = CFP; + } + Value(const MDNode *Var, const ConstantInt *CIP) + : Variable(Var), EntryKind(E_ConstantInt) { + Constant.CIP = CIP; + } + Value(const MDNode *Var, MachineLocation Loc) + : Variable(Var), EntryKind(E_Location), Loc(Loc) { + } + + // The variable to which this location entry corresponds.
+ const MDNode *Variable; + + // Type of entry that this represents. + enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; + enum EntryType EntryKind; + + // Either a constant, + union { + int64_t Int; + const ConstantFP *CFP; + const ConstantInt *CIP; + } Constant; + + // Or a location in the machine frame. + MachineLocation Loc; + + bool operator==(const Value &other) const { + if (EntryKind != other.EntryKind) + return false; + + switch (EntryKind) { + case E_Location: + return Loc == other.Loc; + case E_Integer: + return Constant.Int == other.Constant.Int; + case E_ConstantFP: + return Constant.CFP == other.Constant.CFP; + case E_ConstantInt: + return Constant.CIP == other.Constant.CIP; + } + llvm_unreachable("unhandled EntryKind"); + } + + bool isLocation() const { return EntryKind == E_Location; } + bool isInt() const { return EntryKind == E_Integer; } + bool isConstantFP() const { return EntryKind == E_ConstantFP; } + bool isConstantInt() const { return EntryKind == E_ConstantInt; } + int64_t getInt() const { return Constant.Int; } + const ConstantFP *getConstantFP() const { return Constant.CFP; } + const ConstantInt *getConstantInt() const { return Constant.CIP; } + MachineLocation getLoc() const { return Loc; } + const MDNode *getVariable() const { return Variable; } + }; +private: + /// A list of locations/constants belonging to this entry. + SmallVector<Value, 1> Values; + + /// The compile unit that this location entry is referenced by. + const DwarfCompileUnit *Unit; + +public: + DebugLocEntry() : Begin(nullptr), End(nullptr), Unit(nullptr) {} + DebugLocEntry(const MCSymbol *B, const MCSymbol *E, + Value Val, const DwarfCompileUnit *U) + : Begin(B), End(E), Unit(U) { + Values.push_back(std::move(Val)); + } + + /// \brief Attempt to merge this DebugLocEntry with Next and return + /// true if the merge was successful. Entries can be merged if they + /// share the same Loc/Constant and if Next immediately follows this + /// Entry. + bool Merge(const DebugLocEntry &Next) { + if ((End == Next.Begin && Values == Next.Values)) { + End = Next.End; + return true; + } + return false; + } + const MCSymbol *getBeginSym() const { return Begin; } + const MCSymbol *getEndSym() const { return End; } + const DwarfCompileUnit *getCU() const { return Unit; } + const ArrayRef<Value> getValues() const { return Values; } + void addValue(Value Val) { Values.push_back(Val); } +}; + +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h new file mode 100644 index 0000000..7a51c7b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h @@ -0,0 +1,23 @@ +//===--- lib/CodeGen/DebugLocList.h - DWARF debug_loc list ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__ +#define CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__ + +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/SmallVector.h" +#include "DebugLocEntry.h" + +namespace llvm { +struct DebugLocList { + MCSymbol *Label; + SmallVector<DebugLocEntry, 4> List; +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 689aeda..e9527c4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -29,14 +29,15 @@ DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) : Header(8 + (atomList.size() * 4)), HeaderData(atomList), Entries(Allocator) {} -DwarfAccelTable::~DwarfAccelTable() {} - -void DwarfAccelTable::AddName(StringRef Name, DIE *die, char Flags) { +void DwarfAccelTable::AddName(StringRef Name, MCSymbol *StrSym, const DIE *die, + char Flags) { assert(Data.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. DataArray &DIEs = Entries[Name]; - DIEs.push_back(new (Allocator) HashDataContents(die, Flags)); + assert(!DIEs.StrSym || DIEs.StrSym == StrSym); + DIEs.StrSym = StrSym; + DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags)); } void DwarfAccelTable::ComputeBucketCount(void) { @@ -72,9 +73,10 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { EI != EE; ++EI) { // Unique the entries. - std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs); - EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), - EI->second.end()); + std::stable_sort(EI->second.Values.begin(), EI->second.Values.end(), compareDIEs); + EI->second.Values.erase( + std::unique(EI->second.Values.begin(), EI->second.Values.end()), + EI->second.Values.end()); HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); Data.push_back(Entry); @@ -172,7 +174,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { // Walk through the buckets and emit the full data for each element in // the bucket. For the string case emit the dies and the various offsets. // Terminate each HashData bucket with 0. -void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) { uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), @@ -181,21 +183,18 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { // Remember to emit the label for our offset. Asm->OutStreamer.EmitLabel((*HI)->Sym); Asm->OutStreamer.AddComment((*HI)->Str); - Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str), - D->getStringPoolSym()); + Asm->EmitSectionOffset((*HI)->Data.StrSym, + D->getStringPool().getSectionSymbol()); Asm->OutStreamer.AddComment("Num DIEs"); - Asm->EmitInt32((*HI)->Data.size()); - for (ArrayRef<HashDataContents *>::const_iterator - DI = (*HI)->Data.begin(), - DE = (*HI)->Data.end(); - DI != DE; ++DI) { + Asm->EmitInt32((*HI)->Data.Values.size()); + for (HashDataContents *HD : (*HI)->Data.Values) { // Emit the DIE offset - Asm->EmitInt32((*DI)->Die->getOffset()); + Asm->EmitInt32(HD->Die->getOffset()); // If we have multiple Atoms emit that info too. // FIXME: A bit of a hack, we either emit only one atom or all info. 
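// (With a single atom only the DIE offset is emitted; configuring more than // one atom switches every entry to the full tag-and-flags record below.)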
if (HeaderData.Atoms.size() > 1) { - Asm->EmitInt16((*DI)->Die->getTag()); - Asm->EmitInt8((*DI)->Flags); + Asm->EmitInt16(HD->Die->getTag()); + Asm->EmitInt8(HD->Flags); } } // Emit a 0 to terminate the data unless we have a hash collision. @@ -207,7 +206,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { } // Emit the entire data structure to the output file. -void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfUnits *D) { +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) { // Emit the header. EmitHeader(Asm); @@ -235,10 +234,8 @@ void DwarfAccelTable::print(raw_ostream &O) { EE = Entries.end(); EI != EE; ++EI) { O << "Name: " << EI->getKeyData() << "\n"; - for (DataArray::const_iterator DI = EI->second.begin(), - DE = EI->second.end(); - DI != DE; ++DI) - (*DI)->print(O); + for (HashDataContents *HD : EI->second.Values) + HD->print(O); } O << "Buckets and Hashes: \n"; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 7627313..a3cc95f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -18,13 +18,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringMap.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" -#include <map> #include <vector> // The dwarf accelerator tables are an indirect hash table optimized @@ -62,8 +62,7 @@ namespace llvm { class AsmPrinter; -class DIE; -class DwarfUnits; +class DwarfFile; class DwarfAccelTable { @@ -127,7 +126,8 @@ public: uint16_t type; // enum AtomType uint16_t form; // DWARF DW_FORM_ defines - Atom(uint16_t type, uint16_t form) : type(type), form(form) {} + LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form) + : type(type), form(form) {} #ifndef NDEBUG void print(raw_ostream &O) { O << "Type: " << dwarf::AtomTypeString(type) << "\n" @@ -165,10 +165,10 @@ private: // HashData[hash_data_count] public: struct HashDataContents { - DIE *Die; // Offsets + const DIE *Die; // Offsets char Flags; // Specific flags to output - HashDataContents(DIE *D, char Flags) : Die(D), Flags(Flags) {} + HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {} #ifndef NDEBUG void print(raw_ostream &O) const { O << " Offset: " << Die->getOffset() << "\n"; @@ -179,12 +179,19 @@ public: }; private: + // String Data + struct DataArray { + MCSymbol *StrSym; + std::vector<HashDataContents *> Values; + DataArray() : StrSym(nullptr) {} + }; + friend struct HashData; struct HashData { StringRef Str; uint32_t HashValue; MCSymbol *Sym; - ArrayRef<HashDataContents *> Data; // offsets - HashData(StringRef S, ArrayRef<HashDataContents *> Data) + DwarfAccelTable::DataArray &Data; // offsets + HashData(StringRef S, DwarfAccelTable::DataArray &Data) : Str(S), Data(Data) { HashValue = DwarfAccelTable::HashDJB(S); } @@ -198,10 +205,10 @@ private: else O << "<none>"; O << "\n"; - for (size_t i = 0; i < Data.size(); i++) { - O << " Offset: " << Data[i]->Die->getOffset() << "\n"; - O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n"; - O << " Flags: " << Data[i]->Flags << "\n"; + for (HashDataContents *C : Data.Values) { + O << " Offset: " << C->Die->getOffset() << "\n"; + O << " Tag: " << 
dwarf::TagString(C->Die->getTag()) << "\n"; + O << " Flags: " << C->Flags << "\n"; } } void dump() { print(dbgs()); } @@ -216,7 +223,7 @@ private: void EmitBuckets(AsmPrinter *); void EmitHashes(AsmPrinter *); void EmitOffsets(AsmPrinter *, MCSymbol *); - void EmitData(AsmPrinter *, DwarfUnits *D); + void EmitData(AsmPrinter *, DwarfFile *D); // Allocator for HashData and HashDataContents. BumpPtrAllocator Allocator; @@ -226,8 +233,6 @@ private: TableHeaderData HeaderData; std::vector<HashData *> Data; - // String Data - typedef std::vector<HashDataContents *> DataArray; typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries; StringEntries Entries; @@ -240,10 +245,10 @@ private: // Public Implementation public: DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); - ~DwarfAccelTable(); - void AddName(StringRef, DIE *, char = 0); + void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die, + char Flags = 0); void FinalizeTable(AsmPrinter *, StringRef); - void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *); + void Emit(AsmPrinter *, MCSymbol *, DwarfFile *); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 8918f3d..74215aa 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -31,7 +32,6 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -40,15 +40,14 @@ using namespace llvm; DwarfCFIException::DwarfCFIException(AsmPrinter *A) - : DwarfException(A), - shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false), - moveTypeModule(AsmPrinter::CFI_M_None) {} + : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false), + shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {} DwarfCFIException::~DwarfCFIException() {} -/// EndModule - Emit all exception information that should come after the +/// endModule - Emit all exception information that should come after the /// content. -void DwarfCFIException::EndModule() { +void DwarfCFIException::endModule() { if (moveTypeModule == AsmPrinter::CFI_M_Debug) Asm->OutStreamer.EmitCFISections(false, true); @@ -59,32 +58,22 @@ void DwarfCFIException::EndModule() { unsigned PerEncoding = TLOF.getPersonalityEncoding(); - if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel) + if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect) return; // Emit references to all used personality functions - bool AtLeastOne = false; const std::vector<const Function*> &Personalities = MMI->getPersonalities(); for (size_t i = 0, e = Personalities.size(); i != e; ++i) { if (!Personalities[i]) continue; MCSymbol *Sym = Asm->getSymbol(Personalities[i]); TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); - AtLeastOne = true; - } - - if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) { - // This is a temporary hack to keep sections in the same order they - // were before. 
This lets us produce bit identical outputs while - // transitioning to CFI. - Asm->OutStreamer.SwitchSection( - const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection()); } } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void DwarfCFIException::BeginFunction(const MachineFunction *MF) { +void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. @@ -113,18 +102,27 @@ void DwarfCFIException::BeginFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitCFIStartProc(); + Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false); // Indicate personality routine, if any. if (!shouldEmitPersonality) return; - const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + const MCSymbol *Sym = + TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); - Asm->OutStreamer.EmitDebugLabel - (Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + MCSymbol *EHBegin = + Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); + if (Asm->MAI->useAssignmentForEHBegin()) { + MCContext &Ctx = Asm->OutContext; + MCSymbol *CurPos = Ctx.CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(CurPos); + Asm->OutStreamer.EmitAssignment(EHBegin, + MCSymbolRefExpr::Create(CurPos, Ctx)); + } else { + Asm->OutStreamer.EmitLabel(EHBegin); + } // Provide LSDA information. if (!shouldEmitLSDA) @@ -135,9 +133,9 @@ void DwarfCFIException::BeginFunction(const MachineFunction *MF) { LSDAEncoding); } -/// EndFunction - Gather and emit post-function exception information. +/// endFunction - Gather and emit post-function exception information. /// -void DwarfCFIException::EndFunction() { +void DwarfCFIException::endFunction(const MachineFunction *) { if (!shouldEmitPersonality && !shouldEmitMoves) return; @@ -152,5 +150,5 @@ void DwarfCFIException::EndFunction() { // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); - EmitExceptionTable(); + emitExceptionTable(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h deleted file mode 100644 index b9e941e..0000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ /dev/null @@ -1,419 +0,0 @@ -//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing dwarf compile unit. 
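// A minimal sketch of the eh_func_begin choice made in
// DwarfCFIException::beginFunction above, assuming this revision's MC API;
// defineEHBegin and the UseAssignment flag (which models
// MAI->useAssignmentForEHBegin()) are hypothetical stand-ins:
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
using namespace llvm;

static void defineEHBegin(MCStreamer &Streamer, MCContext &Ctx,
                          MCSymbol *EHBegin, bool UseAssignment) {
  if (UseAssignment) {
    // Pin a fresh temporary label at the current position, then alias
    // EHBegin to it instead of defining EHBegin directly.
    MCSymbol *CurPos = Ctx.CreateTempSymbol();
    Streamer.EmitLabel(CurPos);
    Streamer.EmitAssignment(EHBegin, MCSymbolRefExpr::Create(CurPos, Ctx));
  } else {
    Streamer.EmitLabel(EHBegin); // common case: define the label in place
  }
}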
-// -//===----------------------------------------------------------------------===// - -#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H -#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H - -#include "DIE.h" -#include "DwarfDebug.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/DebugInfo.h" -#include "llvm/MC/MCExpr.h" - -namespace llvm { - -class MachineLocation; -class MachineOperand; -class ConstantInt; -class ConstantFP; -class DbgVariable; - -//===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associated -/// with a source file. -class CompileUnit { - /// UniqueID - a numeric ID unique among all CUs in the module - /// - unsigned UniqueID; - - /// Node - MDNode for the compile unit. - DICompileUnit Node; - - /// CUDie - Compile unit debug information entry. - /// - const OwningPtr<DIE> CUDie; - - /// Asm - Target of Dwarf emission. - AsmPrinter *Asm; - - // Holders for some common dwarf information. - DwarfDebug *DD; - DwarfUnits *DU; - - /// IndexTyDie - An anonymous type for index type. Owned by CUDie. - DIE *IndexTyDie; - - /// MDNodeToDieMap - Tracks the mapping of unit level debug information - /// variables to debug information entries. - DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information - /// descriptors to debug information entries using a DIEEntry proxy. - DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - - /// GlobalNames - A map of globally visible named entities for this unit. - /// - StringMap<DIE *> GlobalNames; - - /// GlobalTypes - A map of globally visible types for this unit. - /// - StringMap<DIE *> GlobalTypes; - - /// AccelNames - A map of names for the name accelerator table. - /// - StringMap<std::vector<DIE *> > AccelNames; - StringMap<std::vector<DIE *> > AccelObjC; - StringMap<std::vector<DIE *> > AccelNamespace; - StringMap<std::vector<std::pair<DIE *, unsigned> > > AccelTypes; - - /// DIEBlocks - A list of all the DIEBlocks in use. - std::vector<DIEBlock *> DIEBlocks; - - /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that - /// need DW_AT_containing_type attribute. This attribute points to a DIE that - /// corresponds to the MDNode mapped with the subprogram DIE. - DenseMap<DIE *, const MDNode *> ContainingTypeMap; - - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - - // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently. - DIEInteger *DIEIntegerOne; - -public: - CompileUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A, - DwarfDebug *DW, DwarfUnits *DWU); - ~CompileUnit(); - - // Accessors. 
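// The HashData entries in the DwarfAccelTable diff above set
// HashValue = DwarfAccelTable::HashDJB(S) before names (including the Accel*
// maps listed here) are distributed into buckets. A standalone sketch of that
// hash, assuming the classic Bernstein variant (seed 5381, multiplier 33) the
// function is named for; hashDJB is a hypothetical local helper:
#include <cstdint>
#include <string>

static uint32_t hashDJB(const std::string &Str) {
  uint32_t H = 5381;
  for (unsigned char C : Str)
    H = ((H << 5) + H) + C; // H * 33 + C, wrapping at 32 bits
  return H;
}
// A name then typically lands in bucket HashValue % <bucket count>, which is
// the layout EmitBuckets/EmitHashes write out.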
- unsigned getUniqueID() const { return UniqueID; } - uint16_t getLanguage() const { return Node.getLanguage(); } - DICompileUnit getNode() const { return Node; } - DIE *getCUDie() const { return CUDie.get(); } - const StringMap<DIE *> &getGlobalNames() const { return GlobalNames; } - const StringMap<DIE *> &getGlobalTypes() const { return GlobalTypes; } - - const StringMap<std::vector<DIE *> > &getAccelNames() const { - return AccelNames; - } - const StringMap<std::vector<DIE *> > &getAccelObjC() const { - return AccelObjC; - } - const StringMap<std::vector<DIE *> > &getAccelNamespace() const { - return AccelNamespace; - } - const StringMap<std::vector<std::pair<DIE *, unsigned> > > & - getAccelTypes() const { - return AccelTypes; - } - - unsigned getDebugInfoOffset() const { return DebugInfoOffset; } - void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } - - /// hasContent - Return true if this compile unit has something to write out. - /// - bool hasContent() const { return !CUDie->getChildren().empty(); } - - /// getParentContextString - Get a string containing the language specific - /// context for a global name. - std::string getParentContextString(DIScope Context) const; - - /// addGlobalName - Add a new global entity to the compile unit. - /// - void addGlobalName(StringRef Name, DIE *Die, DIScope Context); - - /// addGlobalType - Add a new global type to the compile unit. - /// - void addGlobalType(DIType Ty); - - /// addPubTypes - Add a set of types from the subprogram to the global types. - void addPubTypes(DISubprogram SP); - - /// addAccelName - Add a new name to the name accelerator table. - void addAccelName(StringRef Name, DIE *Die); - - /// addAccelObjC - Add a new name to the ObjC accelerator table. - void addAccelObjC(StringRef Name, DIE *Die); - - /// addAccelNamespace - Add a new name to the namespace accelerator table. - void addAccelNamespace(StringRef Name, DIE *Die); - - /// addAccelType - Add a new type to the type accelerator table. - void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die); - - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. We delegate the request to DwarfDebug - /// when the MDNode can be part of the type system, since DIEs for - /// the type system can be shared across CUs and the mappings are - /// kept in DwarfDebug. - DIE *getDIE(DIDescriptor D) const; - - DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); } - - /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug - /// when the MDNode can be part of the type system, since DIEs for - /// the type system can be shared across CUs and the mappings are - /// kept in DwarfDebug. - void insertDIE(DIDescriptor Desc, DIE *D); - - /// addDie - Adds or interns the DIE to the compile unit. - /// - void addDie(DIE *Buffer) { CUDie->addChild(Buffer); } - - /// addFlag - Add a flag that is true to the DIE. - void addFlag(DIE *Die, dwarf::Attribute Attribute); - - /// addUInt - Add an unsigned integer attribute data and value. - /// - void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, - uint64_t Integer); - - void addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer); - - /// addSInt - Add an signed integer attribute data and value. 
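// addUInt/addSInt above attach integer attribute values whose variable-width
// forms (DW_FORM_udata and friends) are LEB128-encoded on emission, and
// elsewhere this diff adds llvm/Support/LEB128.h for exactly that. A
// standalone sketch of both encodings, assuming the standard DWARF scheme:
#include <cstdint>
#include <vector>

// Unsigned LEB128: 7 payload bits per byte, high bit set on all but the last.
static void encodeULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V != 0)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V != 0);
}

// Signed LEB128: same framing; stop once only sign bits remain and the sign
// bit of the last emitted byte agrees with them.
static void encodeSLEB128(int64_t V, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7; // arithmetic shift preserves the sign
    More = !((V == 0 && (Byte & 0x40) == 0) ||
             (V == -1 && (Byte & 0x40) != 0));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}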
- /// - void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, - int64_t Integer); - - void addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, int64_t Integer); - - /// addString - Add a string attribute data and value. - /// - void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); - - /// addLocalString - Add a string attribute data and value. - /// - void addLocalString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); - - /// addExpr - Add a Dwarf expression attribute data and value. - /// - void addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr); - - /// addLabel - Add a Dwarf label attribute data and value. - /// - void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, - const MCSymbol *Label); - - void addLabel(DIEBlock *Die, dwarf::Form Form, const MCSymbol *Label); - - /// addSectionLabel - Add a Dwarf section label attribute data and value. - /// - void addSectionLabel(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Label); - - /// addSectionOffset - Add an offset into a section attribute data and value. - /// - void addSectionOffset(DIE *Die, dwarf::Attribute Attribute, uint64_t Integer); - - /// addLabelAddress - Add a dwarf label attribute data and value using - /// either DW_FORM_addr or DW_FORM_GNU_addr_index. - /// - void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label); - - /// addOpAddress - Add a dwarf op address data and value using the - /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. - /// - void addOpAddress(DIEBlock *Die, const MCSymbol *Label); - - /// addSectionDelta - Add a label delta attribute data and value. - void addSectionDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi, - const MCSymbol *Lo); - - /// addDIEEntry - Add a DIE attribute data and value. - /// - void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry); - - /// addDIEEntry - Add a DIE attribute data and value. - /// - void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry); - - /// addBlock - Add block data. - /// - void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block); - - /// addSourceLine - Add location information to specified debug information - /// entry. - void addSourceLine(DIE *Die, DIVariable V); - void addSourceLine(DIE *Die, DIGlobalVariable G); - void addSourceLine(DIE *Die, DISubprogram SP); - void addSourceLine(DIE *Die, DIType Ty); - void addSourceLine(DIE *Die, DINameSpace NS); - void addSourceLine(DIE *Die, DIObjCProperty Ty); - - /// addAddress - Add an address attribute to a die based on the location - /// provided. - void addAddress(DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location, - bool Indirect = false); - - /// addConstantValue - Add constant value entry in variable DIE. - void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); - void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); - void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); - - /// addConstantFPValue - Add constant value entry in variable DIE. - void addConstantFPValue(DIE *Die, const MachineOperand &MO); - void addConstantFPValue(DIE *Die, const ConstantFP *CFP); - - /// addTemplateParams - Add template parameters in buffer. - void addTemplateParams(DIE &Buffer, DIArray TParams); - - /// addRegisterOp - Add register operand. - void addRegisterOp(DIEBlock *TheDie, unsigned Reg); - - /// addRegisterOffset - Add register offset. 
- void addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset); - - /// addComplexAddress - Start with the address based on the location provided, - /// and generate the DWARF information necessary to find the actual variable - /// (navigating the extra location information encoded in the type) based on - /// the starting location. Add the DWARF information to the die. - /// - void addComplexAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, - const MachineLocation &Location); - - // FIXME: Should be reformulated in terms of addComplexAddress. - /// addBlockByrefAddress - Start with the address based on the location - /// provided, and generate the DWARF information necessary to find the - /// actual Block variable (navigating the Block struct) based on the - /// starting location. Add the DWARF information to the die. Obsolete, - /// please use addComplexAddress instead. - /// - void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, - const MachineLocation &Location); - - /// addVariableAddress - Add DW_AT_location attribute for a - /// DbgVariable based on provided MachineLocation. - void addVariableAddress(const DbgVariable &DV, DIE *Die, - MachineLocation Location); - - /// addType - Add a new type attribute to the specified entity. This takes - /// and attribute parameter because DW_AT_friend attributes are also - /// type references. - void addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type); - - /// getOrCreateNameSpace - Create a DIE for DINameSpace. - DIE *getOrCreateNameSpace(DINameSpace NS); - - /// getOrCreateSubprogramDIE - Create new DIE using SP. - DIE *getOrCreateSubprogramDIE(DISubprogram SP); - - /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the - /// given DIType. - DIE *getOrCreateTypeDIE(const MDNode *N); - - /// getOrCreateContextDIE - Get context owner's DIE. - DIE *getOrCreateContextDIE(DIScope Context); - - /// createGlobalVariableDIE - create global variable DIE. - void createGlobalVariableDIE(DIGlobalVariable GV); - - /// constructContainingTypeDIEs - Construct DIEs for types that contain - /// vtables. - void constructContainingTypeDIEs(); - - /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract); - - /// Create a DIE with the given Tag, add the DIE to its parent, and - /// call insertDIE if MD is not null. - DIE *createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N = DIDescriptor()); - - /// Compute the size of a header for this unit, not including the initial - /// length field. - unsigned getHeaderSize() const { - return sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) - } - - /// Emit the header for this unit, not including the initial length field. - void emitHeader(const MCSection *ASection, const MCSymbol *ASectionSym); - -private: - /// constructSubprogramArguments - Construct function argument DIEs. - void constructSubprogramArguments(DIE &Buffer, DIArray Args); - - /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, DIBasicType BTy); - - /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); - - /// constructTypeDIE - Construct type DIE from DICompositeType. 
- void constructTypeDIE(DIE &Buffer, DICompositeType CTy); - - /// constructSubrangeDIE - Construct subrange DIE from DISubrange. - void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); - - /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy); - - /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy); - - /// constructMemberDIE - Construct member DIE from DIDerivedType. - void constructMemberDIE(DIE &Buffer, DIDerivedType DT); - - /// constructTemplateTypeParameterDIE - Construct new DIE for the given - /// DITemplateTypeParameter. - void constructTemplateTypeParameterDIE(DIE &Buffer, - DITemplateTypeParameter TP); - - /// constructTemplateValueParameterDIE - Construct new DIE for the given - /// DITemplateValueParameter. - void constructTemplateValueParameterDIE(DIE &Buffer, - DITemplateValueParameter TVP); - - /// getOrCreateStaticMemberDIE - Create new static data member DIE. - DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); - - /// Offset of the CUDie from beginning of debug info section. - unsigned DebugInfoOffset; - - /// getLowerBoundDefault - Return the default lower bound for an array. If the - /// DWARF version doesn't handle the language, return -1. - int64_t getDefaultLowerBound() const; - - /// getDIEEntry - Returns the debug information entry for the specified - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) const { - return MDNodeToDIEEntryMap.lookup(N); - } - - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { return IndexTyDie; } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. - void setIndexTyDie(DIE *D) { IndexTyDie = D; } - - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); - - /// resolve - Look in the DwarfDebug map for the MDNode that - /// corresponds to the reference. 
- template <typename T> T resolve(DIRef<T> Ref) const { - return DD->resolve(Ref); - } -}; - -} // end llvm namespace -#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index d922433..ac1c0ff 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -11,24 +11,24 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dwarfdebug" +#include "ByteStreamer.h" #include "DwarfDebug.h" #include "DIE.h" #include "DIEHash.h" -#include "DwarfAccelTable.h" -#include "DwarfCompileUnit.h" +#include "DwarfUnit.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DIBuilder.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" @@ -36,12 +36,13 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -49,6 +50,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); @@ -59,26 +62,17 @@ static cl::opt<bool> UnknownLocations( cl::init(false)); static cl::opt<bool> -GenerateODRHash("generate-odr-hash", cl::Hidden, - cl::desc("Add an ODR hash to external type DIEs."), - cl::init(false)); - -static cl::opt<bool> -GenerateCUHash("generate-cu-hash", cl::Hidden, - cl::desc("Add the CU hash as the dwo_id."), - cl::init(false)); - -static cl::opt<bool> GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, cl::desc("Generate GNU-style pubnames and pubtypes"), cl::init(false)); +static cl::opt<bool> GenerateARangeSection("generate-arange-section", + cl::Hidden, + cl::desc("Generate dwarf aranges"), + cl::init(false)); + namespace { -enum DefaultOnOff { - Default, - Enable, - Disable -}; +enum DefaultOnOff { Default, Enable, Disable }; } static cl::opt<DefaultOnOff> @@ -91,7 +85,7 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden, static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output prototype dwarf split debug info."), + cl::desc("Output DWARF5 split debug info."), cl::values(clEnumVal(Default, "Default for platform"), clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled"), clEnumValEnd), @@ -105,34 +99,27 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, clEnumVal(Disable, "Disabled"), clEnumValEnd), cl::init(Default)); -static cl::opt<unsigned> -DwarfVersionNumber("dwarf-version", cl::Hidden, - cl::desc("Generate DWARF for dwarf version."), - cl::init(0)); - 
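// getHeaderSize() in the deleted header above adds up the compile-unit
// header fields that follow the initial length word: 2 bytes of version, a
// 4-byte offset into .debug_abbrev, and 1 byte of address size. A checked
// restatement of that arithmetic (DWARF32 assumed, as in the code above):
#include <cstdint>

constexpr unsigned VersionBytes = sizeof(int16_t);   // DWARF version number
constexpr unsigned AbbrevOffBytes = sizeof(int32_t); // offset into .debug_abbrev
constexpr unsigned AddrSizeBytes = sizeof(int8_t);   // pointer size in bytes
constexpr unsigned HeaderSize = VersionBytes + AbbrevOffBytes + AddrSizeBytes;
static_assert(HeaderSize == 7, "matches getHeaderSize()");
static_assert(4 + HeaderSize == 11, "full DWARF32 CU header, length included");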
static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; //===----------------------------------------------------------------------===// -// Configuration values for initial hash set sizes (log2). -// -static const unsigned InitAbbreviationsSetSize = 9; // log2(512) - -namespace llvm { - /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. -template <typename T> -T DbgVariable::resolve(DIRef<T> Ref) const { +template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const { return DD->resolve(Ref); } +bool DbgVariable::isBlockByrefVariable() const { + assert(Var.isVariable() && "Invalid complex DbgVariable!"); + return Var.isBlockByrefVariable(DD->getTypeIdentifierMap()); +} + DIType DbgVariable::getType() const { - DIType Ty = Var.getType(); + DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap()); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. - if (Var.isBlockByrefVariable()) { + if (Var.isBlockByrefVariable(DD->getTypeIdentifierMap())) { /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName @@ -173,33 +160,32 @@ DIType DbgVariable::getType() const { return Ty; } -} // end llvm namespace - -/// Return Dwarf Version by checking module flags. -static unsigned getDwarfVersionFromModule(const Module *M) { - Value *Val = M->getModuleFlag("Dwarf Version"); - if (!Val) - return dwarf::DWARF_VERSION; - return cast<ConstantInt>(Val)->getZExtValue(); -} +static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), + DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), FirstCU(0), - AbbreviationsSet(InitAbbreviationsSetSize), - SourceIdMap(DIEValueAllocator), - PrevLabel(NULL), GlobalCUIndexCount(0), - InfoHolder(A, &AbbreviationsSet, Abbreviations, "info_string", - DIEValueAllocator), - SkeletonAbbrevSet(InitAbbreviationsSetSize), - SkeletonHolder(A, &SkeletonAbbrevSet, SkeletonAbbrevs, "skel_string", - DIEValueAllocator) { - - DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; - DwarfStrSectionSym = TextSectionSym = 0; - DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0; - DwarfAddrSectionSym = 0; - DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; - FunctionBeginSym = FunctionEndSym = 0; + : Asm(A), MMI(Asm->MMI), FirstCU(nullptr), PrevLabel(nullptr), + GlobalRangeCount(0), InfoHolder(A, "info_string", DIEValueAllocator), + UsedNonDefaultText(false), + SkeletonHolder(A, "skel_string", DIEValueAllocator), + AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, + dwarf::DW_FORM_data4)), + AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, + dwarf::DW_FORM_data4)), + AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, + dwarf::DW_FORM_data4)), + AccelTypes(TypeAtoms) { + + DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr; + DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr; + DwarfLineSectionSym = nullptr; + DwarfAddrSectionSym = nullptr; + DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr; + FunctionBeginSym = FunctionEndSym = nullptr; + CurFn = nullptr; + CurMI = nullptr; // 
Turn on accelerator tables for Darwin by default, pubnames by // default for non-Darwin, and handle split dwarf. @@ -220,9 +206,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfPubSections = DwarfPubSections == Enable; - DwarfVersion = DwarfVersionNumber - ? DwarfVersionNumber - : getDwarfVersionFromModule(MMI->getModule()); + unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; + DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber + : MMI->getModule()->getDwarfVersion(); + + Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -230,78 +218,29 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) } } +// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. +DwarfDebug::~DwarfDebug() { } + // Switch to the specified MCSection and emit an assembler // temporary label to it if SymbolStem is specified. static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section, - const char *SymbolStem = 0) { + const char *SymbolStem = nullptr) { Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) return 0; + if (!SymbolStem) + return nullptr; MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); Asm->OutStreamer.EmitLabel(TmpSym); return TmpSym; } -MCSymbol *DwarfUnits::getStringPoolSym() { - return Asm->GetTempSymbol(StringPref); -} - -MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) { - std::pair<MCSymbol*, unsigned> &Entry = - StringPool.GetOrCreateValue(Str).getValue(); - if (Entry.first) return Entry.first; - - Entry.second = NextStringPoolNumber++; - return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second); -} - -unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { - std::pair<MCSymbol*, unsigned> &Entry = - StringPool.GetOrCreateValue(Str).getValue(); - if (Entry.first) return Entry.second; - - Entry.second = NextStringPoolNumber++; - Entry.first = Asm->GetTempSymbol(StringPref, Entry.second); - return Entry.second; -} - -unsigned DwarfUnits::getAddrPoolIndex(const MCSymbol *Sym) { - return getAddrPoolIndex(MCSymbolRefExpr::Create(Sym, Asm->OutContext)); -} - -unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) { - std::pair<DenseMap<const MCExpr *, unsigned>::iterator, bool> P = - AddressPool.insert(std::make_pair(Sym, NextAddrPoolNumber)); - if (P.second) - ++NextAddrPoolNumber; - return P.first->second; -} - -// Define a unique number for the abbreviation. -// -void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { - // Check the set for priors. - DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev); - - // If it's newly added. - if (InSet == &Abbrev) { - // Add to abbreviation list. - Abbreviations.push_back(&Abbrev); - - // Assign the vector position + 1 as its number. - Abbrev.setNumber(Abbreviations.size()); - } else { - // Assign existing abbreviation number. - Abbrev.setNumber(InSet->getNumber()); - } -} - static bool isObjCClass(StringRef Name) { return Name.startswith("+") || Name.startswith("-"); } static bool hasObjCCategory(StringRef Name) { - if (!isObjCClass(Name)) return false; + if (!isObjCClass(Name)) + return false; return Name.find(") ") != StringRef::npos; } @@ -325,35 +264,35 @@ static StringRef getObjCMethodName(StringRef In) { // Helper for sorting sections into a stable output order. static bool SectionSort(const MCSection *A, const MCSection *B) { - std::string LA = (A ? A->getLabelBeginName() : ""); - std::string LB = (B ? 
B->getLabelBeginName() : ""); - return LA < LB; + std::string LA = (A ? A->getLabelBeginName() : ""); + std::string LB = (B ? B->getLabelBeginName() : ""); + return LA < LB; } // Add the various names to the Dwarf accelerator table names. // TODO: Determine whether or not we should add names for programs // that do not have a DW_AT_name or DW_AT_linkage_name field - this // is only slightly different than the lookup of non-standard ObjC names. -static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, - DIE* Die) { - if (!SP.isDefinition()) return; - TheCU->addAccelName(SP.getName(), Die); +void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) { + if (!SP.isDefinition()) + return; + addAccelName(SP.getName(), Die); // If the linkage name is different than the name, go ahead and output // that as well into the name table. if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) - TheCU->addAccelName(SP.getLinkageName(), Die); + addAccelName(SP.getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator // too. if (isObjCClass(SP.getName())) { StringRef Class, Category; getObjCClassCategory(SP.getName(), Class, Category); - TheCU->addAccelObjC(Class, Die); + addAccelObjC(Class, Die); if (Category != "") - TheCU->addAccelObjC(Category, Die); + addAccelObjC(Category, Die); // Also add the base method name to the name table. - TheCU->addAccelName(getObjCMethodName(SP.getName()), Die); + addAccelName(getObjCMethodName(SP.getName()), Die); } } @@ -373,76 +312,21 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) { // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. -DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { - DIE *SPDie = SPCU->getDIE(SP); - - assert(SPDie && "Unable to find subprogram DIE!"); - - // If we're updating an abstract DIE, then we will be adding the children and - // object pointer later on. But what we don't want to do is process the - // concrete DIE twice. - if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { - // Pick up abstract subprogram DIE. - SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE); - } else { - DISubprogram SPDecl = SP.getFunctionDeclaration(); - if (!SPDecl.isSubprogram()) { - // There is not any need to generate specification DIE for a function - // defined at compile unit level. If a function is defined inside another - // function then gdb prefers the definition at top level and but does not - // expect specification DIE in parent function. So avoid creating - // specification DIE for a function defined inside a function. - DIScope SPContext = resolve(SP.getContext()); - if (SP.isDefinition() && !SPContext.isCompileUnit() && - !SPContext.isFile() && - !isSubprogramContext(SPContext)) { - SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); - - // Add arguments. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - uint16_t SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - // FIXME: Use DwarfUnit::constructSubprogramArguments() here. 
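// The loop removed just below (superseded by
// DwarfUnit::constructSubprogramArguments, per the FIXME above) creates one
// DW_TAG_formal_parameter child per argument type and a single
// DW_TAG_unspecified_parameters child for a trailing ellipsis. A standalone
// model of that shape, with hypothetical ArgTy/Tag types:
#include <cassert>
#include <cstddef>
#include <vector>

enum Tag { FormalParameter, UnspecifiedParameters };
struct ArgTy { bool IsEllipsis; };

static std::vector<Tag> buildArgChildren(const std::vector<ArgTy> &Args) {
  std::vector<Tag> Children;
  for (std::size_t I = 0; I != Args.size(); ++I) {
    if (Args[I].IsEllipsis) {
      assert(I == Args.size() - 1 && "ellipsis must be the last argument");
      Children.push_back(UnspecifiedParameters);
    } else {
      Children.push_back(FormalParameter);
    }
  }
  return Children;
}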
- for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIType ATy(Args.getElement(i)); - if (ATy.isUnspecifiedParameter()) { - assert(i == N-1 && "ellipsis must be the last argument"); - SPCU->createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, *SPDie); - } else { - DIE *Arg = - SPCU->createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); - SPCU->addType(Arg, ATy); - if (ATy.isArtificial()) - SPCU->addFlag(Arg, dwarf::DW_AT_artificial); - if (ATy.isObjectPointer()) - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, Arg); - } - } - DIE *SPDeclDie = SPDie; - SPDie = - SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie); - } - } - } +DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, + DISubprogram SP) { + DIE *SPDie = SPCU.getOrCreateSubprogramDIE(SP); + + attachLowHighPC(SPCU, *SPDie, FunctionBeginSym, FunctionEndSym); - SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc, - Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber())); - SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc, - Asm->GetTempSymbol("func_end", - Asm->getFunctionNumber())); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + SPCU.addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_subprogram nodes. - addSubprogramNames(SPCU, SP, SPDie); + addSubprogramNames(SP, *SPDie); - return SPDie; + return *SPDie; } /// Check whether we should create a DIE for the given Scope, return true @@ -466,450 +350,376 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { return !End; } -// Construct new DW_TAG_lexical_block for this scope and attach -// DW_AT_low_pc/DW_AT_high_pc labels. -DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, - LexicalScope *Scope) { - if (isLexicalScopeDIENull(Scope)) - return 0; +static void addSectionLabel(AsmPrinter &Asm, DwarfUnit &U, DIE &D, + dwarf::Attribute A, const MCSymbol *L, + const MCSymbol *Sec) { + if (Asm.MAI->doesDwarfUseRelocationsAcrossSections()) + U.addSectionLabel(D, A, L); + else + U.addSectionDelta(D, A, L, Sec); +} - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); - if (Scope->isAbstractScope()) - return ScopeDIE; +void DwarfDebug::addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE, + const SmallVectorImpl<InsnRange> &Range) { + // Emit offset in .debug_range as a relocatable label. emitDIE will handle + // emitting it appropriately. + MCSymbol *RangeSym = Asm->GetTempSymbol("debug_ranges", GlobalRangeCount++); - const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); - // If we have multiple ranges, emit them into the range section. - if (Ranges.size() > 1) { - // .debug_range section has not been laid out yet. Emit offset in - // .debug_range as a uint, size 4, for now. emitDIE will handle - // DW_AT_ranges appropriately. 
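// Both the removed DebugRangeSymbols code nearby (note the two NULL pushes
// that close each list) and the new RangeSpanList path follow the
// .debug_ranges shape: a run of (begin, end) entries terminated by a (0, 0)
// pair. A standalone sketch with addresses standing in for the begin/end
// labels:
#include <cstdint>
#include <utility>
#include <vector>

using RangeEntry = std::pair<uint64_t, uint64_t>;

static std::vector<RangeEntry>
makeRangeList(const std::vector<RangeEntry> &Spans) {
  std::vector<RangeEntry> List(Spans);
  List.emplace_back(0, 0); // end-of-list marker
  return List;
}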
- TheCU->addSectionOffset(ScopeDIE, dwarf::DW_AT_ranges, - DebugRangeSymbols.size() * - Asm->getDataLayout().getPointerSize()); - for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), - RE = Ranges.end(); RI != RE; ++RI) { - DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); - DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); - } + // Under fission, ranges are specified by constant offsets relative to the + // CU's DW_AT_GNU_ranges_base. + if (useSplitDwarf()) + TheCU.addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym, + DwarfDebugRangeSectionSym); + else + addSectionLabel(*Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym, + DwarfDebugRangeSectionSym); - // Terminate the range list. - DebugRangeSymbols.push_back(NULL); - DebugRangeSymbols.push_back(NULL); - return ScopeDIE; + RangeSpanList List(RangeSym); + for (const InsnRange &R : Range) { + RangeSpan Span(getLabelBeforeInsn(R.first), getLabelAfterInsn(R.second)); + List.addRange(std::move(Span)); } - // Construct the address range for this DIE. - SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); - MCSymbol *Start = getLabelBeforeInsn(RI->first); - MCSymbol *End = getLabelAfterInsn(RI->second); - assert(End && "End label should not be null!"); + // Add the range list to the set of ranges to be emitted. + TheCU.addRangeList(std::move(List)); +} + +void DwarfDebug::attachRangesOrLowHighPC(DwarfCompileUnit &TheCU, DIE &Die, + const SmallVectorImpl<InsnRange> &Ranges) { + assert(!Ranges.empty()); + if (Ranges.size() == 1) + attachLowHighPC(TheCU, Die, getLabelBeforeInsn(Ranges.front().first), + getLabelAfterInsn(Ranges.front().second)); + else + addScopeRangeList(TheCU, Die, Ranges); +} + +// Construct new DW_TAG_lexical_block for this scope and attach +// DW_AT_low_pc/DW_AT_high_pc labels. +std::unique_ptr<DIE> +DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope) { + if (isLexicalScopeDIENull(Scope)) + return nullptr; - assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); - assert(End->isDefined() && "Invalid end label for an inlined scope!"); + auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block); + if (Scope->isAbstractScope()) + return ScopeDIE; - TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start); - TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End); + attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges()); return ScopeDIE; } // This scope represents inlined body of a function. Construct DIE to // represent this concrete inlined copy of the function. -DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, - LexicalScope *Scope) { - const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); - assert(Ranges.empty() == false && - "LexicalScope does not have instruction markers!"); - - if (!Scope->getScopeNode()) - return NULL; +std::unique_ptr<DIE> +DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope) { + assert(Scope->getScopeNode()); DIScope DS(Scope->getScopeNode()); DISubprogram InlinedSP = getDISubprogram(DS); - DIE *OriginDIE = TheCU->getDIE(InlinedSP); - if (!OriginDIE) { - DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram."); - return NULL; - } - - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE); - - if (Ranges.size() > 1) { - // .debug_range section has not been laid out yet. Emit offset in - // .debug_range as a uint, size 4, for now. 
emitDIE will handle - // DW_AT_ranges appropriately. - TheCU->addSectionOffset(ScopeDIE, dwarf::DW_AT_ranges, - DebugRangeSymbols.size() * - Asm->getDataLayout().getPointerSize()); - for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), - RE = Ranges.end(); RI != RE; ++RI) { - DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); - DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); - } - DebugRangeSymbols.push_back(NULL); - DebugRangeSymbols.push_back(NULL); - } else { - SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); - MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); - MCSymbol *EndLabel = getLabelAfterInsn(RI->second); + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. + DIE *OriginDIE = AbstractSPDies[InlinedSP]; + assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); - if (StartLabel == 0 || EndLabel == 0) - llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); + auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine); + TheCU.addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE); - assert(StartLabel->isDefined() && - "Invalid starting label for an inlined scope!"); - assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); - - TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel); - TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel); - } + attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges()); InlinedSubprogramDIEs.insert(OriginDIE); // Add the call site information to the DIE. DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, None, - getOrCreateSourceID(DL.getFilename(), DL.getDirectory(), - TheCU->getUniqueID())); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); + TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, + TheCU.getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); + TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. - addSubprogramNames(TheCU, InlinedSP, ScopeDIE); + addSubprogramNames(InlinedSP, *ScopeDIE); return ScopeDIE; } -DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, - SmallVectorImpl<DIE*> &Children) { - DIE *ObjectPointer = NULL; +static std::unique_ptr<DIE> constructVariableDIE(DwarfCompileUnit &TheCU, + DbgVariable &DV, + const LexicalScope &Scope, + DIE *&ObjectPointer) { + auto Var = TheCU.constructVariableDIE(DV, Scope.isAbstractScope()); + if (DV.isObjectPointer()) + ObjectPointer = Var.get(); + return Var; +} + +DIE *DwarfDebug::createScopeChildrenDIE( + DwarfCompileUnit &TheCU, LexicalScope *Scope, + SmallVectorImpl<std::unique_ptr<DIE>> &Children) { + DIE *ObjectPointer = nullptr; // Collect arguments for current function. 
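// The static constructVariableDIE helper above returns ownership through
// std::unique_ptr while reporting the object-pointer DIE through a DIE*&
// out-parameter. The same idiom in isolation, with a hypothetical Node type
// (std::make_unique, C++14, standing in for the make_unique used in the
// diff):
#include <memory>

struct Node {};

static std::unique_ptr<Node> makeNode(bool IsObjectPointer, Node *&Observer) {
  auto N = std::make_unique<Node>();
  if (IsObjectPointer)
    Observer = N.get(); // non-owning view; ownership moves to the caller
  return N;
}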
if (LScopes.isCurrentFunctionScope(Scope)) { - for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) - if (DbgVariable *ArgDV = CurrentFnArguments[i]) - if (DIE *Arg = - TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) { - Children.push_back(Arg); - if (ArgDV->isObjectPointer()) ObjectPointer = Arg; - } + for (DbgVariable *ArgDV : CurrentFnArguments) + if (ArgDV) + Children.push_back( + constructVariableDIE(TheCU, *ArgDV, *Scope, ObjectPointer)); - // Create the unspecified parameter that marks a function as variadic. + // If this is a variadic function, add an unspecified parameter. DISubprogram SP(Scope->getScopeNode()); - assert(SP.Verify()); DIArray FnArgs = SP.getType().getTypeArray(); - if (FnArgs.getElement(FnArgs.getNumElements()-1).isUnspecifiedParameter()) { - DIE *Ellipsis = new DIE(dwarf::DW_TAG_unspecified_parameters); - Children.push_back(Ellipsis); + if (FnArgs.getElement(FnArgs.getNumElements() - 1) + .isUnspecifiedParameter()) { + Children.push_back( + make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters)); } } // Collect lexical scope children first. - const SmallVectorImpl<DbgVariable *> &Variables =ScopeVariables.lookup(Scope); - for (unsigned i = 0, N = Variables.size(); i < N; ++i) - if (DIE *Variable = - TheCU->constructVariableDIE(*Variables[i], Scope->isAbstractScope())) { - Children.push_back(Variable); - if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; - } - const SmallVectorImpl<LexicalScope *> &Scopes = Scope->getChildren(); - for (unsigned j = 0, M = Scopes.size(); j < M; ++j) - if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) - Children.push_back(Nested); + for (DbgVariable *DV : ScopeVariables.lookup(Scope)) + Children.push_back(constructVariableDIE(TheCU, *DV, *Scope, ObjectPointer)); + + for (LexicalScope *LS : Scope->getChildren()) + if (std::unique_ptr<DIE> Nested = constructScopeDIE(TheCU, LS)) + Children.push_back(std::move(Nested)); return ObjectPointer; } +void DwarfDebug::createAndAddScopeChildren(DwarfCompileUnit &TheCU, + LexicalScope *Scope, DIE &ScopeDIE) { + // We create children when the scope DIE is not null. + SmallVector<std::unique_ptr<DIE>, 8> Children; + if (DIE *ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children)) + TheCU.addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); + + // Add children + for (auto &I : Children) + ScopeDIE.addChild(std::move(I)); +} + +void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope) { + assert(Scope && Scope->getScopeNode()); + assert(Scope->isAbstractScope()); + assert(!Scope->getInlinedAt()); + + DISubprogram SP(Scope->getScopeNode()); + + ProcessedSPNodes.insert(SP); + + DIE *&AbsDef = AbstractSPDies[SP]; + if (AbsDef) + return; + + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. + DwarfCompileUnit &SPCU = *SPMap[SP]; + DIE *ContextDIE; + + // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with + // the important distinction that the DIDescriptor is not associated with the + // DIE (since the DIDescriptor will be associated with the concrete DIE, if + // any). It could be refactored to some common utility function. 
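// constructAbstractSubprogramScopeDIE above relies on the insert-or-lookup
// idiom: AbstractSPDies[SP] default-constructs the slot, and binding it to
// DIE *&AbsDef lets a single lookup both test the cache and fill it in
// place. The idiom in isolation, with hypothetical key/value types:
#include <map>
#include <memory>

struct Value {};

static Value *getOrCreate(std::map<int, std::unique_ptr<Value>> &Cache,
                          int Key) {
  std::unique_ptr<Value> &Slot = Cache[Key]; // inserts an empty slot once
  if (Slot)
    return Slot.get(); // cached: the early-return path taken above
  Slot = std::make_unique<Value>(); // build once; the reference fills the map
  return Slot.get();
}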
+ if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + ContextDIE = &SPCU.getUnitDie(); + SPCU.getOrCreateSubprogramDIE(SPDecl); + } else + ContextDIE = SPCU.getOrCreateContextDIE(resolve(SP.getContext())); + + // Passing null as the associated DIDescriptor because the abstract definition + // shouldn't be found by lookup. + AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, + DIDescriptor()); + SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef); + + SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + createAndAddScopeChildren(SPCU, Scope, *AbsDef); +} + +DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope) { + assert(Scope && Scope->getScopeNode()); + assert(!Scope->getInlinedAt()); + assert(!Scope->isAbstractScope()); + DISubprogram Sub(Scope->getScopeNode()); + + assert(Sub.isSubprogram()); + + ProcessedSPNodes.insert(Sub); + + DIE &ScopeDIE = updateSubprogramScopeDIE(TheCU, Sub); + + createAndAddScopeChildren(TheCU, Scope, ScopeDIE); + + return ScopeDIE; +} + // Construct a DIE for this scope. -DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { +std::unique_ptr<DIE> DwarfDebug::constructScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) - return NULL; + return nullptr; DIScope DS(Scope->getScopeNode()); - SmallVector<DIE *, 8> Children; - DIE *ObjectPointer = NULL; - bool ChildrenCreated = false; + assert((Scope->getInlinedAt() || !DS.isSubprogram()) && + "Only handle inlined subprograms here, use " + "constructSubprogramScopeDIE for non-inlined " + "subprograms"); + + SmallVector<std::unique_ptr<DIE>, 8> Children; // We try to create the scope DIE first, then the children DIEs. This will // avoid creating un-used children then removing them later when we find out // the scope DIE is null. - DIE *ScopeDIE = NULL; - if (Scope->getInlinedAt()) + std::unique_ptr<DIE> ScopeDIE; + if (Scope->getParent() && DS.isSubprogram()) { ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); - else if (DS.isSubprogram()) { - ProcessedSPNodes.insert(DS); - if (Scope->isAbstractScope()) { - ScopeDIE = TheCU->getDIE(DS); - // Note down abstract DIE. - if (ScopeDIE) - AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); - } else - ScopeDIE = updateSubprogramScopeDIE(TheCU, DISubprogram(DS)); + if (!ScopeDIE) + return nullptr; + // We create children when the scope DIE is not null. + createScopeChildrenDIE(TheCU, Scope, Children); } else { // Early exit when we know the scope DIE is going to be null. if (isLexicalScopeDIENull(Scope)) - return NULL; + return nullptr; // We create children here when we know the scope DIE is not going to be // null and the children will be added to the scope DIE. - ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); - ChildrenCreated = true; + createScopeChildrenDIE(TheCU, Scope, Children); // There is no need to emit empty lexical block DIE. 
std::pair<ImportedEntityMap::const_iterator, - ImportedEntityMap::const_iterator> Range = std::equal_range( - ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), - std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0), - less_first()); + ImportedEntityMap::const_iterator> Range = + std::equal_range(ScopesWithImportedEntities.begin(), + ScopesWithImportedEntities.end(), + std::pair<const MDNode *, const MDNode *>(DS, nullptr), + less_first()); if (Children.empty() && Range.first == Range.second) - return NULL; + return nullptr; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); assert(ScopeDIE && "Scope DIE should not be null."); for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i) - constructImportedEntityDIE(TheCU, i->second, ScopeDIE); - } - - if (!ScopeDIE) { - assert(Children.empty() && - "We create children only when the scope DIE is not null."); - return NULL; + constructImportedEntityDIE(TheCU, i->second, *ScopeDIE); } - if (!ChildrenCreated) - // We create children when the scope DIE is not null. - ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); // Add children - for (SmallVectorImpl<DIE *>::iterator I = Children.begin(), - E = Children.end(); I != E; ++I) - ScopeDIE->addChild(*I); - - if (DS.isSubprogram() && ObjectPointer != NULL) - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer); - - if (DS.isSubprogram()) - TheCU->addPubTypes(DISubprogram(DS)); + for (auto &I : Children) + ScopeDIE->addChild(std::move(I)); return ScopeDIE; } -// Look up the source id with the given directory and source file names. -// If none currently exists, create a new id and insert it in the -// SourceIds map. This can update DirectoryNames and SourceFileNames maps -// as well. -unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, - StringRef DirName, unsigned CUID) { - // If we use .loc in assembly, we can't separate .file entries according to - // compile units. Thus all files will belong to the default compile unit. - - // FIXME: add a better feature test than hasRawTextSupport. Even better, - // extend .file to support this. - if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) - CUID = 0; - - // If FE did not provide a file name, then assume stdin. - if (FileName.empty()) - return getOrCreateSourceID("<stdin>", StringRef(), CUID); - - // TODO: this might not belong here. See if we can factor this better. - if (DirName == CompilationDir) - DirName = ""; - - // FileIDCUMap stores the current ID for the given compile unit. - unsigned SrcId = FileIDCUMap[CUID] + 1; - - // We look up the CUID/file/dir by concatenating them with a zero byte. - SmallString<128> NamePair; - NamePair += utostr(CUID); - NamePair += '\0'; - NamePair += DirName; - NamePair += '\0'; // Zero bytes are not allowed in paths. - NamePair += FileName; - - StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId); - if (Ent.getValue() != SrcId) - return Ent.getValue(); - - FileIDCUMap[CUID] = SrcId; - // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID); +void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { + if (!GenerateGnuPubSections) + return; - return SrcId; + U.addFlag(D, dwarf::DW_AT_GNU_pubnames); } -// Create new CompileUnit for the given metadata node with tag +// Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. 
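// The imported-entity lookup above keeps ScopesWithImportedEntities as a
// vector of (scope, entity) pairs sorted by first element (less_first) and
// pulls all entities for one scope with std::equal_range. The idiom
// standalone, with int keys standing in for the MDNode pointers; the probe
// pair's second element is ignored by the comparator:
#include <algorithm>
#include <utility>
#include <vector>

struct LessFirst {
  template <typename T>
  bool operator()(const T &A, const T &B) const { return A.first < B.first; }
};

using Entry = std::pair<int, int>;
using EntryIter = std::vector<Entry>::const_iterator;

static std::pair<EntryIter, EntryIter>
entitiesFor(const std::vector<Entry> &Sorted, int Scope) {
  // Precondition: Sorted was sorted with LessFirst(), as beginModule does.
  return std::equal_range(Sorted.begin(), Sorted.end(), Entry(Scope, 0),
                          LessFirst());
}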
-CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { +DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); - DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, Die, DIUnit, Asm, - this, &InfoHolder); - - FileIDCUMap[NewCU->getUniqueID()] = 0; - // Call this to emit a .file directive if it wasn't emitted for the source - // file this CU comes from yet. - getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID()); - - NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); - NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); - NewCU->addString(Die, dwarf::DW_AT_name, FN); - - // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. We're using 0 (or a NULL label) for this. For - // split dwarf it's in the skeleton CU so omit it here. - if (!useSplitDwarf()) - NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); - - // Define start line table label for each Compile Unit. - MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start", - NewCU->getUniqueID()); - Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym, - NewCU->getUniqueID()); - - // Use a single line table if we are using .loc and generating assembly. - bool UseTheFirstCU = - (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) || - (NewCU->getUniqueID() == 0); + auto OwnedUnit = make_unique<DwarfCompileUnit>( + InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); + DwarfCompileUnit &NewCU = *OwnedUnit; + DIE &Die = NewCU.getUnitDie(); + InfoHolder.addUnit(std::move(OwnedUnit)); + + // LTO with assembly output shares a single line table amongst multiple CUs. + // To avoid the compilation directory being ambiguous, let the line table + // explicitly describe the directory of all files, never relying on the + // compilation directory. + if (!Asm->OutStreamer.hasRawTextSupport() || SingleCU) + Asm->OutStreamer.getContext().setMCLineTableCompilationDir( + NewCU.getUniqueID(), CompilationDir); + + NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); + NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + DIUnit.getLanguage()); + NewCU.addString(Die, dwarf::DW_AT_name, FN); if (!useSplitDwarf()) { - // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. For split dwarf this is - // left in the skeleton CU and so not included. - // The line table entries are not always emitted in assembly, so it - // is not okay to use line_table_start here. - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addSectionLabel( - Die, dwarf::DW_AT_stmt_list, - UseTheFirstCU ? Asm->GetTempSymbol("section_line") - : LineTableStartSym); - else if (UseTheFirstCU) - NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0); - else - NewCU->addSectionDelta(Die, dwarf::DW_AT_stmt_list, - LineTableStartSym, DwarfLineSectionSym); + NewCU.initStmtList(DwarfLineSectionSym); // If we're using split dwarf the compilation dir is going to be in the // skeleton CU and so we don't need to duplicate it here. if (!CompilationDir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); - - // Flags to let the linker know we have emitted new style pubnames. Only - // emit it here if we don't have a skeleton CU for split dwarf. 
- if (GenerateGnuPubSections) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addSectionLabel( - Die, dwarf::DW_AT_GNU_pubnames, - Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); - else - NewCU->addSectionDelta( - Die, dwarf::DW_AT_GNU_pubnames, - Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), - DwarfGnuPubNamesSectionSym); - - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addSectionLabel( - Die, dwarf::DW_AT_GNU_pubtypes, - Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); - else - NewCU->addSectionDelta( - Die, dwarf::DW_AT_GNU_pubtypes, - Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), - DwarfGnuPubTypesSectionSym); - } + NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + addGnuPubAttributes(NewCU, Die); } if (DIUnit.isOptimized()) - NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); + NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags); + NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags); if (unsigned RVer = DIUnit.getRunTimeVersion()) - NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, - dwarf::DW_FORM_data1, RVer); + NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + dwarf::DW_FORM_data1, RVer); if (!FirstCU) - FirstCU = NewCU; - - InfoHolder.addUnit(NewCU); + FirstCU = &NewCU; - CUMap.insert(std::make_pair(DIUnit, NewCU)); - CUDieMap.insert(std::make_pair(Die, NewCU)); + if (useSplitDwarf()) { + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(), + DwarfInfoDWOSectionSym); + NewCU.setSkeleton(constructSkeletonCU(NewCU)); + } else + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), + DwarfInfoSectionSym); + + CUMap.insert(std::make_pair(DIUnit, &NewCU)); + CUDieMap.insert(std::make_pair(&Die, &NewCU)); return NewCU; } -// Construct subprogram DIE. -void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { - // FIXME: We should only call this routine once, however, during LTO if a - // program is defined in multiple CUs we could end up calling it out of - // beginModule as we walk the CUs. - - CompileUnit *&CURef = SPMap[N]; - if (CURef) - return; - CURef = TheCU; - - DISubprogram SP(N); - if (!SP.isDefinition()) - // This is a method declaration which will be handled while constructing - // class type. - return; - - DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP); - - // Expose as a global name. 
- TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext())); -} - -void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, +void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N) { DIImportedEntity Module(N); - if (!Module.Verify()) - return; - if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext())) - constructImportedEntityDIE(TheCU, Module, D); + assert(Module.Verify()); + if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext())) + constructImportedEntityDIE(TheCU, Module, *D); } -void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, - DIE *Context) { +void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, + const MDNode *N, DIE &Context) { DIImportedEntity Module(N); - if (!Module.Verify()) - return; + assert(Module.Verify()); return constructImportedEntityDIE(TheCU, Module, Context); } -void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, +void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, const DIImportedEntity &Module, - DIE *Context) { + DIE &Context) { assert(Module.Verify() && "Use one of the MDNode * overloads to handle invalid metadata"); - assert(Context && "Should always have a context for an imported_module"); - DIE *IMDie = new DIE(Module.getTag()); - TheCU->insertDIE(Module, IMDie); + DIE &IMDie = TheCU.createAndAddDIE(Module.getTag(), Context, Module); DIE *EntityDie; - DIDescriptor Entity = Module.getEntity(); + DIDescriptor Entity = resolve(Module.getEntity()); if (Entity.isNameSpace()) - EntityDie = TheCU->getOrCreateNameSpace(DINameSpace(Entity)); + EntityDie = TheCU.getOrCreateNameSpace(DINameSpace(Entity)); else if (Entity.isSubprogram()) - EntityDie = TheCU->getOrCreateSubprogramDIE(DISubprogram(Entity)); + EntityDie = TheCU.getOrCreateSubprogramDIE(DISubprogram(Entity)); else if (Entity.isType()) - EntityDie = TheCU->getOrCreateTypeDIE(DIType(Entity)); + EntityDie = TheCU.getOrCreateTypeDIE(DIType(Entity)); else - EntityDie = TheCU->getDIE(Entity); - unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(), - Module.getContext().getDirectory(), - TheCU->getUniqueID()); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, None, FileID); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, None, Module.getLineNumber()); - TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie); + EntityDie = TheCU.getDIE(Entity); + TheCU.addSourceLine(IMDie, Module.getLineNumber(), + Module.getContext().getFilename(), + Module.getContext().getDirectory()); + TheCU.addDIEEntry(IMDie, dwarf::DW_AT_import, *EntityDie); StringRef Name = Module.getName(); if (!Name.empty()) - TheCU->addString(IMDie, dwarf::DW_AT_name, Name); - Context->addChild(IMDie); + TheCU.addString(IMDie, dwarf::DW_AT_name, Name); } // Emit all Dwarf sections that should come prior to the content. Create @@ -931,9 +741,11 @@ void DwarfDebug::beginModule() { // Emit initial sections so we can reference labels later. 
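
In the imported-entity hunk above, DIE out-parameters that must never be null become references, metadata validity checks become assertions, and the manual "new DIE + insertDIE + addChild" sequence collapses into a single createAndAddDIE call that attaches the child at creation time. A toy model of that create-and-attach helper, with simplified types rather than the LLVM API:

    #include <memory>
    #include <utility>
    #include <vector>

    struct Die {
      int Tag = 0;
      std::vector<std::unique_ptr<Die>> Children;

      // The child is owned and attached from the moment it exists, so it
      // cannot leak on an early return, and callers get a reference,
      // never a null pointer.
      Die &createAndAddChild(int ChildTag) {
        auto Child = std::make_unique<Die>();
        Child->Tag = ChildTag;
        Children.push_back(std::move(Child));
        return *Children.back();
      }
    };
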
emitSectionLabels(); - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CUNode(CU_Nodes->getOperand(i)); - CompileUnit *CU = constructCompileUnit(CUNode); + SingleCU = CU_Nodes->getNumOperands() == 1; + + for (MDNode *N : CU_Nodes->operands()) { + DICompileUnit CUNode(N); + DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); DIArray ImportedEntities = CUNode.getImportedEntities(); for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) ScopesWithImportedEntities.push_back(std::make_pair( @@ -943,16 +755,21 @@ void DwarfDebug::beginModule() { ScopesWithImportedEntities.end(), less_first()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU->createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); + CU.createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); DIArray SPs = CUNode.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - constructSubprogramDIE(CU, SPs.getElement(i)); + SPMap.insert(std::make_pair(SPs.getElement(i), &CU)); DIArray EnumTypes = CUNode.getEnumTypes(); for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) - CU->getOrCreateTypeDIE(EnumTypes.getElement(i)); + CU.getOrCreateTypeDIE(EnumTypes.getElement(i)); DIArray RetainedTypes = CUNode.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) - CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) { + DIType Ty(RetainedTypes.getElement(i)); + // The retained types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + DIType UniqueTy(resolve(Ty.getRef())); + CU.getOrCreateTypeDIE(UniqueTy); + } // Emit imported_modules last so that the relevant context is already // available. for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) @@ -966,147 +783,171 @@ void DwarfDebug::beginModule() { SectionMap[Asm->getObjFileLowering().getTextSection()]; } -// Attach DW_AT_inline attribute with inlined subprogram DIEs. -void DwarfDebug::computeInlinedDIEs() { - // Attach DW_AT_inline attribute with inlined subprogram DIEs. - for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), - AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { - DIE *ISP = *AI; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); - } - for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(), - AE = AbstractSPDies.end(); AI != AE; ++AI) { - DIE *ISP = AI->second; - if (InlinedSubprogramDIEs.count(ISP)) +void DwarfDebug::finishVariableDefinitions() { + for (const auto &Var : ConcreteVariables) { + DIE *VariableDie = Var->getDIE(); + // FIXME: There shouldn't be any variables without DIEs. + if (!VariableDie) continue; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + // FIXME: Consider the time-space tradeoff of just storing the unit pointer + // in the ConcreteVariables list, rather than looking it up again here. + // DIE::getUnit isn't simple - it walks parent pointers, etc. 
+ DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit()); + assert(Unit); + DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable()); + if (AbsVar && AbsVar->getDIE()) { + Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, + *AbsVar->getDIE()); + } else + Unit->applyVariableAttributes(*Var, *VariableDie); } } +void DwarfDebug::finishSubprogramDefinitions() { + const Module *M = MMI->getModule(); + + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + for (MDNode *N : CU_Nodes->operands()) { + DICompileUnit TheCU(N); + // Construct subprogram DIE and add variables DIEs. + DwarfCompileUnit *SPCU = + static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); + DIArray Subprograms = TheCU.getSubprograms(); + for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { + DISubprogram SP(Subprograms.getElement(i)); + // Perhaps the subprogram is in another CU (such as due to comdat + // folding, etc), in which case ignore it here. + if (SPMap[SP] != SPCU) + continue; + DIE *D = SPCU->getDIE(SP); + if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { + if (D) + // If this subprogram has an abstract definition, reference that + SPCU->addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); + } else { + if (!D) + // Lazily construct the subprogram if we didn't see either concrete or + // inlined versions during codegen. + D = SPCU->getOrCreateSubprogramDIE(SP); + // And attach the attributes + SPCU->applySubprogramAttributesToDefinition(SP, *D); + } + } + } +} + + // Collect info for variables that were optimized out. void DwarfDebug::collectDeadVariables() { const Module *M = MMI->getModule(); if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit TheCU(CU_Nodes->getOperand(i)); + for (MDNode *N : CU_Nodes->operands()) { + DICompileUnit TheCU(N); + // Construct subprogram DIE and add variables DIEs. + DwarfCompileUnit *SPCU = + static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); + assert(SPCU && "Unable to find Compile Unit!"); DIArray Subprograms = TheCU.getSubprograms(); for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { DISubprogram SP(Subprograms.getElement(i)); if (ProcessedSPNodes.count(SP) != 0) continue; - if (!SP.isSubprogram()) - continue; - if (!SP.isDefinition()) - continue; + assert(SP.isSubprogram() && + "CU's subprogram list contains a non-subprogram"); + assert(SP.isDefinition() && + "CU's subprogram list contains a subprogram declaration"); DIArray Variables = SP.getVariables(); if (Variables.getNumElements() == 0) continue; - // Construct subprogram DIE and add variables DIEs. - CompileUnit *SPCU = CUMap.lookup(TheCU); - assert(SPCU && "Unable to find Compile Unit!"); - // FIXME: See the comment in constructSubprogramDIE about duplicate - // subprogram DIEs. 
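
finishSubprogramDefinitions above runs once codegen is done, when it is finally known whether a subprogram acquired an abstract (inlined) definition: a concrete DIE then just points at it via DW_AT_abstract_origin, and otherwise the full attributes are applied to the definition itself, constructing the DIE lazily if codegen never produced one. A toy model of that decision, with string keys standing in for metadata nodes:

    #include <map>
    #include <memory>
    #include <string>

    struct Die {
      bool HasAbstractOrigin = false;
      bool HasFullAttrs = false;
    };

    // AbstractSPDies and the per-CU DIE map, reduced to string-keyed maps.
    std::map<std::string, Die> AbstractDies;
    std::map<std::string, std::unique_ptr<Die>> ConcreteDies;

    void finishDefinition(const std::string &SP) {
      auto &Slot = ConcreteDies[SP];
      if (AbstractDies.count(SP)) {
        if (Slot)                          // concrete copy exists: link it
          Slot->HasAbstractOrigin = true;  // DW_AT_abstract_origin
      } else {
        if (!Slot)                         // never seen during codegen:
          Slot = std::make_unique<Die>();  // construct it lazily
        Slot->HasFullAttrs = true;         // apply attributes directly
      }
    }
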
- constructSubprogramDIE(SPCU, SP); - DIE *SPDIE = SPCU->getDIE(SP); + DIE *SPDIE = AbstractSPDies.lookup(SP); + if (!SPDIE) + SPDIE = SPCU->getDIE(SP); + assert(SPDIE); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); - if (!DV.isVariable()) - continue; - DbgVariable NewVar(DV, NULL, this); - if (DIE *VariableDIE = - SPCU->constructVariableDIE(NewVar, false)) - SPDIE->addChild(VariableDIE); + assert(DV.isVariable()); + DbgVariable NewVar(DV, this); + auto VariableDie = SPCU->constructVariableDIE(NewVar); + SPCU->applyVariableAttributes(NewVar, *VariableDie); + SPDIE->addChild(std::move(VariableDie)); } } } } } -// Type Signature [7.27] and ODR Hash code. - -/// \brief Grabs the string in whichever attribute is passed in and returns -/// a reference to it. Returns "" if the attribute doesn't exist. -static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) { - DIEValue *V = Die->findAttribute(Attr); - - if (DIEString *S = dyn_cast_or_null<DIEString>(V)) - return S->getString(); - - return StringRef(""); -} - -/// Return true if the current DIE is contained within an anonymous namespace. -static bool isContainedInAnonNamespace(DIE *Die) { - DIE *Parent = Die->getParent(); - - while (Parent) { - if (Parent->getTag() == dwarf::DW_TAG_namespace && - getDIEStringAttr(Parent, dwarf::DW_AT_name) == "") - return true; - Parent = Parent->getParent(); - } - - return false; -} +void DwarfDebug::finalizeModuleInfo() { + finishSubprogramDefinitions(); -/// Test if the current CU language is C++ and that we have -/// a named type that is not contained in an anonymous namespace. -static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) { - return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus && - getDIEStringAttr(Die, dwarf::DW_AT_name) != "" && - !isContainedInAnonNamespace(Die); -} + finishVariableDefinitions(); -void DwarfDebug::finalizeModuleInfo() { // Collect info for variables that were optimized out. collectDeadVariables(); - // Attach DW_AT_inline attribute with inlined subprogram DIEs. - computeInlinedDIEs(); - - // Split out type units and conditionally add an ODR tag to the split - // out type. - // FIXME: Do type splitting. - for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) { - DIE *Die = TypeUnits[i]; - DIEHash Hash; - // If we've requested ODR hashes and it's applicable for an ODR hash then - // add the ODR signature now. - // FIXME: This should be added onto the type unit, not the type, but this - // works as an intermediate stage. - if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die)) - CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, - dwarf::DW_FORM_data8, - Hash.computeDIEODRSignature(*Die)); - } - - // Handle anything that needs to be done on a per-cu basis. - for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(), - CUE = CUMap.end(); - CUI != CUE; ++CUI) { - CompileUnit *TheCU = CUI->second; + // Handle anything that needs to be done on a per-unit basis after + // all other generation. + for (const auto &TheU : getUnits()) { // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. - TheCU->constructContainingTypeDIEs(); - - // If we're splitting the dwarf out now that we've got the entire - // CU then construct a skeleton CU based upon it. 
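
collectDeadVariables gives every variable the front end declared a DIE even when codegen never mentioned it; such DIEs simply carry no location, which is how a debugger learns the variable was optimized out. A self-contained model of that subtraction, under the assumption that "seen in codegen" is just a set of names:

    #include <cstdio>
    #include <set>
    #include <string>
    #include <vector>

    // Every declared variable gets a DIE; the ones codegen never mentioned
    // simply carry no DW_AT_location, so debuggers can report them as
    // optimized out rather than omitting them entirely.
    void collectDead(const std::vector<std::string> &Declared,
                     const std::set<std::string> &SeenInCodegen) {
      for (const std::string &Var : Declared)
        if (!SeenInCodegen.count(Var))
          std::printf("DW_TAG_variable %s (no DW_AT_location)\n", Var.c_str());
    }
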
-    if (useSplitDwarf()) {
-      uint64_t ID = 0;
-      if (GenerateCUHash) {
-        DIEHash CUHash;
-        ID = CUHash.computeCUSignature(*TheCU->getCUDie());
+    TheU->constructContainingTypeDIEs();
+
+    // Add CU specific attributes if we need to add any.
+    if (TheU->getUnitDie().getTag() == dwarf::DW_TAG_compile_unit) {
+      // If we're splitting the dwarf out now that we've got the entire
+      // CU then add the dwo id to it.
+      DwarfCompileUnit *SkCU =
+          static_cast<DwarfCompileUnit *>(TheU->getSkeleton());
+      if (useSplitDwarf()) {
+        // Emit a unique identifier for this CU.
+        uint64_t ID = DIEHash(Asm).computeCUSignature(TheU->getUnitDie());
+        TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+                      dwarf::DW_FORM_data8, ID);
+        SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+                      dwarf::DW_FORM_data8, ID);
+
+        // We don't keep track of which addresses are used in which CU so this
+        // is a bit pessimistic under LTO.
+        if (!AddrPool.isEmpty())
+          addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
+                          dwarf::DW_AT_GNU_addr_base, DwarfAddrSectionSym,
+                          DwarfAddrSectionSym);
+        if (!TheU->getRangeLists().empty())
+          addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
+                          dwarf::DW_AT_GNU_ranges_base,
+                          DwarfDebugRangeSectionSym, DwarfDebugRangeSectionSym);
+      }
+
+      // If we have code split among multiple sections or non-contiguous
+      // ranges of code then emit a DW_AT_ranges attribute on the unit that will
+      // remain in the .o file, otherwise add a DW_AT_low_pc.
+      // FIXME: We should use ranges to allow reordering of code, a la
+      // .subsections_via_symbols in mach-o. This would mean turning on
+      // ranges for all subprogram DIEs for mach-o.
+      DwarfCompileUnit &U =
+          SkCU ? *SkCU : static_cast<DwarfCompileUnit &>(*TheU);
+      unsigned NumRanges = TheU->getRanges().size();
+      if (NumRanges) {
+        if (NumRanges > 1) {
+          addSectionLabel(*Asm, U, U.getUnitDie(), dwarf::DW_AT_ranges,
+                          Asm->GetTempSymbol("cu_ranges", U.getUniqueID()),
+                          DwarfDebugRangeSectionSym);
+
+          // A DW_AT_low_pc attribute may also be specified in combination with
+          // DW_AT_ranges to specify the default base address for use in
+          // location lists (see Section 2.6.2) and range lists (see Section
+          // 2.17.3).
+          U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+                    0);
+        } else {
+          RangeSpan &Range = TheU->getRanges().back();
+          U.addLocalLabelAddress(U.getUnitDie(), dwarf::DW_AT_low_pc,
+                                 Range.getStart());
+          U.addLabelDelta(U.getUnitDie(), dwarf::DW_AT_high_pc, Range.getEnd(),
+                          Range.getStart());
+        }
       }
-      // This should be a unique identifier when we want to build .dwp files.
-      TheCU->addUInt(TheCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
-                     dwarf::DW_FORM_data8, ID);
-      // Now construct the skeleton CU associated.
-      CompileUnit *SkCU = constructSkeletonCU(TheCU);
-      // This should be a unique identifier when we want to build .dwp files.
-      SkCU->addUInt(SkCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
-                    dwarf::DW_FORM_data8, ID);
     }
   }

@@ -1117,9 +958,8 @@
 }

 void DwarfDebug::endSections() {
-  // Filter labels by section.
-  for (size_t n = 0; n < ArangeLabels.size(); n++) {
-    const SymbolCU &SCU = ArangeLabels[n];
+  // Filter labels by section.
+  for (const SymbolCU &SCU : ArangeLabels) {
     if (SCU.Sym->isInSection()) {
       // Make a note of this symbol and its section.
       const MCSection *Section = &SCU.Sym->getSection();
@@ -1129,15 +969,14 @@
     // Some symbols (e.g. common/bss on mach-o) can have no section but still
     // appear in the output.
This sucks as we rely on sections to build // arange spans. We can do it without, but it's icky. - SectionMap[NULL].push_back(SCU); + SectionMap[nullptr].push_back(SCU); } } // Build a list of sections used. std::vector<const MCSection *> Sections; - for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); - it++) { - const MCSection *Section = it->first; + for (const auto &it : SectionMap) { + const MCSection *Section = it.first; Sections.push_back(Section); } @@ -1146,29 +985,32 @@ void DwarfDebug::endSections() { std::sort(Sections.begin(), Sections.end(), SectionSort); // Add terminating symbols for each section. - for (unsigned ID=0;ID<Sections.size();ID++) { + for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) { const MCSection *Section = Sections[ID]; - MCSymbol *Sym = NULL; + MCSymbol *Sym = nullptr; if (Section) { // We can't call MCSection::getLabelEndName, as it's only safe to do so - // if we know the section name up-front. For user-created sections, the resulting - // label may not be valid to use as a label. (section names can use a greater - // set of characters on some systems) + // if we know the section name up-front. For user-created sections, the + // resulting label may not be valid to use as a label. (section names can + // use a greater set of characters on some systems) Sym = Asm->GetTempSymbol("debug_end", ID); Asm->OutStreamer.SwitchSection(Section); Asm->OutStreamer.EmitLabel(Sym); } // Insert a final terminator. - SectionMap[Section].push_back(SymbolCU(NULL, Sym)); + SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); } } // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { + assert(CurFn == nullptr); + assert(CurMI == nullptr); - if (!FirstCU) return; + if (!FirstCU) + return; // End any existing sections. // TODO: Does this need to happen? @@ -1177,59 +1019,33 @@ void DwarfDebug::endModule() { // Finalize the debug info for the module. finalizeModuleInfo(); - if (!useSplitDwarf()) { - emitDebugStr(); - - // Emit all the DIEs into a debug info section. - emitDebugInfo(); + emitDebugStr(); - // Corresponding abbreviations into a abbrev section. - emitAbbreviations(); + // Emit all the DIEs into a debug info section. + emitDebugInfo(); - // Emit info into a debug loc section. - emitDebugLoc(); + // Corresponding abbreviations into a abbrev section. + emitAbbreviations(); - // Emit info into a debug aranges section. + // Emit info into a debug aranges section. + if (GenerateARangeSection) emitDebugARanges(); - // Emit info into a debug ranges section. - emitDebugRanges(); - - // Emit info into a debug macinfo section. - emitDebugMacInfo(); + // Emit info into a debug ranges section. + emitDebugRanges(); - } else { - // TODO: Fill this in for separated debug sections and separate - // out information into new sections. - emitDebugStr(); - if (useSplitDwarf()) - emitDebugStrDWO(); - - // Emit the debug info section and compile units. - emitDebugInfo(); + if (useSplitDwarf()) { + emitDebugStrDWO(); emitDebugInfoDWO(); - - // Corresponding abbreviations into a abbrev section. - emitAbbreviations(); emitDebugAbbrevDWO(); - + emitDebugLineDWO(); + emitDebugLocDWO(); + // Emit DWO addresses. + AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); + } else // Emit info into a debug loc section. emitDebugLoc(); - // Emit info into a debug aranges section. - emitDebugARanges(); - - // Emit info into a debug ranges section. 
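
endSections buckets every arange-relevant symbol by the section it landed in, using a null-section bucket for the mach-o common/bss case, then appends one terminating symbol per section so span lengths can be measured later. A compact model of the terminator step only, with toy types rather than the MC API:

    #include <map>
    #include <string>
    #include <vector>

    struct Sym { std::string Name; };

    // Keyed by section; the nullptr bucket holds section-less symbols.
    using SectionMapT = std::map<const std::string *, std::vector<Sym>>;

    // Append one empty-named terminator per section, mirroring the
    // SymbolCU(nullptr, Sym) entries the emitter later uses to measure
    // how far each arange span extends.
    void addTerminators(SectionMapT &Map) {
      for (auto &Entry : Map)
        Entry.second.push_back(Sym{std::string()});
    }
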
-  emitDebugRanges();
-
-  // Emit info into a debug macinfo section.
-  emitDebugMacInfo();
-
-  // Emit DWO addresses.
-  InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
-
-  }
-
   // Emit info into the dwarf accelerator table sections.
   if (useDwarfAccelTables()) {
     emitAccelNames();
@@ -1246,41 +1062,62 @@
   // clean up.
   SPMap.clear();
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I)
-    delete I->second;
-
-  for (SmallVectorImpl<CompileUnit *>::iterator I = SkeletonCUs.begin(),
-         E = SkeletonCUs.end(); I != E; ++I)
-    delete *I;
+  AbstractVariables.clear();

   // Reset these for the next Module if we have one.
-  FirstCU = NULL;
+  FirstCU = nullptr;
 }

 // Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
-                                              DebugLoc ScopeLoc) {
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV,
+                                                     DIVariable &Cleansed) {
   LLVMContext &Ctx = DV->getContext();
   // More than one inlined variable corresponds to one abstract variable.
-  DIVariable Var = cleanseInlinedVariable(DV, Ctx);
-  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
-  if (AbsDbgVariable)
-    return AbsDbgVariable;
+  // FIXME: This duplication of variables when inlining should probably be
+  // removed. It's done to allow each DIVariable to describe its location
+  // because the DebugLoc on the dbg.value/declare isn't accurate. We should
+  // make it accurate, then remove this duplication/cleansing stuff.
+  Cleansed = cleanseInlinedVariable(DV, Ctx);
+  auto I = AbstractVariables.find(Cleansed);
+  if (I != AbstractVariables.end())
+    return I->second.get();
+  return nullptr;
+}
+
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) {
+  DIVariable Cleansed;
+  return getExistingAbstractVariable(DV, Cleansed);
+}
+
+void DwarfDebug::createAbstractVariable(const DIVariable &Var,
+                                        LexicalScope *Scope) {
+  auto AbsDbgVariable = make_unique<DbgVariable>(Var, this);
+  addScopeVariable(Scope, AbsDbgVariable.get());
+  AbstractVariables[Var] = std::move(AbsDbgVariable);
+}
+
+void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV,
                                                 const MDNode *ScopeNode) {
+  DIVariable Cleansed = DV;
+  if (getExistingAbstractVariable(DV, Cleansed))
+    return;
+
+  createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode));
+}

-  LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx));
-  if (!Scope)
-    return NULL;
+void
+DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV,
                                                    const MDNode *ScopeNode) {
+  DIVariable Cleansed = DV;
+  if (getExistingAbstractVariable(DV, Cleansed))
+    return;

-  AbsDbgVariable = new DbgVariable(Var, NULL, this);
-  addScopeVariable(Scope, AbsDbgVariable);
-  AbstractVariables[Var] = AbsDbgVariable;
-  return AbsDbgVariable;
+  if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode))
+    createAbstractVariable(Cleansed, Scope);
 }
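
The ensure/create helpers above memoize abstract-variable construction: the variable is first canonicalized ("cleansed") so every inlined copy maps to one key, and the entry is created at most once. The same shape in a standalone sketch; the "@callsite" suffix is an invented stand-in for the inlined-variable distinction:

    #include <map>
    #include <memory>
    #include <string>

    struct AbsVar { std::string Name; };

    std::map<std::string, std::unique_ptr<AbsVar>> AbstractVariables;

    // Invented canonicalization: strip an "@callsite" tag so every inlined
    // copy of a variable shares one abstract entry.
    std::string cleanse(const std::string &Inlined) {
      return Inlined.substr(0, Inlined.find('@'));
    }

    AbsVar *ensureAbstract(const std::string &Inlined) {
      const std::string Key = cleanse(Inlined);
      std::unique_ptr<AbsVar> &Slot = AbstractVariables[Key];
      if (!Slot)
        Slot = std::make_unique<AbsVar>(AbsVar{Key}); // created at most once
      return Slot.get();
    }
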
 // If Var is a current function argument then add it to CurrentFnArguments list.
-bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
-                                      DbgVariable *Var, LexicalScope *Scope) {
+bool DwarfDebug::addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope) {
   if (!LScopes.isCurrentFunctionScope(Scope))
     return false;
   DIVariable DV = Var->getVariable();
@@ -1292,7 +1129,7 @@
   size_t Size = CurrentFnArguments.size();
   if (Size == 0)
-    CurrentFnArguments.resize(MF->getFunction()->arg_size());
+    CurrentFnArguments.resize(CurFn->getFunction()->arg_size());
   // llvm::Function argument size is not a good indicator of how many
   // arguments the function has at source level.
   if (ArgNo > Size)
@@ -1302,50 +1139,30 @@
 }

 // Collect variable information from side table maintained by MMI.
-void
-DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
-                                            SmallPtrSet<const MDNode *, 16> &Processed) {
-  MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
-  for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
-       VE = VMap.end(); VI != VE; ++VI) {
-    const MDNode *Var = VI->first;
-    if (!Var) continue;
-    Processed.insert(Var);
-    DIVariable DV(Var);
-    const std::pair<unsigned, DebugLoc> &VP = VI->second;
-
-    LexicalScope *Scope = LScopes.findLexicalScope(VP.second);
+void DwarfDebug::collectVariableInfoFromMMITable(
+    SmallPtrSet<const MDNode *, 16> &Processed) {
+  for (const auto &VI : MMI->getVariableDbgInfo()) {
+    if (!VI.Var)
+      continue;
+    Processed.insert(VI.Var);
+    DIVariable DV(VI.Var);
+    LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);

     // If variable scope is not found then skip this variable.
-    if (Scope == 0)
+    if (!Scope)
       continue;

-    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
-    DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this);
-    RegVar->setFrameIndex(VP.first);
-    if (!addCurrentFnArgument(MF, RegVar, Scope))
-      addScopeVariable(Scope, RegVar);
-    if (AbsDbgVariable)
-      AbsDbgVariable->setFrameIndex(VP.first);
+    ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+    ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+    DbgVariable *RegVar = ConcreteVariables.back().get();
+    RegVar->setFrameIndex(VI.Slot);
+    addScopeVariable(Scope, RegVar);
   }
 }

-// Return true if debug value, encoded by DBG_VALUE instruction, is in a
-// defined reg.
-static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
-  assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
-  return MI->getNumOperands() == 3 &&
-         MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
-         (MI->getOperand(1).isImm() ||
-          (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U));
-}
-
 // Get .debug_loc entry for the instruction range starting at MI.
-static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, - const MCSymbol *FLabel, - const MCSymbol *SLabel, - const MachineInstr *MI) { - const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); +static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { + const MDNode *Var = MI->getDebugVariable(); assert(MI->getNumOperands() == 3); if (MI->getOperand(0).isReg()) { @@ -1356,120 +1173,116 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, MLoc.set(MI->getOperand(0).getReg()); else MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); + return DebugLocEntry::Value(Var, MLoc); } if (MI->getOperand(0).isImm()) - return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm()); + return DebugLocEntry::Value(Var, MI->getOperand(0).getImm()); if (MI->getOperand(0).isFPImm()) - return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm()); + return DebugLocEntry::Value(Var, MI->getOperand(0).getFPImm()); if (MI->getOperand(0).isCImm()) - return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm()); + return DebugLocEntry::Value(Var, MI->getOperand(0).getCImm()); llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!"); } // Find variables for each lexical scope. void -DwarfDebug::collectVariableInfo(const MachineFunction *MF, - SmallPtrSet<const MDNode *, 16> &Processed) { +DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) { + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); // Grab the variable info that was squirreled away in the MMI side-table. - collectVariableInfoFromMMITable(MF, Processed); + collectVariableInfoFromMMITable(Processed); - for (SmallVectorImpl<const MDNode*>::const_iterator - UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE; - ++UVI) { - const MDNode *Var = *UVI; - if (Processed.count(Var)) + for (const auto &I : DbgValues) { + DIVariable DV(I.first); + if (Processed.count(DV)) continue; - // History contains relevant DBG_VALUE instructions for Var and instructions - // clobbering it. - SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; - if (History.empty()) + // Instruction ranges, specifying where DV is accessible. + const auto &Ranges = I.second; + if (Ranges.empty()) continue; - const MachineInstr *MInsn = History.front(); - DIVariable DV(Var); - LexicalScope *Scope = NULL; + LexicalScope *Scope = nullptr; if (DV.getTag() == dwarf::DW_TAG_arg_variable && - DISubprogram(DV.getContext()).describes(MF->getFunction())) + DISubprogram(DV.getContext()).describes(CurFn->getFunction())) Scope = LScopes.getCurrentFunctionScope(); - else if (MDNode *IA = DV.getInlinedAt()) - Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA)); - else - Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1))); + else if (MDNode *IA = DV.getInlinedAt()) { + DebugLoc DL = DebugLoc::getFromDILocation(IA); + Scope = LScopes.findInlinedScope(DebugLoc::get( + DL.getLine(), DL.getCol(), DV.getContext(), IA)); + } else + Scope = LScopes.findLexicalScope(DV.getContext()); // If variable scope is not found then skip this variable. 
     if (!Scope)
       continue;

     Processed.insert(DV);
+    const MachineInstr *MInsn = Ranges.front().first;
     assert(MInsn->isDebugValue() && "History must begin with debug value");
-    DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
-    DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this);
-    if (!addCurrentFnArgument(MF, RegVar, Scope))
-      addScopeVariable(Scope, RegVar);
-    if (AbsVar)
-      AbsVar->setMInsn(MInsn);
-
-    // Simplify ranges that are fully coalesced.
-    if (History.size() <= 1 || (History.size() == 2 &&
-                                MInsn->isIdenticalTo(History.back()))) {
-      RegVar->setMInsn(MInsn);
+    ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+    ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
+    DbgVariable *RegVar = ConcreteVariables.back().get();
+    addScopeVariable(Scope, RegVar);
+
+    // Check if the first DBG_VALUE is valid for the rest of the function.
+    if (Ranges.size() == 1 && Ranges.front().second == nullptr)
       continue;
-    }

     // Handle multiple DBG_VALUE instructions describing one variable.
     RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());

-    for (SmallVectorImpl<const MachineInstr*>::const_iterator
-           HI = History.begin(), HE = History.end(); HI != HE; ++HI) {
-      const MachineInstr *Begin = *HI;
+    DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1);
+    DebugLocList &LocList = DotDebugLocEntries.back();
+    LocList.Label =
+        Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1);
+    SmallVector<DebugLocEntry, 4> &DebugLoc = LocList.List;
+    for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+      const MachineInstr *Begin = I->first;
+      const MachineInstr *End = I->second;
       assert(Begin->isDebugValue() && "Invalid History entry");

-      // Check if DBG_VALUE is truncating a range.
-      if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg()
-          && !Begin->getOperand(0).getReg())
+      // Check if a variable is inaccessible in this range.
+      if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
+          !Begin->getOperand(0).getReg())
         continue;

+      DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin);
+      if (End != nullptr)
+        DEBUG(dbgs() << "\t" << *End);
+      else
+        DEBUG(dbgs() << "\tNULL\n");
-      // Compute the range for a register location.
-      const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
-      const MCSymbol *SLabel = 0;
-
-      if (HI + 1 == HE)
-        // If Begin is the last instruction in History then its value is valid
-        // until the end of the function.
-        SLabel = FunctionEndSym;
-      else {
-        const MachineInstr *End = HI[1];
-        DEBUG(dbgs() << "DotDebugLoc Pair:\n"
-              << "\t" << *Begin << "\t" << *End << "\n");
-        if (End->isDebugValue())
-          SLabel = getLabelBeforeInsn(End);
-        else {
-          // End is a normal instruction clobbering the range.
-          SLabel = getLabelAfterInsn(End);
-          assert(SLabel && "Forgot label after clobber instruction");
-          ++HI;
-        }
-      }
+      const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
+      assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+
+      const MCSymbol *EndLabel;
+      if (End != nullptr)
+        EndLabel = getLabelAfterInsn(End);
+      else if (std::next(I) == Ranges.end())
+        EndLabel = FunctionEndSym;
+      else
+        EndLabel = getLabelBeforeInsn(std::next(I)->first);
+      assert(EndLabel && "Forgot label after instruction ending a range!");

-      // The value is valid until the next DBG_VALUE or clobber.
- DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, - Begin)); + DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU); + if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc)) + DebugLoc.push_back(std::move(Loc)); } - DotDebugLocEntries.push_back(DotDebugLocEntry()); } // Collect info for variables that were optimized out. - LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.isVariable() || !Processed.insert(DV)) + assert(DV.isVariable()); + if (!Processed.insert(DV)) continue; - if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); + if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) { + ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this)); + addScopeVariable(Scope, ConcreteVariables.back().get()); + } } } @@ -1487,6 +1300,8 @@ MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { // Process beginning of an instruction. void DwarfDebug::beginInstruction(const MachineInstr *MI) { + assert(CurMI == nullptr); + CurMI = MI; // Check if source location changes, but ignore DBG_VALUE locations. if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); @@ -1504,13 +1319,13 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); } else - recordSourceLine(0, 0, 0, 0); + recordSourceLine(0, 0, nullptr, 0); } } // Insert labels where requested. - DenseMap<const MachineInstr*, MCSymbol*>::iterator I = - LabelsBeforeInsn.find(MI); + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsBeforeInsn.find(MI); // No label needed. if (I == LabelsBeforeInsn.end()) @@ -1528,14 +1343,16 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { } // Process end of an instruction. -void DwarfDebug::endInstruction(const MachineInstr *MI) { +void DwarfDebug::endInstruction() { + assert(CurMI != nullptr); // Don't create a new label after DBG_VALUE instructions. // They don't generate code. - if (!MI->isDebugValue()) - PrevLabel = 0; + if (!CurMI->isDebugValue()) + PrevLabel = nullptr; - DenseMap<const MachineInstr*, MCSymbol*>::iterator I = - LabelsAfterInsn.find(MI); + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsAfterInsn.find(CurMI); + CurMI = nullptr; // No label needed. 
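
The per-variable DebugLocList built above turns each (begin, end) instruction range into a labelled entry and relies on DebugLocEntry::Merge to collapse adjacent entries that describe the identical value. A self-contained model of that merge rule, with an int standing in for the described value and strings for the labels:

    #include <string>
    #include <utility>
    #include <vector>

    struct LocEntry {
      std::string Begin, End;
      int Value; // stand-in for the DBG_VALUE payload
      // Mirror of the Merge call above: absorb Next when it starts exactly
      // where this entry stops and describes the same value.
      bool merge(const LocEntry &Next) {
        if (Value != Next.Value || End != Next.Begin)
          return false;
        End = Next.End;
        return true;
      }
    };

    void append(std::vector<LocEntry> &List, LocEntry E) {
      if (List.empty() || !List.back().merge(E))
        List.push_back(std::move(E));
    }
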
   if (I == LabelsAfterInsn.end())
@@ -1565,53 +1382,35 @@ void DwarfDebug::identifyScopeMarkers() {
     const SmallVectorImpl<LexicalScope *> &Children = S->getChildren();
     if (!Children.empty())
-      for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(),
-             SE = Children.end(); SI != SE; ++SI)
-        WorkList.push_back(*SI);
+      WorkList.append(Children.begin(), Children.end());

     if (S->isAbstractScope())
       continue;

-    const SmallVectorImpl<InsnRange> &Ranges = S->getRanges();
-    if (Ranges.empty())
-      continue;
-    for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(),
-           RE = Ranges.end(); RI != RE; ++RI) {
-      assert(RI->first && "InsnRange does not have first instruction!");
-      assert(RI->second && "InsnRange does not have second instruction!");
-      requestLabelBeforeInsn(RI->first);
-      requestLabelAfterInsn(RI->second);
+    for (const InsnRange &R : S->getRanges()) {
+      assert(R.first && "InsnRange does not have first instruction!");
+      assert(R.second && "InsnRange does not have second instruction!");
+      requestLabelBeforeInsn(R.first);
+      requestLabelAfterInsn(R.second);
     }
   }
 }

-// Get MDNode for DebugLoc's scope.
-static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
-  if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
-    return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
-  return DL.getScope(Ctx);
-}
-
-// Walk up the scope chain of given debug loc and find line number info
-// for the function.
-static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
-  const MDNode *Scope = getScopeNode(DL, Ctx);
-  DISubprogram SP = getDISubprogram(Scope);
-  if (SP.isSubprogram()) {
-    // Check for number of operands since the compatibility is
-    // cheap here.
-    if (SP->getNumOperands() > 19)
-      return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
-    else
-      return DebugLoc::get(SP.getLineNumber(), 0, SP);
-  }
-
+static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
+  // First known non-DBG_VALUE and non-frame setup location marks
+  // the beginning of the function body.
+  for (const auto &MBB : *MF)
+    for (const auto &MI : MBB)
+      if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+          !MI.getDebugLoc().isUnknown())
+        return MI.getDebugLoc();
   return DebugLoc();
 }

 // Gather pre-function debug information. Assumes it is called immediately
 // after the function entry point has been emitted.
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
+  CurFn = MF;

   // If there's no debug info for the function we're not going to do anything.
   if (!MMI->hasDebugInfo())
@@ -1623,19 +1422,19 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   if (LScopes.empty())
     return;

-  assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
+  assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");

   // Make sure that each lexical scope will have a begin/end label.
   identifyScopeMarkers();

   // Set DwarfCompileUnitID in MCContext to the Compile Unit this function
   // belongs to so that we add to the correct per-cu line table in the
   // non-asm case.
   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
-  CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+  DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   assert(TheCU && "Unable to find compile unit!");
-  if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport())
-    // Use a single line table if we are using .loc and generating assembly.
+ if (Asm->OutStreamer.hasRawTextSupport()) + // Use a single line table if we are generating assembly. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); else Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); @@ -1645,148 +1444,26 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionBeginSym); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - // LiveUserVar - Map physreg numbers to the MDNode they contain. - std::vector<const MDNode *> LiveUserVar(TRI->getNumRegs()); - - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; - ++I) { - bool AtBlockEntry = true; - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MI = II; - - if (MI->isDebugValue()) { - assert(MI->getNumOperands() > 1 && "Invalid machine instruction!"); - - // Keep track of user variables. - const MDNode *Var = - MI->getOperand(MI->getNumOperands() - 1).getMetadata(); - - // Variable is in a register, we need to check for clobbers. - if (isDbgValueInDefinedReg(MI)) - LiveUserVar[MI->getOperand(0).getReg()] = Var; - - // Check the history of this variable. - SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var]; - if (History.empty()) { - UserVariables.push_back(Var); - // The first mention of a function argument gets the FunctionBeginSym - // label, so arguments are visible when breaking at function entry. - DIVariable DV(Var); - if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && - getDISubprogram(DV.getContext()).describes(MF->getFunction())) - LabelsBeforeInsn[MI] = FunctionBeginSym; - } else { - // We have seen this variable before. Try to coalesce DBG_VALUEs. - const MachineInstr *Prev = History.back(); - if (Prev->isDebugValue()) { - // Coalesce identical entries at the end of History. - if (History.size() >= 2 && - Prev->isIdenticalTo(History[History.size() - 2])) { - DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << *Prev << "\t" - << *History[History.size() - 2] << "\n"); - History.pop_back(); - } - - // Terminate old register assignments that don't reach MI; - MachineFunction::const_iterator PrevMBB = Prev->getParent(); - if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) && - isDbgValueInDefinedReg(Prev)) { - // Previous register assignment needs to terminate at the end of - // its basic block. - MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); - if (LastMI == PrevMBB->end()) { - // Drop DBG_VALUE for empty range. - DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" - << "\t" << *Prev << "\n"); - History.pop_back(); - } else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end()) - // Terminate after LastMI. - History.push_back(LastMI); - } - } - } - History.push_back(MI); - } else { - // Not a DBG_VALUE instruction. - if (!MI->isLabel()) - AtBlockEntry = false; - - // First known non-DBG_VALUE and non-frame setup location marks - // the beginning of the function body. - if (!MI->getFlag(MachineInstr::FrameSetup) && - (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())) - PrologEndLoc = MI->getDebugLoc(); - - // Check if the instruction clobbers any registers with debug vars. 
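
All of the hand-rolled history tracking deleted here now lives in calculateDbgValueHistory, which produces, per variable, a list of (start, end) instruction pairs in which a null end means the value stays live to the end of the function. A toy consumer of that shape, mirroring the label-request loop that follows; the types are invented for the sketch:

    #include <cstdio>
    #include <utility>
    #include <vector>

    struct Insn { int ID; };

    // One variable's history: each pair is a live range; a null second
    // element means "valid until the end of the function".
    using RangeList = std::vector<std::pair<const Insn *, const Insn *>>;

    void requestLabels(const RangeList &Ranges) {
      for (const auto &Range : Ranges) {
        std::printf("label before insn %d\n", Range.first->ID);
        if (Range.second) // open-ended ranges get no closing label here
          std::printf("label after insn %d\n", Range.second->ID);
      }
    }
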
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) - continue; - for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); AI.isValid(); - ++AI) { - unsigned Reg = *AI; - const MDNode *Var = LiveUserVar[Reg]; - if (!Var) - continue; - // Reg is now clobbered. - LiveUserVar[Reg] = 0; - - // Was MD last defined by a DBG_VALUE referring to Reg? - DbgValueHistoryMap::iterator HistI = DbgValues.find(Var); - if (HistI == DbgValues.end()) - continue; - SmallVectorImpl<const MachineInstr *> &History = HistI->second; - if (History.empty()) - continue; - const MachineInstr *Prev = History.back(); - // Sanity-check: Register assignments are terminated at the end of - // their block. - if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent()) - continue; - // Is the variable still in Reg? - if (!isDbgValueInDefinedReg(Prev) || - Prev->getOperand(0).getReg() != Reg) - continue; - // Var is clobbered. Make sure the next instruction gets a label. - History.push_back(MI); - } - } - } - } - } + // Calculate history for local variables. + calculateDbgValueHistory(MF, Asm->TM.getRegisterInfo(), DbgValues); - for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end(); - I != E; ++I) { - SmallVectorImpl<const MachineInstr *> &History = I->second; - if (History.empty()) + // Request labels for the full history. + for (const auto &I : DbgValues) { + const auto &Ranges = I.second; + if (Ranges.empty()) continue; - // Make sure the final register assignments are terminated. - const MachineInstr *Prev = History.back(); - if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { - const MachineBasicBlock *PrevMBB = Prev->getParent(); - MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); - if (LastMI == PrevMBB->end()) - // Drop DBG_VALUE for empty range. - History.pop_back(); - else if (PrevMBB != &PrevMBB->getParent()->back()) { - // Terminate after LastMI. - History.push_back(LastMI); - } - } - // Request labels for the full history. - for (unsigned i = 0, e = History.size(); i != e; ++i) { - const MachineInstr *MI = History[i]; - if (MI->isDebugValue()) - requestLabelBeforeInsn(MI); - else - requestLabelAfterInsn(MI); + // The first mention of a function argument gets the FunctionBeginSym + // label, so arguments are visible when breaking at function entry. + DIVariable DV(I.first); + if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DV.getContext()).describes(MF->getFunction())) + LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; + + for (const auto &Range : Ranges) { + requestLabelBeforeInsn(Range.first); + if (Range.second) + requestLabelAfterInsn(Range.second); } } @@ -1794,9 +1471,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { PrevLabel = FunctionBeginSym; // Record beginning of function. 
+  PrologEndLoc = findPrologueEndLoc(MF);
   if (!PrologEndLoc.isUnknown()) {
     DebugLoc FnStartDL =
-        getFnDebugLoc(PrologEndLoc, MF->getFunction()->getContext());
+        PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext());
     recordSourceLine(
         FnStartDL.getLine(), FnStartDL.getCol(),
         FnStartDL.getScope(MF->getFunction()->getContext()),
@@ -1807,6 +1485,8 @@
 }

 void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+  if (addCurrentFnArgument(Var, LS))
+    return;
   SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
   DIVariable DV = Var->getVariable();
   // Variables with positive arg numbers are parameters.
@@ -1839,66 +1519,76 @@ void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {

 // Gather and emit post-function debug information.
 void DwarfDebug::endFunction(const MachineFunction *MF) {
-  if (!MMI->hasDebugInfo() || LScopes.empty()) return;
+  // Every call to beginFunction(MF) should be paired with a call to
+  // endFunction(MF), but beginFunction may never have been called for this
+  // function (e.g. when it carries no debug info), so handle both cases.
+  if (!CurFn)
+    CurFn = MF;
+  else
+    assert(CurFn == MF);
+  assert(CurFn != nullptr);
+
+  if (!MMI->hasDebugInfo() || LScopes.empty()) {
+    // If we don't have a lexical scope for this function then there will
+    // be a hole in the range information. Keep note of this by setting the
+    // previously used section to nullptr.
+    PrevSection = nullptr;
+    PrevCU = nullptr;
+    CurFn = nullptr;
+    return;
+  }

   // Define end label for subprogram.
-  FunctionEndSym = Asm->GetTempSymbol("func_end",
-                                      Asm->getFunctionNumber());
+  FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber());
   // Assumes in correct section after the entry point.
   Asm->OutStreamer.EmitLabel(FunctionEndSym);
-  // Set DwarfCompileUnitID in MCContext to default value.
+
+  // Set DwarfCompileUnitID in MCContext back to the default value.
   Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);

   SmallPtrSet<const MDNode *, 16> ProcessedVars;
-  collectVariableInfo(MF, ProcessedVars);
+  collectVariableInfo(ProcessedVars);

   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
-  CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
-  assert(TheCU && "Unable to find compile unit!");
+  DwarfCompileUnit &TheCU = *SPMap.lookup(FnScope->getScopeNode());

   // Construct abstract scopes.
-  ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList();
-  for (unsigned i = 0, e = AList.size(); i != e; ++i) {
-    LexicalScope *AScope = AList[i];
+  for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
     DISubprogram SP(AScope->getScopeNode());
-    if (SP.isSubprogram()) {
-      // Collect info for variables that were optimized out.
-      DIArray Variables = SP.getVariables();
-      for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
-        DIVariable DV(Variables.getElement(i));
-        if (!DV || !DV.isVariable() || !ProcessedVars.insert(DV))
-          continue;
-        // Check that DbgVariable for DV wasn't created earlier, when
-        // findAbstractVariable() was called for inlined instance of DV.
-        LLVMContext &Ctx = DV->getContext();
-        DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx);
-        if (AbstractVariables.lookup(CleanDV))
-          continue;
-        if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
-          addScopeVariable(Scope, new DbgVariable(DV, NULL, this));
-      }
+    assert(SP.isSubprogram());
+    // Collect info for variables that were optimized out.
+ DIArray Variables = SP.getVariables(); + for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { + DIVariable DV(Variables.getElement(i)); + assert(DV && DV.isVariable()); + if (!ProcessedVars.insert(DV)) + continue; + ensureAbstractVariableIsCreated(DV, DV.getContext()); } - if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0) - constructScopeDIE(TheCU, AScope); + constructAbstractSubprogramScopeDIE(TheCU, AScope); } - DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); + DIE &CurFnDIE = constructSubprogramScopeDIE(TheCU, FnScope); + if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn)) + TheCU.addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); - if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) - TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); + // Add the range of this function to the list of ranges for the CU. + RangeSpan Span(FunctionBeginSym, FunctionEndSym); + TheCU.addRange(std::move(Span)); + PrevSection = Asm->getCurrentSection(); + PrevCU = &TheCU; // Clear debug info - for (ScopeVariablesMap::iterator - I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I) - DeleteContainerPointers(I->second); + // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the + // DbgVariables except those that are also in AbstractVariables (since they + // can be used cross-function) ScopeVariables.clear(); - DeleteContainerPointers(CurrentFnArguments); - UserVariables.clear(); + CurrentFnArguments.clear(); DbgValues.clear(); - AbstractVariables.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); - PrevLabel = NULL; + PrevLabel = nullptr; + CurFn = nullptr; } // Register a source line with debug info. Returns the unique label that was @@ -1908,129 +1598,46 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, StringRef Fn; StringRef Dir; unsigned Src = 1; - if (S) { - DIDescriptor Scope(S); - - if (Scope.isCompileUnit()) { - DICompileUnit CU(S); - Fn = CU.getFilename(); - Dir = CU.getDirectory(); - } else if (Scope.isFile()) { - DIFile F(S); - Fn = F.getFilename(); - Dir = F.getDirectory(); - } else if (Scope.isSubprogram()) { - DISubprogram SP(S); - Fn = SP.getFilename(); - Dir = SP.getDirectory(); - } else if (Scope.isLexicalBlockFile()) { - DILexicalBlockFile DBF(S); - Fn = DBF.getFilename(); - Dir = DBF.getDirectory(); - } else if (Scope.isLexicalBlock()) { - DILexicalBlock DB(S); - Fn = DB.getFilename(); - Dir = DB.getDirectory(); - } else - llvm_unreachable("Unexpected scope info"); + unsigned Discriminator = 0; + if (DIScope Scope = DIScope(S)) { + assert(Scope.isScope()); + Fn = Scope.getFilename(); + Dir = Scope.getDirectory(); + if (Scope.isLexicalBlock()) + Discriminator = DILexicalBlock(S).getDiscriminator(); - Src = getOrCreateSourceID(Fn, Dir, - Asm->OutStreamer.getContext().getDwarfCompileUnitID()); + unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID(); + Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) + .getOrCreateSourceID(Fn, Dir); } - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn); + Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, + Discriminator, Fn); } //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// -// Compute the size and offset of a DIE. The offset is relative to start of the -// CU. It returns the offset after laying out the DIE. 
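
Each endFunction above appends a RangeSpan(FunctionBeginSym, FunctionEndSym) to the owning unit; finalizeModuleInfo later reads the accumulated spans to choose between a single low_pc/high_pc pair and a DW_AT_ranges list. A sketch of both halves, with string labels standing in for MCSymbols:

    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    struct RangeSpan { std::string Start, End; };

    struct Unit {
      std::vector<RangeSpan> Ranges;
      void addRange(RangeSpan S) { Ranges.push_back(std::move(S)); } // endFunction
    };

    // Module finalization: one contiguous span fits in low_pc/high_pc;
    // several spans need a .debug_ranges list with a base address.
    void describe(const Unit &U) {
      if (U.Ranges.size() == 1)
        std::printf("DW_AT_low_pc=%s DW_AT_high_pc=%s\n",
                    U.Ranges[0].Start.c_str(), U.Ranges[0].End.c_str());
      else if (!U.Ranges.empty())
        std::printf("DW_AT_ranges -> .debug_ranges (DW_AT_low_pc = 0 as base)\n");
    }
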
-unsigned -DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { - // Get the children. - const std::vector<DIE *> &Children = Die->getChildren(); - - // Record the abbreviation. - assignAbbrevNumber(Die->getAbbrev()); - - // Get the abbreviation for this DIE. - unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1]; - - // Set DIE offset - Die->setOffset(Offset); - - // Start the size with the size of abbreviation code. - Offset += MCAsmInfo::getULEB128Size(AbbrevNumber); - - const SmallVectorImpl<DIEValue*> &Values = Die->getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData(); - - // Size the DIE attribute values. - for (unsigned i = 0, N = Values.size(); i < N; ++i) - // Size attribute value. - Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm()); - - // Size the DIE children if any. - if (!Children.empty()) { - assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes && - "Children flag not set"); - - for (unsigned j = 0, M = Children.size(); j < M; ++j) - Offset = computeSizeAndOffset(Children[j], Offset); - - // End of children marker. - Offset += sizeof(int8_t); - } - - Die->setSize(Offset - Die->getOffset()); - return Offset; -} - -// Compute the size and offset for each DIE. -void DwarfUnits::computeSizeAndOffsets() { - // Offset from the first CU in the debug info section is 0 initially. - unsigned SecOffset = 0; - - // Iterate over each compile unit and set the size and offsets for each - // DIE within each compile unit. All offsets are CU relative. - for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) { - (*I)->setDebugInfoOffset(SecOffset); - - // CU-relative offset is reset to 0 here. - unsigned Offset = sizeof(int32_t) + // Length of Unit Info - (*I)->getHeaderSize(); // Unit-specific headers - - // EndOffset here is CU-relative, after laying out - // all of the CU DIE. - unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); - SecOffset += EndOffset; - } -} - // Emit initial Dwarf sections with a label at the start of each one. void DwarfDebug::emitSectionLabels() { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); // Dwarf sections base addresses. 
DwarfInfoSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); + emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); + if (useSplitDwarf()) + DwarfInfoDWOSectionSym = + emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); DwarfAbbrevSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); + emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); if (useSplitDwarf()) - DwarfAbbrevDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(), - "section_abbrev_dwo"); - emitSectionSym(Asm, TLOF.getDwarfARangesSection()); - - if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection()) - emitSectionSym(Asm, MacroInfo); + DwarfAbbrevDWOSectionSym = emitSectionSym( + Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo"); + if (GenerateARangeSection) + emitSectionSym(Asm, TLOF.getDwarfARangesSection()); DwarfLineSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); - emitSectionSym(Asm, TLOF.getDwarfLocSection()); + emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); if (GenerateGnuPubSections) { DwarfGnuPubNamesSectionSym = emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); @@ -2042,39 +1649,36 @@ void DwarfDebug::emitSectionLabels() { } DwarfStrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); + emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); if (useSplitDwarf()) { DwarfStrDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); + emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); DwarfAddrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); - } - DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(), - "debug_range"); - - DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(), - "section_debug_loc"); - - TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin"); - emitSectionSym(Asm, TLOF.getDataSection()); + emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); + DwarfDebugLocSectionSym = + emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc"); + } else + DwarfDebugLocSectionSym = + emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc"); + DwarfDebugRangeSectionSym = + emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); } // Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) { +void DwarfDebug::emitDIE(DIE &Die) { // Get the abbreviation for this DIE. - unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbrevs[AbbrevNumber - 1]; + const DIEAbbrev &Abbrev = Die.getAbbrev(); // Emit the code (index) for the abbreviation. 
if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" + - Twine::utohexstr(Die->getOffset()) + ":0x" + - Twine::utohexstr(Die->getSize()) + " " + - dwarf::TagString(Abbrev->getTag())); - Asm->EmitULEB128(AbbrevNumber); + Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + + "] 0x" + Twine::utohexstr(Die.getOffset()) + + ":0x" + Twine::utohexstr(Die.getSize()) + " " + + dwarf::TagString(Abbrev.getTag())); + Asm->EmitULEB128(Abbrev.getNumber()); - const SmallVectorImpl<DIEValue*> &Values = Die->getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData(); + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); // Emit the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) { @@ -2082,172 +1686,39 @@ void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) { dwarf::Form Form = AbbrevData[i].getForm(); assert(Form && "Too many attributes for DIE (check abbreviation)"); - if (Asm->isVerbose()) + if (Asm->isVerbose()) { Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); - - switch (Attr) { - case dwarf::DW_AT_abstract_origin: - case dwarf::DW_AT_type: - case dwarf::DW_AT_friend: - case dwarf::DW_AT_specification: - case dwarf::DW_AT_import: - case dwarf::DW_AT_containing_type: { - DIEEntry *E = cast<DIEEntry>(Values[i]); - DIE *Origin = E->getEntry(); - unsigned Addr = Origin->getOffset(); - if (Form == dwarf::DW_FORM_ref_addr) { - assert(!useSplitDwarf() && "TODO: dwo files can't have relocations."); - // For DW_FORM_ref_addr, output the offset from beginning of debug info - // section. Origin->getOffset() returns the offset from start of the - // compile unit. - CompileUnit *CU = CUDieMap.lookup(Origin->getCompileUnit()); - assert(CU && "CUDie should belong to a CU."); - Addr += CU->getDebugInfoOffset(); - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - Asm->EmitLabelPlusOffset(DwarfInfoSectionSym, Addr, - DIEEntry::getRefAddrSize(Asm)); - else - Asm->EmitLabelOffsetDifference(DwarfInfoSectionSym, Addr, - DwarfInfoSectionSym, - DIEEntry::getRefAddrSize(Asm)); - } else { - // Make sure Origin belong to the same CU. - assert(Die->getCompileUnit() == Origin->getCompileUnit() && - "The referenced DIE should belong to the same CU in ref4"); - Asm->EmitInt32(Addr); - } - break; - } - case dwarf::DW_AT_ranges: { - // DW_AT_range Value encodes offset in debug_range section. - DIEInteger *V = cast<DIEInteger>(Values[i]); - - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) { - Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, - V->getValue(), - 4); - } else { - Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym, - V->getValue(), - DwarfDebugRangeSectionSym, - 4); - } - break; - } - case dwarf::DW_AT_location: { - if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - Asm->EmitSectionOffset(L->getValue(), DwarfDebugLocSectionSym); - else - Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); - } else { - Values[i]->EmitValue(Asm, Form); - } - break; - } - case dwarf::DW_AT_accessibility: { - if (Asm->isVerbose()) { - DIEInteger *V = cast<DIEInteger>(Values[i]); - Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue())); - } - Values[i]->EmitValue(Asm, Form); - break; - } - default: - // Emit an attribute using the defined form. 
- Values[i]->EmitValue(Asm, Form); - break; + if (Attr == dwarf::DW_AT_accessibility) + Asm->OutStreamer.AddComment(dwarf::AccessibilityString( + cast<DIEInteger>(Values[i])->getValue())); } + + // Emit an attribute using the defined form. + Values[i]->EmitValue(Asm, Form); } // Emit the DIE children if any. - if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) { - const std::vector<DIE *> &Children = Die->getChildren(); + if (Abbrev.hasChildren()) { + for (auto &Child : Die.getChildren()) + emitDIE(*Child); - for (unsigned j = 0, M = Children.size(); j < M; ++j) - emitDIE(Children[j], Abbrevs); - - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("End Of Children Mark"); + Asm->OutStreamer.AddComment("End Of Children Mark"); Asm->EmitInt8(0); } } -// Emit the various dwarf units to the unit section USection with -// the abbreviations going into ASection. -void DwarfUnits::emitUnits(DwarfDebug *DD, - const MCSection *USection, - const MCSection *ASection, - const MCSymbol *ASectionSym) { - Asm->OutStreamer.SwitchSection(USection); - for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) { - CompileUnit *TheCU = *I; - DIE *Die = TheCU->getCUDie(); - - // Emit the compile units header. - Asm->OutStreamer - .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(), - TheCU->getUniqueID())); - - // Emit size of content not including length itself - Asm->OutStreamer.AddComment("Length of Unit"); - Asm->EmitInt32(TheCU->getHeaderSize() + Die->getSize()); - - TheCU->emitHeader(ASection, ASectionSym); - - DD->emitDIE(Die, Abbreviations); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(), - TheCU->getUniqueID())); - } -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { - DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(), - Asm->getObjFileLowering().getDwarfAbbrevSection(), - DwarfAbbrevSectionSym); + Holder.emitUnits(this, DwarfAbbrevSectionSym); } // Emit the abbreviation section. void DwarfDebug::emitAbbreviations() { - if (!useSplitDwarf()) - emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(), - &Abbreviations); - else - emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); -} - -void DwarfDebug::emitAbbrevs(const MCSection *Section, - std::vector<DIEAbbrev *> *Abbrevs) { - // Check to see if it is worth the effort. - if (!Abbrevs->empty()) { - // Start the debug abbrev section. - Asm->OutStreamer.SwitchSection(Section); - - MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName()); - Asm->OutStreamer.EmitLabel(Begin); - - // For each abbrevation. - for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) { - // Get abbreviation data - const DIEAbbrev *Abbrev = Abbrevs->at(i); - - // Emit the abbrevations code (base 1 index.) - Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - // Emit the abbreviations data. - Abbrev->Emit(Asm); - } - - // Mark end of abbreviations. - Asm->EmitULEB128(0, "EOM(3)"); - - MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName()); - Asm->OutStreamer.EmitLabel(End); - } + Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); } // Emit the last address of the section and the end of the line matrix. 
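Both the abbreviation codes and the attribute values above go out through EmitULEB128. As a point of reference only (a standalone sketch, not code from this tree; LLVM carries its own in-tree encoder), ULEB128 packs 7 payload bits per byte and sets the high bit on every byte except the last:

    // Illustrative ULEB128 encoder, mirroring the wire format that
    // Asm->EmitULEB128 writes for abbreviation codes above.
    #include <cstdint>
    #include <vector>

    static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f; // low 7 bits of the remaining value
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // high bit set: more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
    }

An abbreviation code of 1, the common case for the first DIE in a unit, thus encodes as the single byte 0x01, while 300 encodes as the two bytes 0xac 0x02.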
@@ -2263,8 +1734,9 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->OutStreamer.AddComment("Section end label"); - Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd), - Asm->getDataLayout().getPointerSize()); + Asm->OutStreamer.EmitSymbolValue( + Asm->GetTempSymbol("section_end", SectionEnd), + Asm->getDataLayout().getPointerSize()); // Mark end of matrix. Asm->OutStreamer.AddComment("DW_LNE_end_sequence"); @@ -2275,122 +1747,52 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { - DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, - dwarf::DW_FORM_data4)); - for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; - const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames(); - for (StringMap<std::vector<DIE*> >::const_iterator - GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - StringRef Name = GI->getKey(); - const std::vector<DIE *> &Entities = GI->second; - for (std::vector<DIE *>::const_iterator DI = Entities.begin(), - DE = Entities.end(); DI != DE; ++DI) - AT.AddName(Name, (*DI)); - } - } - - AT.FinalizeTable(Asm, "Names"); + AccelNames.FinalizeTable(Asm, "Names"); Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelNamesSection()); + Asm->getObjFileLowering().getDwarfAccelNamesSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AT.Emit(Asm, SectionBegin, &InfoHolder); + AccelNames.Emit(Asm, SectionBegin, &InfoHolder); } // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { - DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, - dwarf::DW_FORM_data4)); - for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; - const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC(); - for (StringMap<std::vector<DIE*> >::const_iterator - GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - StringRef Name = GI->getKey(); - const std::vector<DIE *> &Entities = GI->second; - for (std::vector<DIE *>::const_iterator DI = Entities.begin(), - DE = Entities.end(); DI != DE; ++DI) - AT.AddName(Name, (*DI)); - } - } - - AT.FinalizeTable(Asm, "ObjC"); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() - .getDwarfAccelObjCSection()); + AccelObjC.FinalizeTable(Asm, "ObjC"); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAccelObjCSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AT.Emit(Asm, SectionBegin, &InfoHolder); + AccelObjC.Emit(Asm, SectionBegin, &InfoHolder); } // Emit namespace dies into a hashed accelerator table. 
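The emitAccelNames and emitAccelObjC bodies above, and the namespace and type emitters that follow, now differ only in which table, section, and label name they touch. A hypothetical fold of that shared shape (the helper name emitAccel is ours for illustration; this change keeps the four bodies separate):

    // Sketch only; 'emitAccel' is not part of this change.
    static void emitAccel(AsmPrinter *Asm, DwarfAccelTable &Table,
                          DwarfFile &Holder, const MCSection *Section,
                          StringRef TableName, StringRef SymName) {
      Table.FinalizeTable(Asm, TableName);
      Asm->OutStreamer.SwitchSection(Section);
      MCSymbol *SectionBegin = Asm->GetTempSymbol(SymName);
      Asm->OutStreamer.EmitLabel(SectionBegin);
      // Emit the full data.
      Table.Emit(Asm, SectionBegin, &Holder);
    }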
void DwarfDebug::emitAccelNamespaces() {
- DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
- dwarf::DW_FORM_data4));
- for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
- E = CUMap.end(); I != E; ++I) {
- CompileUnit *TheCU = I->second;
- const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace();
- for (StringMap<std::vector<DIE*> >::const_iterator
- GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
- StringRef Name = GI->getKey();
- const std::vector<DIE *> &Entities = GI->second;
- for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
- DE = Entities.end(); DI != DE; ++DI)
- AT.AddName(Name, (*DI));
- }
- }
-
- AT.FinalizeTable(Asm, "namespac");
- Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
- .getDwarfAccelNamespaceSection());
+ AccelNamespace.FinalizeTable(Asm, "namespac");
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAccelNamespaceSection());
 MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
 Asm->OutStreamer.EmitLabel(SectionBegin);

 // Emit the full data.
- AT.Emit(Asm, SectionBegin, &InfoHolder);
+ AccelNamespace.Emit(Asm, SectionBegin, &InfoHolder);
 }

 // Emit type dies into a hashed accelerator table.
 void DwarfDebug::emitAccelTypes() {
- std::vector<DwarfAccelTable::Atom> Atoms;
- Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
- dwarf::DW_FORM_data4));
- Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag,
- dwarf::DW_FORM_data2));
- Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags,
- dwarf::DW_FORM_data1));
- DwarfAccelTable AT(Atoms);
- for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
- E = CUMap.end(); I != E; ++I) {
- CompileUnit *TheCU = I->second;
- const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names
- = TheCU->getAccelTypes();
- for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator
- GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
- StringRef Name = GI->getKey();
- const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second;
- for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI
- = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI)
- AT.AddName(Name, (*DI).first, (*DI).second);
- }
- }
- AT.FinalizeTable(Asm, "types");
- Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
- .getDwarfAccelTypesSection());
+ AccelTypes.FinalizeTable(Asm, "types");
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAccelTypesSection());
 MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
 Asm->OutStreamer.EmitLabel(SectionBegin);

 // Emit the full data.
- AT.Emit(Asm, SectionBegin, &InfoHolder);
+ AccelTypes.Emit(Asm, SectionBegin, &InfoHolder);
 }

 // Public name handling.
@@ -2408,16 +1810,16 @@ void DwarfDebug::emitAccelTypes() {
 // reference in the pubname header doesn't change.

 /// computeIndexValue - Compute the gdb index value for the DIE and CU.
-static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU,
- DIE *Die) {
+static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
+ const DIE *Die) {
 dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;

 // We could have a specification DIE that has most of our knowledge,
 // look for that now.
DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification); if (SpecVal) { - DIE *SpecDIE = cast<DIEEntry>(SpecVal)->getEntry(); - if (SpecDIE->findAttribute(dwarf::DW_AT_external)) + DIE &SpecDIE = cast<DIEEntry>(SpecVal)->getEntry(); + if (SpecDIE.findAttribute(dwarf::DW_AT_external)) Linkage = dwarf::GIEL_EXTERNAL; } else if (Die->findAttribute(dwarf::DW_AT_external)) Linkage = dwarf::GIEL_EXTERNAL; @@ -2453,342 +1855,212 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU, /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames(bool GnuStyle) { - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); const MCSection *PSec = GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() : Asm->getObjFileLowering().getDwarfPubNamesSection(); - typedef DenseMap<const MDNode*, CompileUnit*> CUMapType; - for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; - unsigned ID = TheCU->getUniqueID(); + emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfUnit::getGlobalNames); +} + +void DwarfDebug::emitDebugPubSection( + bool GnuStyle, const MCSection *PSec, StringRef Name, + const StringMap<const DIE *> &(DwarfUnit::*Accessor)() const) { + for (const auto &NU : CUMap) { + DwarfCompileUnit *TheU = NU.second; + + const auto &Globals = (TheU->*Accessor)(); + + if (Globals.empty()) + continue; + + if (auto Skeleton = static_cast<DwarfCompileUnit *>(TheU->getSkeleton())) + TheU = Skeleton; + unsigned ID = TheU->getUniqueID(); // Start the dwarf pubnames section. Asm->OutStreamer.SwitchSection(PSec); - // Emit a label so we can reference the beginning of this pubname section. - if (GnuStyle) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames", - TheCU->getUniqueID())); - // Emit the header. - Asm->OutStreamer.AddComment("Length of Public Names Info"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID), - Asm->GetTempSymbol("pubnames_begin", ID), 4); + Asm->OutStreamer.AddComment("Length of Public " + Name + " Info"); + MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID); + MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID); + Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); + Asm->OutStreamer.EmitLabel(BeginLabel); Asm->OutStreamer.AddComment("DWARF Version"); Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), - DwarfInfoSectionSym); + Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym()); Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID), - Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), - 4); + Asm->EmitLabelDifference(TheU->getLabelEnd(), TheU->getLabelBegin(), 4); // Emit the pubnames for this compilation unit. 
- const StringMap<DIE*> &Globals = TheCU->getGlobalNames(); - for (StringMap<DIE*>::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; + for (const auto &GI : Globals) { + const char *Name = GI.getKeyData(); + const DIE *Entity = GI.second; Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); if (GnuStyle) { - dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); Asm->OutStreamer.AddComment( Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); Asm->EmitInt8(Desc.toBits()); } - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); + Asm->OutStreamer.AddComment("External Name"); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); } Asm->OutStreamer.AddComment("End Mark"); Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID)); + Asm->OutStreamer.EmitLabel(EndLabel); } } void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); const MCSection *PSec = GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() : Asm->getObjFileLowering().getDwarfPubTypesSection(); - for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); - I != E; ++I) { - CompileUnit *TheCU = I->second; - // Start the dwarf pubtypes section. - Asm->OutStreamer.SwitchSection(PSec); - - // Emit a label so we can reference the beginning of this pubtype section. - if (GnuStyle) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes", - TheCU->getUniqueID())); - - // Emit the header. - Asm->OutStreamer.AddComment("Length of Public Types Info"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), - Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); - - Asm->OutStreamer.EmitLabel( - Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID())); - - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DW_PUBTYPES_VERSION); - - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset( - Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), - DwarfInfoSectionSym); - - Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol(ISec->getLabelEndName(), TheCU->getUniqueID()), - Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), 4); - - // Emit the pubtypes. - const StringMap<DIE *> &Globals = TheCU->getGlobalTypes(); - for (StringMap<DIE *>::const_iterator GI = Globals.begin(), - GE = Globals.end(); - GI != GE; ++GI) { - const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; - - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); - - if (GnuStyle) { - dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); - Asm->OutStreamer.AddComment( - Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + - dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); - Asm->EmitInt8(Desc.toBits()); - } - - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - - // Emit the name with a terminating null byte. 
- Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1)); - } - - Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel( - Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID())); - } + emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfUnit::getGlobalTypes); } -// Emit strings into a string section. -void DwarfUnits::emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection = NULL, - const MCSymbol *StrSecSym = NULL) { - - if (StringPool.empty()) return; - - // Start the dwarf str section. - Asm->OutStreamer.SwitchSection(StrSection); - - // Get all of the string pool entries and put them in an array by their ID so - // we can sort them. - SmallVector<std::pair<unsigned, - StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries; - - for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator - I = StringPool.begin(), E = StringPool.end(); - I != E; ++I) - Entries.push_back(std::make_pair(I->second.second, &*I)); - - array_pod_sort(Entries.begin(), Entries.end()); - - for (unsigned i = 0, e = Entries.size(); i != e; ++i) { - // Emit a label for reference from debug information entries. - Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first); - - // Emit the string itself with a terminating null byte. - Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(), - Entries[i].second->getKeyLength()+1)); - } - - // If we've got an offset section go ahead and emit that now as well. - if (OffsetSection) { - Asm->OutStreamer.SwitchSection(OffsetSection); - unsigned offset = 0; - unsigned size = 4; // FIXME: DWARF64 is 8. - for (unsigned i = 0, e = Entries.size(); i != e; ++i) { - Asm->OutStreamer.EmitIntValue(offset, size); - offset += Entries[i].second->getKeyLength() + 1; - } - } +// Emit visible names into a debug str section. +void DwarfDebug::emitDebugStr() { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -// Emit strings into a string section. -void DwarfUnits::emitAddresses(const MCSection *AddrSection) { - - if (AddressPool.empty()) return; - - // Start the dwarf addr section. - Asm->OutStreamer.SwitchSection(AddrSection); - - // Order the address pool entries by ID - SmallVector<const MCExpr *, 64> Entries(AddressPool.size()); - - for (DenseMap<const MCExpr *, unsigned>::iterator I = AddressPool.begin(), - E = AddressPool.end(); - I != E; ++I) - Entries[I->second] = I->first; +void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocEntry &Entry) { + assert(Entry.getValues().size() == 1 && + "multi-value entries are not supported yet."); + const DebugLocEntry::Value Value = Entry.getValues()[0]; + DIVariable DV(Value.getVariable()); + if (Value.isInt()) { + DIBasicType BTy(resolve(DV.getType())); + if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || + BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { + Streamer.EmitInt8(dwarf::DW_OP_consts, "DW_OP_consts"); + Streamer.EmitSLEB128(Value.getInt()); + } else { + Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu"); + Streamer.EmitULEB128(Value.getInt()); + } + } else if (Value.isLocation()) { + MachineLocation Loc = Value.getLoc(); + if (!DV.hasComplexAddress()) + // Regular entry. + Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); + else { + // Complex address entry. 
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (Loc.getOffset()) {
+ i = 2;
+ Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
+ Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
+ Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
+ Streamer.EmitSLEB128(DV.getAddrElement(1));
+ } else {
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1));
+ Asm->EmitDwarfRegOp(Streamer, TLoc, DV.isIndirect());
+ i = 2;
+ }
+ } else {
+ Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
+ }

- for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
- // Emit an expression for reference from debug information entries.
- if (const MCExpr *Expr = Entries[i])
- Asm->OutStreamer.EmitValue(Expr, Asm->getDataLayout().getPointerSize());
- else
- Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize());

+ // Emit remaining complex address elements.
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
+ Streamer.EmitULEB128(DV.getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref) {
+ if (!Loc.isReg())
+ Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
+ } else
+ llvm_unreachable("unknown Opcode found in complex address");
+ }
+ }
 }
-
+ // else ... ignore constant fp. There is no good way to
+ // represent them here in dwarf.
+ // FIXME: ^
 }

-// Emit visible names into a debug str section.
-void DwarfDebug::emitDebugStr() {
- DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
+void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) {
+ Asm->OutStreamer.AddComment("Loc expr size");
+ MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
+ MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
+ Asm->EmitLabelDifference(end, begin, 2);
+ Asm->OutStreamer.EmitLabel(begin);
+ // Emit the entry.
+ APByteStreamer Streamer(*Asm);
+ emitDebugLocEntry(Streamer, Entry);
+ // Close the range.
+ Asm->OutStreamer.EmitLabel(end);
 }

 // Emit locations into the debug loc section.
 void DwarfDebug::emitDebugLoc() {
- if (DotDebugLocEntries.empty())
- return;
-
- for (SmallVectorImpl<DotDebugLocEntry>::iterator
- I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
- I != E; ++I) {
- DotDebugLocEntry &Entry = *I;
- if (I + 1 != DotDebugLocEntries.end())
- Entry.Merge(I+1);
- }
-
 // Start the dwarf loc section.
Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfLocSection()); + Asm->getObjFileLowering().getDwarfLocSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); - unsigned index = 1; - for (SmallVectorImpl<DotDebugLocEntry>::iterator - I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); - I != E; ++I, ++index) { - DotDebugLocEntry &Entry = *I; - if (Entry.isMerged()) continue; - if (Entry.isEmpty()) { - Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); - } else { - Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); - Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); - DIVariable DV(Entry.getVariable()); - Asm->OutStreamer.AddComment("Loc expr size"); - MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); - MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); - Asm->EmitLabelDifference(end, begin, 2); - Asm->OutStreamer.EmitLabel(begin); - if (Entry.isInt()) { - DIBasicType BTy(DV.getType()); - if (BTy.Verify() && - (BTy.getEncoding() == dwarf::DW_ATE_signed - || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { - Asm->OutStreamer.AddComment("DW_OP_consts"); - Asm->EmitInt8(dwarf::DW_OP_consts); - Asm->EmitSLEB128(Entry.getInt()); - } else { - Asm->OutStreamer.AddComment("DW_OP_constu"); - Asm->EmitInt8(dwarf::DW_OP_constu); - Asm->EmitULEB128(Entry.getInt()); - } - } else if (Entry.isLocation()) { - MachineLocation Loc = Entry.getLoc(); - if (!DV.hasComplexAddress()) - // Regular entry. - Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); - else { - // Complex address entry. - unsigned N = DV.getNumAddrElements(); - unsigned i = 0; - if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - if (Loc.getOffset()) { - i = 2; - Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); - Asm->OutStreamer.AddComment("DW_OP_deref"); - Asm->EmitInt8(dwarf::DW_OP_deref); - Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); - Asm->EmitInt8(dwarf::DW_OP_plus_uconst); - Asm->EmitSLEB128(DV.getAddrElement(1)); - } else { - // If first address element is OpPlus then emit - // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); - Asm->EmitDwarfRegOp(TLoc, DV.isIndirect()); - i = 2; - } - } else { - Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); - } - - // Emit remaining complex address elements. - for (; i < N; ++i) { - uint64_t Element = DV.getAddrElement(i); - if (Element == DIBuilder::OpPlus) { - Asm->EmitInt8(dwarf::DW_OP_plus_uconst); - Asm->EmitULEB128(DV.getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - if (!Loc.isReg()) - Asm->EmitInt8(dwarf::DW_OP_deref); - } else - llvm_unreachable("unknown Opcode found in complex address"); - } - } + for (const auto &DebugLoc : DotDebugLocEntries) { + Asm->OutStreamer.EmitLabel(DebugLoc.Label); + for (const auto &Entry : DebugLoc.List) { + // Set up the range. This range is relative to the entry point of the + // compile unit. This is a hard coded 0 for low_pc when we're emitting + // ranges, or the DW_AT_low_pc on the compile unit otherwise. + const DwarfCompileUnit *CU = Entry.getCU(); + if (CU->getRanges().size() == 1) { + // Grab the begin symbol from the first range as our base. 
+ const MCSymbol *Base = CU->getRanges()[0].getStart(); + Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size); + Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size); + } else { + Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); + Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); } - // else ... ignore constant fp. There is not any good way to - // to represent them here in dwarf. - Asm->OutStreamer.EmitLabel(end); + + emitDebugLocEntryLocation(Entry); } + Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer.EmitIntValue(0, Size); } } -struct SymbolCUSorter { - SymbolCUSorter(const MCStreamer &s) : Streamer(s) {} - const MCStreamer &Streamer; - - bool operator() (const SymbolCU &A, const SymbolCU &B) { - unsigned IA = A.Sym ? Streamer.GetSymbolOrder(A.Sym) : 0; - unsigned IB = B.Sym ? Streamer.GetSymbolOrder(B.Sym) : 0; - - // Symbols with no order assigned should be placed at the end. - // (e.g. section end labels) - if (IA == 0) - IA = (unsigned)(-1); - if (IB == 0) - IB = (unsigned)(-1); - return IA < IB; +void DwarfDebug::emitDebugLocDWO() { + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLocDWOSection()); + for (const auto &DebugLoc : DotDebugLocEntries) { + Asm->OutStreamer.EmitLabel(DebugLoc.Label); + for (const auto &Entry : DebugLoc.List) { + // Just always use start_length for now - at least that's one address + // rather than two. We could get fancier and try to, say, reuse an + // address we know we've emitted elsewhere (the start of the function? + // The start of the CU or CU subrange that encloses this range?) + Asm->EmitInt8(dwarf::DW_LLE_start_length_entry); + unsigned idx = AddrPool.getIndex(Entry.getBeginSym()); + Asm->EmitULEB128(idx); + Asm->EmitLabelDifference(Entry.getEndSym(), Entry.getBeginSym(), 4); + + emitDebugLocEntryLocation(Entry); + } + Asm->EmitInt8(dwarf::DW_LLE_end_of_list_entry); } -}; - -static bool CUSort(const CompileUnit *A, const CompileUnit *B) { - return (A->getUniqueID() < B->getUniqueID()); } struct ArangeSpan { @@ -2799,18 +2071,17 @@ struct ArangeSpan { // address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { // Start the dwarf aranges section. - Asm->OutStreamer - .SwitchSection(Asm->getObjFileLowering().getDwarfARangesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); - typedef DenseMap<CompileUnit *, std::vector<ArangeSpan> > SpansType; + typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> SpansType; SpansType Spans; // Build a list of sections used. std::vector<const MCSection *> Sections; - for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); - it++) { - const MCSection *Section = it->first; + for (const auto &it : SectionMap) { + const MCSection *Section = it.first; Sections.push_back(Section); } @@ -2819,32 +2090,40 @@ void DwarfDebug::emitDebugARanges() { std::sort(Sections.begin(), Sections.end(), SectionSort); // Build a set of address spans, sorted by CU. - for (size_t SecIdx=0;SecIdx<Sections.size();SecIdx++) { - const MCSection *Section = Sections[SecIdx]; + for (const MCSection *Section : Sections) { SmallVector<SymbolCU, 8> &List = SectionMap[Section]; if (List.size() < 2) continue; // Sort the symbols by offset within the section. - SymbolCUSorter sorter(Asm->OutStreamer); - std::sort(List.begin(), List.end(), sorter); + std::sort(List.begin(), List.end(), + [&](const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? 
Asm->OutStreamer.GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer.GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + return false; + if (IB == 0) + return true; + return IA < IB; + }); // If we have no section (e.g. common), just write out // individual spans for each symbol. - if (Section == NULL) { - for (size_t n = 0; n < List.size(); n++) { - const SymbolCU &Cur = List[n]; - + if (!Section) { + for (const SymbolCU &Cur : List) { ArangeSpan Span; Span.Start = Cur.Sym; - Span.End = NULL; + Span.End = nullptr; if (Cur.CU) Spans[Cur.CU].push_back(Span); } } else { // Build spans between each label. const MCSymbol *StartSym = List[0].Sym; - for (size_t n = 1; n < List.size(); n++) { + for (size_t n = 1, e = List.size(); n < e; n++) { const SymbolCU &Prev = List[n - 1]; const SymbolCU &Cur = List[n]; @@ -2860,37 +2139,36 @@ void DwarfDebug::emitDebugARanges() { } } - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); unsigned PtrSize = Asm->getDataLayout().getPointerSize(); // Build a list of CUs used. - std::vector<CompileUnit *> CUs; - for (SpansType::iterator it = Spans.begin(); it != Spans.end(); it++) { - CompileUnit *CU = it->first; + std::vector<DwarfCompileUnit *> CUs; + for (const auto &it : Spans) { + DwarfCompileUnit *CU = it.first; CUs.push_back(CU); } // Sort the CU list (again, to ensure consistent output order). - std::sort(CUs.begin(), CUs.end(), CUSort); + std::sort(CUs.begin(), CUs.end(), [](const DwarfUnit *A, const DwarfUnit *B) { + return A->getUniqueID() < B->getUniqueID(); + }); // Emit an arange table for each CU we used. - for (size_t CUIdx=0;CUIdx<CUs.size();CUIdx++) { - CompileUnit *CU = CUs[CUIdx]; + for (DwarfCompileUnit *CU : CUs) { std::vector<ArangeSpan> &List = Spans[CU]; // Emit size of content not including length itself. - unsigned ContentSize - = sizeof(int16_t) // DWARF ARange version number - + sizeof(int32_t) // Offset of CU in the .debug_info section - + sizeof(int8_t) // Pointer Size (in bytes) - + sizeof(int8_t); // Segment Size (in bytes) + unsigned ContentSize = + sizeof(int16_t) + // DWARF ARange version number + sizeof(int32_t) + // Offset of CU in the .debug_info section + sizeof(int8_t) + // Pointer Size (in bytes) + sizeof(int8_t); // Segment Size (in bytes) unsigned TupleSize = PtrSize * 2; // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. 
- unsigned Padding = 0;
- while (((sizeof(int32_t) + ContentSize + Padding) % TupleSize) != 0)
- Padding++;
+ unsigned Padding =
+ OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize);

 ContentSize += Padding;
 ContentSize += (List.size() + 1) * TupleSize;
@@ -2901,19 +2179,15 @@ void DwarfDebug::emitDebugARanges() {
 Asm->OutStreamer.AddComment("DWARF Arange version number");
 Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
 Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
- Asm->EmitSectionOffset(
- Asm->GetTempSymbol(ISec->getLabelBeginName(), CU->getUniqueID()),
- DwarfInfoSectionSym);
+ Asm->EmitSectionOffset(CU->getLocalLabelBegin(), CU->getLocalSectionSym());
 Asm->OutStreamer.AddComment("Address Size (in bytes)");
 Asm->EmitInt8(PtrSize);
 Asm->OutStreamer.AddComment("Segment Size (in bytes)");
 Asm->EmitInt8(0);

- for (unsigned n = 0; n < Padding; n++)
- Asm->EmitInt8(0xff);
+ Asm->OutStreamer.EmitFill(Padding, 0xff);

- for (unsigned n = 0; n < List.size(); n++) {
- const ArangeSpan &Span = List[n];
+ for (const ArangeSpan &Span : List) {
 Asm->EmitLabelReference(Span.Start, PtrSize);

 // Calculate the size as being from the span start to its end.
@@ -2939,125 +2213,133 @@
 // Emit visible names into a debug ranges section.
 void DwarfDebug::emitDebugRanges() {
 // Start the dwarf ranges section.
- Asm->OutStreamer
- .SwitchSection(Asm->getObjFileLowering().getDwarfRangesSection());
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
+
+ // Size for our labels.
 unsigned char Size = Asm->getDataLayout().getPointerSize();
- for (SmallVectorImpl<const MCSymbol *>::iterator
- I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
- I != E; ++I) {
- if (*I)
- Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size);
- else
+
+ // Grab the specific ranges for the compile units in the module.
+ for (const auto &I : CUMap) {
+ DwarfCompileUnit *TheCU = I.second;
+
+ // Iterate over the misc ranges for the compile units in the module.
+ for (const RangeSpanList &List : TheCU->getRangeLists()) {
+ // Emit our symbol so we can find the beginning of the range.
+ Asm->OutStreamer.EmitLabel(List.getSym());
+
+ for (const RangeSpan &Range : List.getRanges()) {
+ const MCSymbol *Begin = Range.getStart();
+ const MCSymbol *End = Range.getEnd();
+ assert(Begin && "Range without a begin symbol?");
+ assert(End && "Range without an end symbol?");
+ if (TheCU->getRanges().size() == 1) {
+ // Grab the begin symbol from the first range as our base.
+ const MCSymbol *Base = TheCU->getRanges()[0].getStart();
+ Asm->EmitLabelDifference(Begin, Base, Size);
+ Asm->EmitLabelDifference(End, Base, Size);
+ } else {
+ Asm->OutStreamer.EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer.EmitSymbolValue(End, Size);
+ }
+ }
+
+ // And terminate the list with two 0 values.
 Asm->OutStreamer.EmitIntValue(0, Size);
- }
-}
+ Asm->OutStreamer.EmitIntValue(0, Size);
+ }

-// Emit visible names into a debug macinfo section.
-void DwarfDebug::emitDebugMacInfo() {
- if (const MCSection *LineInfo =
- Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
- // Start the dwarf macinfo section.
- Asm->OutStreamer.SwitchSection(LineInfo);
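(Aside, illustrative only: the OffsetToAlignment call introduced above in the aranges emitter is LLVM's helper from llvm/Support/MathExtras.h; it computes in one step the padding the removed loop counted one byte at a time, i.e. the bytes needed to round a value up to the next multiple of the alignment:

    // Sketch of the helper's arithmetic; returns 0 if already aligned.
    static uint64_t offsetToAlignment(uint64_t Value, uint64_t Align) {
      return (Align - (Value % Align)) % Align;
    }

With 4-byte pointers the arange header above occupies 12 bytes and TupleSize is 8, so Padding comes out to 4 bytes of 0xff fill.)

+ // Now emit a range for the CU itself.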
+ if (TheCU->getRanges().size() > 1) { + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("cu_ranges", TheCU->getUniqueID())); + for (const RangeSpan &Range : TheCU->getRanges()) { + const MCSymbol *Begin = Range.getStart(); + const MCSymbol *End = Range.getEnd(); + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); + Asm->OutStreamer.EmitSymbolValue(Begin, Size); + Asm->OutStreamer.EmitSymbolValue(End, Size); + } + // And terminate the list with two 0 values. + Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer.EmitIntValue(0, Size); + } } } // DWARF5 Experimental Separate Dwarf emitters. -// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, -// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, -// DW_AT_ranges_base, DW_AT_addr_base. -CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) { +void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, + std::unique_ptr<DwarfUnit> NewU) { + NewU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, + U.getCUNode().getSplitDebugFilename()); - DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(CU->getUniqueID(), Die, CU->getNode(), - Asm, this, &SkeletonHolder); + if (!CompilationDir.empty()) + NewU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); - NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, - CU->getNode().getSplitDebugFilename()); + addGnuPubAttributes(*NewU, Die); - // Relocate to the beginning of the addr_base section, else 0 for the - // beginning of the one for this compile unit. - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addSectionLabel(Die, dwarf::DW_AT_GNU_addr_base, - DwarfAddrSectionSym); - else - NewCU->addSectionOffset(Die, dwarf::DW_AT_GNU_addr_base, 0); - - // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. We're using 0, or a NULL label for this. - NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); - - // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. - // FIXME: Should handle multiple compile units. - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addSectionLabel(Die, dwarf::DW_AT_stmt_list, - DwarfLineSectionSym); - else - NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0); + SkeletonHolder.addUnit(std::move(NewU)); +} - if (!CompilationDir.empty()) - NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); +// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, +// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, +// DW_AT_addr_base, DW_AT_ranges_base. +DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { - // Flags to let the linker know we have emitted new style pubnames. 
- if (GenerateGnuPubSections) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addSectionLabel( - Die, dwarf::DW_AT_GNU_pubnames, - Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); - else - NewCU->addSectionDelta( - Die, dwarf::DW_AT_GNU_pubnames, - Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), - DwarfGnuPubNamesSectionSym); - - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addSectionLabel( - Die, dwarf::DW_AT_GNU_pubtypes, - Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); - else - NewCU->addSectionDelta( - Die, dwarf::DW_AT_GNU_pubtypes, - Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), - DwarfGnuPubTypesSectionSym); - } + auto OwnedUnit = make_unique<DwarfCompileUnit>( + CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); + DwarfCompileUnit &NewCU = *OwnedUnit; + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), + DwarfInfoSectionSym); - // Flag if we've emitted any ranges and their location for the compile unit. - if (DebugRangeSymbols.size()) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addSectionLabel(Die, dwarf::DW_AT_GNU_ranges_base, - DwarfDebugRangeSectionSym); - else - NewCU->addUInt(Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4, - 0); - } + NewCU.initStmtList(DwarfLineSectionSym); - SkeletonHolder.addUnit(NewCU); - SkeletonCUs.push_back(NewCU); + initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); return NewCU; } -void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) { - assert(useSplitDwarf() && "No split dwarf debug info?"); - emitAbbrevs(Section, &SkeletonAbbrevs); +// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_dwo_name, +// DW_AT_addr_base. +DwarfTypeUnit &DwarfDebug::constructSkeletonTU(DwarfTypeUnit &TU) { + DwarfCompileUnit &CU = static_cast<DwarfCompileUnit &>( + *SkeletonHolder.getUnits()[TU.getCU().getUniqueID()]); + + auto OwnedUnit = make_unique<DwarfTypeUnit>(TU.getUniqueID(), CU, Asm, this, + &SkeletonHolder); + DwarfTypeUnit &NewTU = *OwnedUnit; + NewTU.setTypeSignature(TU.getTypeSignature()); + NewTU.setType(nullptr); + NewTU.initSection( + Asm->getObjFileLowering().getDwarfTypesSection(TU.getTypeSignature())); + + initSkeletonUnit(TU, NewTU.getUnitDie(), std::move(OwnedUnit)); + return NewTU; } // Emit the .debug_info.dwo section for separated dwarf. This contains the // compile units that would normally be in debug_info. void DwarfDebug::emitDebugInfoDWO() { assert(useSplitDwarf() && "No split dwarf debug info?"); - InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(), - Asm->getObjFileLowering().getDwarfAbbrevDWOSection(), - DwarfAbbrevDWOSectionSym); + // Don't pass an abbrev symbol, using a constant zero instead so as not to + // emit relocations into the dwo file. + InfoHolder.emitUnits(this, /* AbbrevSymbol */ nullptr); } // Emit the .debug_abbrev.dwo section for separated dwarf. This contains the // abbreviations for the .debug_info.dwo section. 
void DwarfDebug::emitDebugAbbrevDWO() { assert(useSplitDwarf() && "No split dwarf?"); - emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(), - &Abbreviations); + InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection()); +} + +void DwarfDebug::emitDebugLineDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLineDWOSection()); + SplitTypeUnitFileTable.Emit(Asm->OutStreamer); } // Emit the .debug_str.dwo section for separated dwarf. This contains the @@ -3065,9 +2347,152 @@ void DwarfDebug::emitDebugAbbrevDWO() { // sections. void DwarfDebug::emitDebugStrDWO() { assert(useSplitDwarf() && "No split dwarf?"); - const MCSection *OffSec = Asm->getObjFileLowering() - .getDwarfStrOffDWOSection(); + const MCSection *OffSec = + Asm->getObjFileLowering().getDwarfStrOffDWOSection(); const MCSymbol *StrSym = DwarfStrSectionSym; InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), OffSec, StrSym); } + +MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { + if (!useSplitDwarf()) + return nullptr; + if (SingleCU) + SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode().getDirectory()); + return &SplitTypeUnitFileTable; +} + +static uint64_t makeTypeSignature(StringRef Identifier) { + MD5 Hash; + Hash.update(Identifier); + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + MD5::MD5Result Result; + Hash.final(Result); + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} + +void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, + StringRef Identifier, DIE &RefDie, + DICompositeType CTy) { + // Fast path if we're building some type units and one has already used the + // address pool we know we're going to throw away all this work anyway, so + // don't bother building dependent types. + if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) + return; + + const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy]; + if (TU) { + CU.addDIETypeSignature(RefDie, *TU); + return; + } + + bool TopLevelType = TypeUnitsUnderConstruction.empty(); + AddrPool.resetUsedFlag(); + + auto OwnedUnit = + make_unique<DwarfTypeUnit>(InfoHolder.getUnits().size(), CU, Asm, this, + &InfoHolder, getDwoLineTable(CU)); + DwarfTypeUnit &NewTU = *OwnedUnit; + DIE &UnitDie = NewTU.getUnitDie(); + TU = &NewTU; + TypeUnitsUnderConstruction.push_back( + std::make_pair(std::move(OwnedUnit), CTy)); + + NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + CU.getLanguage()); + + uint64_t Signature = makeTypeSignature(Identifier); + NewTU.setTypeSignature(Signature); + + if (!useSplitDwarf()) + CU.applyStmtList(UnitDie); + + // FIXME: Skip using COMDAT groups for type units in the .dwo file once tools + // such as DWP ( http://gcc.gnu.org/wiki/DebugFissionDWP ) can cope with it. + NewTU.initSection( + useSplitDwarf() + ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature) + : Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + + NewTU.setType(NewTU.createTypeDIE(CTy)); + + if (TopLevelType) { + auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction); + TypeUnitsUnderConstruction.clear(); + + // Types referencing entries in the address table cannot be placed in type + // units. + if (AddrPool.hasBeenUsed()) { + + // Remove all the types built while building this type. 
+ // This is pessimistic as some of these types might not be dependent on + // the type that used an address. + for (const auto &TU : TypeUnitsToAdd) + DwarfTypeUnits.erase(TU.second); + + // Construct this type in the CU directly. + // This is inefficient because all the dependent types will be rebuilt + // from scratch, including building them in type units, discovering that + // they depend on addresses, throwing them out and rebuilding them. + CU.constructTypeDIE(RefDie, CTy); + return; + } + + // If the type wasn't dependent on fission addresses, finish adding the type + // and all its dependent types. + for (auto &TU : TypeUnitsToAdd) { + if (useSplitDwarf()) + TU.first->setSkeleton(constructSkeletonTU(*TU.first)); + InfoHolder.addUnit(std::move(TU.first)); + } + } + CU.addDIETypeSignature(RefDie, NewTU); +} + +void DwarfDebug::attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, + MCSymbol *Begin, MCSymbol *End) { + assert(Begin && "Begin label should not be null!"); + assert(End && "End label should not be null!"); + assert(Begin->isDefined() && "Invalid starting label"); + assert(End->isDefined() && "Invalid end label"); + + Unit.addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); + if (DwarfVersion < 4) + Unit.addLabelAddress(D, dwarf::DW_AT_high_pc, End); + else + Unit.addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); +} + +// Accelerator table mutators - add each name along with its companion +// DIE to the proper table while ensuring that the name that we're going +// to reference is in the string table. We do this since the names we +// add may not only be identical to the names in the DIE. +void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { + if (!useDwarfAccelTables()) + return; + AccelNames.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), + &Die); +} + +void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) { + if (!useDwarfAccelTables()) + return; + AccelObjC.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), + &Die); +} + +void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) { + if (!useDwarfAccelTables()) + return; + AccelNamespace.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), + &Die); +} + +void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { + if (!useDwarfAccelTables()) + return; + AccelTypes.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), + &Die); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index cebac39..f2aa808 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -14,44 +14,50 @@ #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ +#include "DwarfFile.h" +#include "AsmPrinterHandler.h" #include "DIE.h" +#include "DbgValueHistoryCalculator.h" +#include "DebugLocEntry.h" +#include "DebugLocList.h" +#include "DwarfAccelTable.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/CodeGen/LexicalScopes.h" -#include "llvm/DebugInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/Support/Allocator.h" -#include 
"llvm/Support/DebugLoc.h" + +#include <memory> namespace llvm { -class CompileUnit; +class AsmPrinter; +class ByteStreamer; class ConstantInt; class ConstantFP; -class DbgVariable; -class MachineFrameInfo; +class DwarfCompileUnit; +class DwarfDebug; +class DwarfTypeUnit; +class DwarfUnit; class MachineModuleInfo; -class MachineOperand; -class MCAsmInfo; -class DIEAbbrev; -class DIE; -class DIEBlock; -class DIEEntry; //===----------------------------------------------------------------------===// /// \brief This class is used to record source line correspondence. class SrcLineInfo { - unsigned Line; // Source line number. - unsigned Column; // Source column. - unsigned SourceID; // Source ID number. - MCSymbol *Label; // Label in code ID number. + unsigned Line; // Source line number. + unsigned Column; // Source column. + unsigned SourceID; // Source ID number. + MCSymbol *Label; // Label in code ID number. public: SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label) - : Line(L), Column(C), SourceID(S), Label(label) {} + : Line(L), Column(C), SourceID(S), Label(label) {} // Accessors unsigned getLine() const { return Line; } @@ -60,124 +66,47 @@ public: MCSymbol *getLabel() const { return Label; } }; -/// \brief This struct describes location entries emitted in the .debug_loc -/// section. -class DotDebugLocEntry { - // Begin and end symbols for the address range that this location is valid. - const MCSymbol *Begin; - const MCSymbol *End; - - // Type of entry that this represents. - enum EntryType { - E_Location, - E_Integer, - E_ConstantFP, - E_ConstantInt - }; - enum EntryType EntryKind; - - union { - int64_t Int; - const ConstantFP *CFP; - const ConstantInt *CIP; - } Constants; - - // The location in the machine frame. - MachineLocation Loc; - - // The variable to which this location entry corresponds. - const MDNode *Variable; - - // Whether this location has been merged. - bool Merged; - -public: - DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) { - Constants.Int = 0; - } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, - const MDNode *V) - : Begin(B), End(E), Loc(L), Variable(V), Merged(false) { - Constants.Int = 0; - EntryKind = E_Location; - } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i) - : Begin(B), End(E), Variable(0), Merged(false) { - Constants.Int = i; - EntryKind = E_Integer; - } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr) - : Begin(B), End(E), Variable(0), Merged(false) { - Constants.CFP = FPtr; - EntryKind = E_ConstantFP; - } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, - const ConstantInt *IPtr) - : Begin(B), End(E), Variable(0), Merged(false) { - Constants.CIP = IPtr; - EntryKind = E_ConstantInt; - } - - /// \brief Empty entries are also used as a trigger to emit temp label. Such - /// labels are referenced is used to find debug_loc offset for a given DIE. 
- bool isEmpty() { return Begin == 0 && End == 0; } - bool isMerged() { return Merged; } - void Merge(DotDebugLocEntry *Next) { - if (!(Begin && Loc == Next->Loc && End == Next->Begin)) - return; - Next->Begin = Begin; - Merged = true; - } - bool isLocation() const { return EntryKind == E_Location; } - bool isInt() const { return EntryKind == E_Integer; } - bool isConstantFP() const { return EntryKind == E_ConstantFP; } - bool isConstantInt() const { return EntryKind == E_ConstantInt; } - int64_t getInt() const { return Constants.Int; } - const ConstantFP *getConstantFP() const { return Constants.CFP; } - const ConstantInt *getConstantInt() const { return Constants.CIP; } - const MDNode *getVariable() const { return Variable; } - const MCSymbol *getBeginSym() const { return Begin; } - const MCSymbol *getEndSym() const { return End; } - MachineLocation getLoc() const { return Loc; } -}; - //===----------------------------------------------------------------------===// /// \brief This class is used to track local variable information. class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. - DbgVariable *AbsVar; // Corresponding Abstract variable, if any. - const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. + DIVariable Var; // Variable Descriptor. + DIE *TheDIE; // Variable DIE. + unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. + const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. int FrameIndex; DwarfDebug *DD; + public: - // AbsVar may be NULL. - DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD) - : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), - FrameIndex(~0), DD(DD) {} + /// Construct a DbgVariable from a DIVariable. + DbgVariable(DIVariable V, DwarfDebug *DD) + : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr), + FrameIndex(~0), DD(DD) {} + + /// Construct a DbgVariable from a DEBUG_VALUE. + /// AbstractVar may be NULL. + DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD) + : Var(DbgValue->getDebugVariable()), TheDIE(nullptr), + DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {} // Accessors. - DIVariable getVariable() const { return Var; } - void setDIE(DIE *D) { TheDIE = D; } - DIE *getDIE() const { return TheDIE; } - void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } - unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } - DbgVariable *getAbstractVariable() const { return AbsVar; } - const MachineInstr *getMInsn() const { return MInsn; } - void setMInsn(const MachineInstr *M) { MInsn = M; } - int getFrameIndex() const { return FrameIndex; } - void setFrameIndex(int FI) { FrameIndex = FI; } + DIVariable getVariable() const { return Var; } + void setDIE(DIE &D) { TheDIE = &D; } + DIE *getDIE() const { return TheDIE; } + void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } + unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } + StringRef getName() const { return Var.getName(); } + const MachineInstr *getMInsn() const { return MInsn; } + int getFrameIndex() const { return FrameIndex; } + void setFrameIndex(int FI) { FrameIndex = FI; } // Translate tag to proper Dwarf tag. 
- uint16_t getTag() const { + dwarf::Tag getTag() const { if (Var.getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; return dwarf::DW_TAG_variable; } /// \brief Return true if DbgVariable is artificial. - bool isArtificial() const { + bool isArtificial() const { if (Var.isArtificial()) return true; if (getType().isArtificial()) @@ -185,7 +114,7 @@ public: return false; } - bool isObjectPointer() const { + bool isObjectPointer() const { if (Var.isObjectPointer()) return true; if (getType().isObjectPointer()) @@ -193,21 +122,16 @@ public: return false; } - bool variableHasComplexAddress() const { + bool variableHasComplexAddress() const { assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); } - bool isBlockByrefVariable() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(); - } - unsigned getNumAddrElements() const { + bool isBlockByrefVariable() const; + unsigned getNumAddrElements() const { assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.getNumAddrElements(); } - uint64_t getAddrElement(unsigned i) const { - return Var.getAddrElement(i); - } + uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); } DIType getType() const; private: @@ -216,101 +140,16 @@ private: template <typename T> T resolve(DIRef<T> Ref) const; }; -/// \brief Collects and handles information specific to a particular -/// collection of units. -class DwarfUnits { - // Target of Dwarf emission, used for sizing of abbreviations. - AsmPrinter *Asm; - - // Used to uniquely define abbreviations. - FoldingSet<DIEAbbrev> *AbbreviationsSet; - - // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> &Abbreviations; - - // A pointer to all units in the section. - SmallVector<CompileUnit *, 1> CUs; - - // Collection of strings for this unit and assorted symbols. - // A String->Symbol mapping of strings used by indirect - // references. - typedef StringMap<std::pair<MCSymbol*, unsigned>, - BumpPtrAllocator&> StrPool; - StrPool StringPool; - unsigned NextStringPoolNumber; - std::string StringPref; - - // Collection of addresses for this unit and assorted labels. - // A Symbol->unsigned mapping of addresses used by indirect - // references. - typedef DenseMap<const MCExpr *, unsigned> AddrPool; - AddrPool AddressPool; - unsigned NextAddrPoolNumber; - -public: - DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS, - std::vector<DIEAbbrev *> &A, const char *Pref, - BumpPtrAllocator &DA) - : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA), - NextStringPoolNumber(0), StringPref(Pref), AddressPool(), - NextAddrPoolNumber(0) {} - - /// \brief Compute the size and offset of a DIE given an incoming Offset. - unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); - - /// \brief Compute the size and offset of all the DIEs. - void computeSizeAndOffsets(); - - /// \brief Define a unique number for the abbreviation. - void assignAbbrevNumber(DIEAbbrev &Abbrev); - - /// \brief Add a unit to the list of CUs. - void addUnit(CompileUnit *CU) { CUs.push_back(CU); } - - /// \brief Emit all of the units to the section listed with the given - /// abbreviation section. - void emitUnits(DwarfDebug *DD, const MCSection *USection, - const MCSection *ASection, const MCSymbol *ASectionSym); - - /// \brief Emit all of the strings to the section given. 
- void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection, - const MCSymbol *StrSecSym); - - /// \brief Emit all of the addresses to the section given. - void emitAddresses(const MCSection *AddrSection); - - /// \brief Returns the entry into the start of the pool. - MCSymbol *getStringPoolSym(); - - /// \brief Returns an entry into the string pool with the given - /// string text. - MCSymbol *getStringPoolEntry(StringRef Str); - - /// \brief Returns the index into the string pool with the given - /// string text. - unsigned getStringPoolIndex(StringRef Str); - - /// \brief Returns the string pool. - StrPool *getStringPool() { return &StringPool; } - - /// \brief Returns the index into the address pool with the given - /// label/symbol. - unsigned getAddrPoolIndex(const MCExpr *Sym); - unsigned getAddrPoolIndex(const MCSymbol *Sym); - - /// \brief Returns the address pool. - AddrPool *getAddrPool() { return &AddressPool; } -}; /// \brief Helper used to pair up a symbol and its DWARF compile unit. struct SymbolCU { - SymbolCU(CompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} + SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} const MCSymbol *Sym; - CompileUnit *CU; + DwarfCompileUnit *CU; }; /// \brief Collects and handles dwarf debug information. -class DwarfDebug { +class DwarfDebug : public AsmPrinterHandler { // Target of Dwarf emission. AsmPrinter *Asm; @@ -320,40 +159,31 @@ class DwarfDebug { // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - // Handle to the a compile unit used for the inline extension handling. - CompileUnit *FirstCU; + // Handle to the compile unit used for the inline extension handling, + // this is just so that the DIEValue allocator has a place to store + // the particular elements. + // FIXME: Store these off of DwarfDebug instead? + DwarfCompileUnit *FirstCU; - // Maps MDNode with its corresponding CompileUnit. - DenseMap <const MDNode *, CompileUnit *> CUMap; + // Maps MDNode with its corresponding DwarfCompileUnit. + MapVector<const MDNode *, DwarfCompileUnit *> CUMap; - // Maps subprogram MDNode with its corresponding CompileUnit. - DenseMap <const MDNode *, CompileUnit *> SPMap; + // Maps subprogram MDNode with its corresponding DwarfCompileUnit. + DenseMap<const MDNode *, DwarfCompileUnit *> SPMap; - // Maps a CU DIE with its corresponding CompileUnit. - DenseMap <const DIE *, CompileUnit *> CUDieMap; + // Maps a CU DIE with its corresponding DwarfCompileUnit. + DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap; - /// Maps MDNodes for type sysstem with the corresponding DIEs. These DIEs can + /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can /// be shared across CUs, that is why we keep the map here instead - /// of in CompileUnit. + /// of in DwarfCompileUnit. DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; - // Used to uniquely define abbreviations. - FoldingSet<DIEAbbrev> AbbreviationsSet; - - // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> Abbreviations; - - // Stores the current file ID for a given compile unit. - DenseMap <unsigned, unsigned> FileIDCUMap; - // Source id map, i.e. CUID, source filename and directory, - // separated by a zero byte, mapped to a unique id. - StringMap<unsigned, BumpPtrAllocator&> SourceIdMap; - // List of all labels used in aranges generation. std::vector<SymbolCU> ArangeLabels; // Size of each symbol emitted (for those symbols that have a specific size). 
- DenseMap <const MCSymbol *, uint64_t> SymSize; + DenseMap<const MCSymbol *, uint64_t> SymSize; // Provides a unique id per text section. typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType; @@ -368,15 +198,17 @@ class DwarfDebug { DenseMap<const MDNode *, DIE *> AbstractSPDies; // Collection of dbg variables of a scope. - typedef DenseMap<LexicalScope *, - SmallVector<DbgVariable *, 8> > ScopeVariablesMap; + typedef DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > + ScopeVariablesMap; ScopeVariablesMap ScopeVariables; // Collection of abstract variables. - DenseMap<const MDNode *, DbgVariable *> AbstractVariables; + DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; + SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables; - // Collection of DotDebugLocEntry. - SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries; + // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists + // can refer to them in spite of insertions into this list. + SmallVector<DebugLocList, 4> DotDebugLocEntries; // Collection of subprogram DIEs that are marked (at the end of the module) // as DW_AT_inline. @@ -392,19 +224,10 @@ class DwarfDebug { // Maps instruction with label emitted after instruction. DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; - // Every user variable mentioned by a DBG_VALUE instruction in order of - // appearance. - SmallVector<const MDNode*, 8> UserVariables; - - // For each user variable, keep a list of DBG_VALUE instructions in order. - // The list can also contain normal instructions that clobber the previous - // DBG_VALUE. - typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> > - DbgValueHistoryMap; + // History of DBG_VALUE and clobber instructions for each user variable. + // Variables are listed in order of appearance. DbgValueHistoryMap DbgValues; - SmallVector<const MCSymbol *, 8> DebugRangeSymbols; - // Previous instruction's location information. This is used to determine // label location to indicate scope boundaries in dwarf debug info. DebugLoc PrevInstLoc; @@ -414,6 +237,19 @@ class DwarfDebug { // body. DebugLoc PrologEndLoc; + // If nonnull, stores the current machine function we're processing. + const MachineFunction *CurFn; + + // If nonnull, stores the current machine instruction we're processing. + const MachineInstr *CurMI; + + // If nonnull, stores the section that the previous function was emitted into. + const MCSection *PrevSection; + + // If nonnull, stores the CU in which the previous subprogram was contained. + const DwarfCompileUnit *PrevCU; + // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. @@ -421,36 +257,50 @@ class DwarfDebug { MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; - MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; + MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym; + MCSymbol *DwarfStrDWOSectionSym; MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; // As an optimization, there is no need to emit an entry in the directory // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; - // Counter for assigning globally unique IDs for CUs.
- unsigned GlobalCUIndexCount; + // Counter for assigning globally unique IDs for ranges. + unsigned GlobalRangeCount; // Holder for the file specific debug information. - DwarfUnits InfoHolder; + DwarfFile InfoHolder; // Holders for the various debug information flags that we might need to // have exposed. See accessor functions below for description. // Holder for imported entities. typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> - ImportedEntityMap; + ImportedEntityMap; ImportedEntityMap ScopesWithImportedEntities; - // Holder for types that are going to be extracted out into a type unit. - std::vector<DIE *> TypeUnits; + // Map from MDNodes for user-defined types to the type units that describe + // them. + DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; + + SmallVector<std::pair<std::unique_ptr<DwarfTypeUnit>, DICompositeType>, 1> TypeUnitsUnderConstruction; // Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; + // Whether or not to use AT_ranges for compilation units. + bool HasCURanges; + + // Whether we emitted a function into a section other than the default + // text. + bool UsedNonDefaultText; + // Version of dwarf we're emitting. unsigned DwarfVersion; + // Maps from a type identifier to the actual MDNode. + DITypeIdentifierMap TypeIdentifierMap; + // DWARF5 Experimental Options bool HasDwarfAccelTables; bool HasSplitDwarf; @@ -460,50 +310,83 @@ class DwarfDebug { // original object file, rather than things that are meant // to be in the .dwo sections. - // The CUs left in the original object file for separated debug info. - SmallVector<CompileUnit *, 1> SkeletonCUs; + // Holder for the skeleton information. + DwarfFile SkeletonHolder; - // Used to uniquely define abbreviations for the skeleton emission. - FoldingSet<DIEAbbrev> SkeletonAbbrevSet; + /// Store file names for type units under fission in a line table header that + /// will be emitted into debug_line.dwo. + // FIXME: replace this with a map from comp_dir to table so that we can emit + // multiple tables during LTO each of which uses directory 0, referencing the + // comp_dir of all the type units that use it. + MCDwarfDwoLineTable SplitTypeUnitFileTable; - // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> SkeletonAbbrevs; + // True iff there is exactly one CU in this module. + bool SingleCU; - // Holder for the skeleton information. - DwarfUnits SkeletonHolder; + AddressPool AddrPool; - // Maps from a type identifier to the actual MDNode. - DITypeIdentifierMap TypeIdentifierMap; + DwarfAccelTable AccelNames; + DwarfAccelTable AccelObjC; + DwarfAccelTable AccelNamespace; + DwarfAccelTable AccelTypes; -private: + MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); void addScopeVariable(LexicalScope *LS, DbgVariable *Var); + const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { + return InfoHolder.getUnits(); + } + /// \brief Find abstract variable associated with Var.
- DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc); + DbgVariable *getExistingAbstractVariable(const DIVariable &DV, + DIVariable &Cleansed); + DbgVariable *getExistingAbstractVariable(const DIVariable &DV); + void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope); + void ensureAbstractVariableIsCreated(const DIVariable &Var, + const MDNode *Scope); + void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var, + const MDNode *Scope); /// \brief Find DIE for the given subprogram and attach appropriate /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. - DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP); + DIE &updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, DISubprogram SP); + + /// \brief A helper function to check whether the DIE for a given Scope is + /// going to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); + + /// \brief A helper function to construct a RangeSpanList for a given + /// lexical scope. + void addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE, + const SmallVectorImpl<InsnRange> &Range); /// \brief Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. - DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); - /// A helper function to check whether the DIE for a given Scope is going - /// to be null. - bool isLexicalScopeDIENull(LexicalScope *Scope); + std::unique_ptr<DIE> constructLexicalScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); /// \brief This scope represents inlined body of a function. Construct /// DIE to represent this concrete inlined copy of the function. - DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + std::unique_ptr<DIE> constructInlinedScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); /// \brief Construct a DIE for this scope. - DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + std::unique_ptr<DIE> constructScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); + void createAndAddScopeChildren(DwarfCompileUnit &TheCU, LexicalScope *Scope, + DIE &ScopeDIE); + /// \brief Construct a DIE for this abstract scope. + void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); + /// \brief Construct a DIE for this subprogram scope. + DIE &constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); /// A helper function to create children of a Scope DIE. - DIE *createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, - SmallVectorImpl<DIE*> &Children); + DIE *createScopeChildrenDIE(DwarfCompileUnit &TheCU, LexicalScope *Scope, + SmallVectorImpl<std::unique_ptr<DIE>> &Children); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -514,12 +397,13 @@ private: /// \brief Compute the size and offset of all the DIEs. void computeSizeAndOffsets(); - /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs. - void computeInlinedDIEs(); - /// \brief Collect info for variables that were optimized out. void collectDeadVariables(); + void finishVariableDefinitions(); + + void finishSubprogramDefinitions(); + /// \brief Finish off debug information after all functions have been /// processed. void finalizeModuleInfo(); @@ -528,9 +412,6 @@ private: /// open. void endSections(); - /// \brief Emit a set of abbreviations to the specific section. 
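The construct*ScopeDIE declarations above now return std::unique_ptr<DIE>, so a scope DIE is built, populated, and then explicitly handed to whichever parent adopts it, or dropped if the scope turns out to be empty. A standalone C++ sketch of that ownership pattern, with a simplified Node standing in for the real DIE:

#include <memory>
#include <utility>
#include <vector>

struct Node { // simplified stand-in for DIE
  std::vector<std::unique_ptr<Node>> Children;
  Node &addChild(std::unique_ptr<Node> Child) {
    Children.push_back(std::move(Child)); // the parent takes ownership
    return *Children.back();
  }
};

// Analogous in shape to constructScopeDIE: build a subtree and return it;
// the caller decides whether to attach it to a parent or discard it.
std::unique_ptr<Node> constructScopeSketch() {
  auto Scope = std::make_unique<Node>();
  Scope->addChild(std::make_unique<Node>()); // children adopted on creation
  return Scope;
}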
- void emitAbbrevs(const MCSection *, std::vector<DIEAbbrev*> *); - /// \brief Emit the debug info section. void emitDebugInfo(); @@ -566,32 +447,42 @@ private: /// index. void emitDebugPubTypes(bool GnuStyle = false); + void + emitDebugPubSection(bool GnuStyle, const MCSection *PSec, StringRef Name, + const StringMap<const DIE *> &(DwarfUnit::*Accessor)() + const); + /// \brief Emit visible names into a debug str section. void emitDebugStr(); /// \brief Emit visible names into a debug loc section. void emitDebugLoc(); + /// \brief Emit visible names into a debug loc dwo section. + void emitDebugLocDWO(); + /// \brief Emit visible names into a debug aranges section. void emitDebugARanges(); /// \brief Emit visible names into a debug ranges section. void emitDebugRanges(); - /// \brief Emit visible names into a debug macinfo section. - void emitDebugMacInfo(); - /// \brief Emit inline info using custom format. void emitDebugInlineInfo(); /// DWARF 5 Experimental Split Dwarf Emitters + /// \brief Initialize common features of skeleton units. + void initSkeletonUnit(const DwarfUnit &U, DIE &Die, + std::unique_ptr<DwarfUnit> NewU); + /// \brief Construct the split debug info compile unit for the debug info /// section. - CompileUnit *constructSkeletonCU(const CompileUnit *CU); - /// \brief Emit the local split abbreviations. - void emitSkeletonAbbrevs(const MCSection *); + DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU); + + /// \brief Construct the skeleton for the given split debug info type unit. + DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU); /// \brief Emit the debug info dwo section. void emitDebugInfoDWO(); @@ -599,27 +490,30 @@ private: /// \brief Emit the debug abbrev dwo section. void emitDebugAbbrevDWO(); + /// \brief Emit the debug line dwo section. + void emitDebugLineDWO(); + /// \brief Emit the debug str dwo section. void emitDebugStrDWO(); - /// \brief Create new CompileUnit for the given metadata node with tag - /// DW_TAG_compile_unit. - CompileUnit *constructCompileUnit(DICompileUnit DIUnit); + /// Flags to let the linker know we have emitted new style pubnames. Only + /// emit it here if we don't have a skeleton CU for split dwarf. + void addGnuPubAttributes(DwarfUnit &U, DIE &D) const; - /// \brief Construct subprogram DIE. - void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); + /// \brief Create new DwarfCompileUnit for the given metadata node with tag + /// DW_TAG_compile_unit. + DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit); /// \brief Construct imported_module or imported_declaration DIE. - void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N); + void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N); /// \brief Construct import_module DIE. - void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, - DIE *Context); + void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N, + DIE &Context); /// \brief Construct import_module DIE. - void constructImportedEntityDIE(CompileUnit *TheCU, - const DIImportedEntity &Module, - DIE *Context); + void constructImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity &Module, DIE &Context); /// \brief Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the @@ -633,21 +527,18 @@ private: /// \brief If Var is a current function argument, add it to the /// CurrentFnArguments list.
- bool addCurrentFnArgument(const MachineFunction *MF, - DbgVariable *Var, LexicalScope *Scope); + bool addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope); /// \brief Populate LexicalScope entries with variables' info. - void collectVariableInfo(const MachineFunction *, - SmallPtrSet<const MDNode *, 16> &ProcessedVars); + void collectVariableInfo(SmallPtrSet<const MDNode *, 16> &ProcessedVars); /// \brief Collect variable information from the side table maintained /// by MMI. - void collectVariableInfoFromMMITable(const MachineFunction * MF, - SmallPtrSet<const MDNode *, 16> &P); + void collectVariableInfoFromMMITable(SmallPtrSet<const MDNode *, 16> &P); /// \brief Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { - LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + LabelsBeforeInsn.insert(std::make_pair(MI, nullptr)); } /// \brief Return Label preceding the instruction. @@ -655,18 +546,25 @@ private: /// \brief Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { - LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + LabelsAfterInsn.insert(std::make_pair(MI, nullptr)); } /// \brief Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + void attachRangesOrLowHighPC(DwarfCompileUnit &Unit, DIE &D, + const SmallVectorImpl<InsnRange> &Ranges); + void attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, MCSymbol *Begin, + MCSymbol *End); + public: //===--------------------------------------------------------------------===// // Main entry points. // DwarfDebug(AsmPrinter *A, Module *M); + ~DwarfDebug() override; + void insertDIE(const MDNode *TypeMD, DIE *Die) { MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); } @@ -679,62 +577,101 @@ public: void beginModule(); /// \brief Emit all Dwarf sections that should come after the content. - void endModule(); + void endModule() override; /// \brief Gather pre-function debug information. - void beginFunction(const MachineFunction *MF); + void beginFunction(const MachineFunction *MF) override; /// \brief Gather and emit post-function debug information. - void endFunction(const MachineFunction *MF); + void endFunction(const MachineFunction *MF) override; /// \brief Process beginning of an instruction. - void beginInstruction(const MachineInstr *MI); + void beginInstruction(const MachineInstr *MI) override; /// \brief Process end of an instruction. - void endInstruction(const MachineInstr *MI); + void endInstruction() override; /// \brief Add a DIE to the set of types that we're going to pull into /// type units. - void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); } + void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, + DIE &Die, DICompositeType CTy); /// \brief Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } /// \brief For symbols that have a size designated (e.g. common symbols), /// this tracks that size. - void setSymbolSize(const MCSymbol *Sym, uint64_t Size) { SymSize[Sym] = Size;} - - /// \brief Look up the source id with the given directory and source file - /// names. If none currently exists, create a new id and insert it in the - /// SourceIds map. 
- unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName, - unsigned CUID); + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override { + SymSize[Sym] = Size; + } /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs); + void emitDIE(DIE &Die); // Experimental DWARF5 features. /// \brief Returns whether or not to emit tables that dwarf consumers can /// use to accelerate lookup. - bool useDwarfAccelTables() { return HasDwarfAccelTables; } + bool useDwarfAccelTables() const { return HasDwarfAccelTables; } /// \brief Returns whether or not to change the current debug info for the /// split dwarf proposal support. - bool useSplitDwarf() { return HasSplitDwarf; } + bool useSplitDwarf() const { return HasSplitDwarf; } /// Returns the Dwarf Version. unsigned getDwarfVersion() const { return DwarfVersion; } + /// Returns the section symbol for the .debug_loc section. + MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; } + + /// Returns the previous section that was emitted into. + const MCSection *getPrevSection() const { return PrevSection; } + + /// Returns the previous CU that was being updated + const DwarfCompileUnit *getPrevCU() const { return PrevCU; } + + /// Returns the entries for the .debug_loc section. + const SmallVectorImpl<DebugLocList> & + getDebugLocEntries() const { + return DotDebugLocEntries; + } + + /// \brief Emit an entry for the debug loc section. This can be used to + /// handle an entry that's going to be emitted into the debug loc section. + void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry); + + /// Emit the location for a debug loc entry, including the size header. + void emitDebugLocEntryLocation(const DebugLocEntry &Entry); + /// Find the MDNode for the given reference. template <typename T> T resolve(DIRef<T> Ref) const { return Ref.resolve(TypeIdentifierMap); } + /// \brief Return the TypeIdentifierMap. + const DITypeIdentifierMap &getTypeIdentifierMap() const { + return TypeIdentifierMap; + } + + /// Find the DwarfCompileUnit for the given CU Die. + DwarfCompileUnit *lookupUnit(const DIE *CU) const { + return CUDieMap.lookup(CU); + } /// isSubprogramContext - Return true if Context is either a subprogram /// or another context nested inside a subprogram. 
bool isSubprogramContext(const MDNode *Context); + void addSubprogramNames(DISubprogram SP, DIE &Die); + + AddressPool &getAddressPool() { return AddrPool; } + + void addAccelName(StringRef Name, const DIE &Die); + + void addAccelObjC(StringRef Name, const DIE &Die); + + void addAccelNamespace(StringRef Name, const DIE &Die); + + void addAccelType(StringRef Name, const DIE &Die, char Flags); }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 1575161..0440fce 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -14,135 +14,14 @@ #ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H -#include "llvm/ADT/DenseMap.h" +#include "EHStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" -#include <vector> namespace llvm { - -template <typename T> class SmallVectorImpl; -struct LandingPadInfo; -class MachineModuleInfo; -class MachineInstr; class MachineFunction; -class MCAsmInfo; -class MCExpr; -class MCSymbol; -class Function; class ARMTargetStreamer; -class AsmPrinter; - -//===----------------------------------------------------------------------===// -/// DwarfException - Emits Dwarf exception handling directives. -/// -class DwarfException { -protected: - /// Asm - Target of Dwarf emission. - AsmPrinter *Asm; - - /// MMI - Collected machine module information. - MachineModuleInfo *MMI; - - /// SharedTypeIds - How many leading type ids two landing pads have in common. - static unsigned SharedTypeIds(const LandingPadInfo *L, - const LandingPadInfo *R); - - /// PadLT - Order landing pads lexicographically by type id. - static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R); - - /// PadRange - Structure holding a try-range and the associated landing pad. - struct PadRange { - // The index of the landing pad. - unsigned PadIndex; - // The index of the begin and end labels in the landing pad's label lists. - unsigned RangeIndex; - }; - - typedef DenseMap<MCSymbol *, PadRange> RangeMapType; - - /// ActionEntry - Structure describing an entry in the actions table. - struct ActionEntry { - int ValueForTypeID; // The value to write - may not be equal to the type id. - int NextAction; - unsigned Previous; - }; - - /// CallSiteEntry - Structure describing an entry in the call-site table. - struct CallSiteEntry { - // The 'try-range' is BeginLabel .. EndLabel. - MCSymbol *BeginLabel; // zero indicates the start of the function. - MCSymbol *EndLabel; // zero indicates the end of the function. - - // The landing pad starts at PadLabel. - MCSymbol *PadLabel; // zero indicates that there is no landing pad. - unsigned Action; - }; - - /// ComputeActionsTable - Compute the actions table and gather the first - /// action index for each landing pad site. - unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs, - SmallVectorImpl<ActionEntry> &Actions, - SmallVectorImpl<unsigned> &FirstActions); - - /// CallToNoUnwindFunction - Return `true' if this is a call to a function - /// marked `nounwind'. Return `false' otherwise. - bool CallToNoUnwindFunction(const MachineInstr *MI); - - /// ComputeCallSiteTable - Compute the call-site table. The entry for an - /// invoke has a try-range containing the call, a non-zero landing pad and an - /// appropriate action. 
The entry for an ordinary call has a try-range - /// containing the call and zero for the landing pad and the action. Calls - /// marked 'nounwind' have no entry and must not be contained in the try-range - /// of any entry - they form gaps in the table. Entries must be ordered by - /// try-range address. - void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const RangeMapType &PadMap, - const SmallVectorImpl<const LandingPadInfo *> &LPs, - const SmallVectorImpl<unsigned> &FirstActions); - - /// EmitExceptionTable - Emit landing pads and actions. - /// - /// The general organization of the table is complex, but the basic concepts - /// are easy. First there is a header which describes the location and - /// organization of the three components that follow. - /// 1. The landing pad site information describes the range of code covered - /// by the try. In our case it's an accumulation of the ranges covered - /// by the invokes in the try. There is also a reference to the landing - /// pad that handles the exception once processed. Finally an index into - /// the actions table. - /// 2. The action table, in our case, is composed of pairs of type ids - /// and next action offset. Starting with the action index from the - /// landing pad site, each type Id is checked for a match to the current - /// exception. If it matches then the exception and type id are passed - /// on to the landing pad. Otherwise the next action is looked up. This - /// chain is terminated with a next action of zero. If no type id is - /// found the frame is unwound and handling continues. - /// 3. Type id table contains references to all the C++ typeinfo for all - /// catches in the function. This tables is reversed indexed base 1. - void EmitExceptionTable(); - - virtual void EmitTypeInfos(unsigned TTypeEncoding); - -public: - //===--------------------------------------------------------------------===// - // Main entry points. - // - DwarfException(AsmPrinter *A); - virtual ~DwarfException(); - /// EndModule - Emit all exception information that should come after the - /// content. - virtual void EndModule(); - - /// BeginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); - - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); -}; - -class DwarfCFIException : public DwarfException { +class DwarfCFIException : public EHStreamer { /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality /// should be emitted. bool shouldEmitPersonality; @@ -164,22 +43,26 @@ public: DwarfCFIException(AsmPrinter *A); virtual ~DwarfCFIException(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + void endModule() override; - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + void beginFunction(const MachineFunction *MF) override; - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. 
+ void endFunction(const MachineFunction *) override; }; -class ARMException : public DwarfException { - void EmitTypeInfos(unsigned TTypeEncoding); +class ARMException : public EHStreamer { + void emitTypeInfos(unsigned TTypeEncoding) override; ARMTargetStreamer &getTargetStreamer(); + /// shouldEmitCFI - Per-function flag to indicate if frame CFI info + /// should be emitted. + bool shouldEmitCFI; + public: //===--------------------------------------------------------------------===// // Main entry points. @@ -187,19 +70,19 @@ public: ARMException(AsmPrinter *A); virtual ~ARMException(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + void endModule() override; - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + void beginFunction(const MachineFunction *MF) override; - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; }; -class Win64Exception : public DwarfException { +class Win64Exception : public EHStreamer { /// shouldEmitPersonality - Per-function flag to indicate if personality /// info should be emitted. bool shouldEmitPersonality; @@ -219,16 +102,16 @@ public: Win64Exception(AsmPrinter *A); virtual ~Win64Exception(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + void endModule() override; - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + void beginFunction(const MachineFunction *MF) override; - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp new file mode 100644 index 0000000..737ee54 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -0,0 +1,156 @@ +//===-- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DwarfFile.h" + +#include "DwarfDebug.h" +#include "DwarfUnit.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/LEB128.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { +DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) + : Asm(AP), StrPool(DA, *Asm, Pref) {} + +DwarfFile::~DwarfFile() {} + +// Define a unique number for the abbreviation. 
+// +void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) { + // Check the set for priors. + DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev); + + // If it's newly added. + if (InSet == &Abbrev) { + // Add to abbreviation list. + Abbreviations.push_back(&Abbrev); + + // Assign the vector position + 1 as its number. + Abbrev.setNumber(Abbreviations.size()); + } else { + // Assign existing abbreviation number. + Abbrev.setNumber(InSet->getNumber()); + } +} + +void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) { + CUs.push_back(std::move(U)); +} + +// Emit each dwarf unit to its assigned section, referencing the +// abbreviations through ASectionSym. +void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) { + for (const auto &TheU : CUs) { + DIE &Die = TheU->getUnitDie(); + const MCSection *USection = TheU->getSection(); + Asm->OutStreamer.SwitchSection(USection); + + // Emit the compile unit's header. + Asm->OutStreamer.EmitLabel(TheU->getLabelBegin()); + + // Emit the size of the content, not including the length field itself. + Asm->OutStreamer.AddComment("Length of Unit"); + Asm->EmitInt32(TheU->getHeaderSize() + Die.getSize()); + + TheU->emitHeader(ASectionSym); + + DD->emitDIE(Die); + Asm->OutStreamer.EmitLabel(TheU->getLabelEnd()); + } +} +// Compute the size and offset for each DIE. +void DwarfFile::computeSizeAndOffsets() { + // Offset from the first CU in the debug info section is 0 initially. + unsigned SecOffset = 0; + + // Iterate over each compile unit and set the size and offsets for each + // DIE within each compile unit. All offsets are CU relative. + for (const auto &TheU : CUs) { + TheU->setDebugInfoOffset(SecOffset); + + // The CU-relative offset starts after the length field and unit headers. + unsigned Offset = sizeof(int32_t) + // Length of Unit Info + TheU->getHeaderSize(); // Unit-specific headers + + // EndOffset here is CU-relative, after laying out + // all of the CU DIE. + unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset); + SecOffset += EndOffset; + } +} +// Compute the size and offset of a DIE. The offset is relative to start of the +// CU. It returns the offset after laying out the DIE. +unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { + // Record the abbreviation. + assignAbbrevNumber(Die.getAbbrev()); + + // Get the abbreviation for this DIE. + const DIEAbbrev &Abbrev = Die.getAbbrev(); + + // Set the DIE offset. + Die.setOffset(Offset); + + // Start the size with the size of abbreviation code. + Offset += getULEB128Size(Die.getAbbrevNumber()); + + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + + // Size the DIE attribute values. + for (unsigned i = 0, N = Values.size(); i < N; ++i) + // Size attribute value. + Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm()); + + // Get the children. + const auto &Children = Die.getChildren(); + + // Size the DIE children if any. + if (!Children.empty()) { + assert(Abbrev.hasChildren() && "Children flag not set"); + + for (auto &Child : Children) + Offset = computeSizeAndOffset(*Child, Offset); + + // End of children marker. + Offset += sizeof(int8_t); + } + + Die.setSize(Offset - Die.getOffset()); + return Offset; +} +void DwarfFile::emitAbbrevs(const MCSection *Section) { + // Check to see if it is worth the effort. + if (!Abbreviations.empty()) { + // Start the debug abbrev section. + Asm->OutStreamer.SwitchSection(Section); + + // For each abbreviation.
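computeSizeAndOffset above sizes a DIE as its ULEB128 abbreviation code plus its attribute values, then recursively lays out any children and accounts for the one-byte end-of-children marker. A standalone C++ sketch of the same recursion, with toy sizes in place of real DWARF form sizes:

#include <vector>

struct ToyDIE {
  unsigned AbbrevCodeSize = 1; // pretend ULEB128 size of the abbrev number
  unsigned ValuesSize = 4;     // pretend total size of the attribute values
  std::vector<ToyDIE> Children;
  unsigned Offset = 0, Size = 0;
};

// Mirrors the shape of DwarfFile::computeSizeAndOffset: returns the offset
// just past the DIE after recording its own offset and size.
unsigned layout(ToyDIE &Die, unsigned Offset) {
  Die.Offset = Offset;
  Offset += Die.AbbrevCodeSize + Die.ValuesSize;
  if (!Die.Children.empty()) {
    for (ToyDIE &C : Die.Children)
      Offset = layout(C, Offset);
    Offset += 1; // end-of-children marker, a single zero byte
  }
  Die.Size = Offset - Die.Offset;
  return Offset;
}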
+ for (const DIEAbbrev *Abbrev : Abbreviations) { + // Emit the abbreviation code (base-1 index). + Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); + + // Emit the abbreviations data. + Abbrev->Emit(Asm); + } + + // Mark end of abbreviations. + Asm->EmitULEB128(0, "EOM(3)"); + } +} + +// Emit strings into a string section. +void DwarfFile::emitStrings(const MCSection *StrSection, + const MCSection *OffsetSection, + const MCSymbol *StrSecSym) { + StrPool.emit(*Asm, StrSection, OffsetSection, StrSecSym); +} +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h new file mode 100644 index 0000000..3985eb2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -0,0 +1,84 @@ +//===-- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework -------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFFILE_H__ +#define CODEGEN_ASMPRINTER_DWARFFILE_H__ + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Allocator.h" +#include "AddressPool.h" +#include "DwarfStringPool.h" + +#include <vector> +#include <string> +#include <memory> + +namespace llvm { +class AsmPrinter; +class DwarfUnit; +class DIEAbbrev; +class MCSymbol; +class DIE; +class StringRef; +class DwarfDebug; +class MCSection; +class DwarfFile { + // Target of Dwarf emission, used for sizing of abbreviations. + AsmPrinter *Asm; + + // Used to uniquely define abbreviations. + FoldingSet<DIEAbbrev> AbbreviationsSet; + + // A list of all the unique abbreviations in use. + std::vector<DIEAbbrev *> Abbreviations; + + // A pointer to all units in the section. + SmallVector<std::unique_ptr<DwarfUnit>, 1> CUs; + + DwarfStringPool StrPool; + +public: + DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); + + ~DwarfFile(); + + const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { return CUs; } + + /// \brief Compute the size and offset of a DIE given an incoming Offset. + unsigned computeSizeAndOffset(DIE &Die, unsigned Offset); + + /// \brief Compute the size and offset of all the DIEs. + void computeSizeAndOffsets(); + + /// \brief Define a unique number for the abbreviation. + void assignAbbrevNumber(DIEAbbrev &Abbrev); + + /// \brief Add a unit to the list of CUs. + void addUnit(std::unique_ptr<DwarfUnit> U); + + /// \brief Emit all of the units to the section listed with the given + /// abbreviation section. + void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym); + + /// \brief Emit a set of abbreviations to the specific section. + void emitAbbrevs(const MCSection *); + + /// \brief Emit all of the strings to the section given. + void emitStrings(const MCSection *StrSection, + const MCSection *OffsetSection = nullptr, + const MCSymbol *StrSecSym = nullptr); + + /// \brief Returns the string pool.
+ DwarfStringPool &getStringPool() { return StrPool; } +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp new file mode 100644 index 0000000..72cab60 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -0,0 +1,74 @@ +//===-- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DwarfStringPool.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +MCSymbol *DwarfStringPool::getSectionSymbol() { return SectionSymbol; } + +static std::pair<MCSymbol *, unsigned> & +getEntry(AsmPrinter &Asm, + StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> &Pool, + StringRef Prefix, StringRef Str) { + std::pair<MCSymbol *, unsigned> &Entry = + Pool.GetOrCreateValue(Str).getValue(); + if (!Entry.first) { + Entry.second = Pool.size() - 1; + Entry.first = Asm.GetTempSymbol(Prefix, Entry.second); + } + return Entry; +} + +MCSymbol *DwarfStringPool::getSymbol(AsmPrinter &Asm, StringRef Str) { + return getEntry(Asm, Pool, Prefix, Str).first; +} + +unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) { + return getEntry(Asm, Pool, Prefix, Str).second; +} + +void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection, + const MCSection *OffsetSection, + const MCSymbol *StrSecSym) { + if (Pool.empty()) + return; + + // Start the dwarf str section. + Asm.OutStreamer.SwitchSection(StrSection); + + // Get all of the string pool entries and put them in an array by their ID so + // we can sort them. + SmallVector<const StringMapEntry<std::pair<MCSymbol *, unsigned>> *, 64> + Entries(Pool.size()); + + for (const auto &E : Pool) + Entries[E.getValue().second] = &E; + + for (const auto &Entry : Entries) { + // Emit a label for reference from debug information entries. + Asm.OutStreamer.EmitLabel(Entry->getValue().first); + + // Emit the string itself with a terminating null byte. + Asm.OutStreamer.EmitBytes( + StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1)); + } + + // If we've got an offset section go ahead and emit that now as well. + if (OffsetSection) { + Asm.OutStreamer.SwitchSection(OffsetSection); + unsigned offset = 0; + unsigned size = 4; // FIXME: DWARF64 is 8. + for (const auto &Entry : Entries) { + Asm.OutStreamer.EmitIntValue(offset, size); + offset += Entry->getKeyLength() + 1; + } + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h new file mode 100644 index 0000000..c1615fb --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -0,0 +1,55 @@ +//===-- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework -*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
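The getEntry helper above gives each distinct string a stable index in first-insertion order and creates its label lazily on first use. A standalone C++ sketch of that uniquing pattern, using std::unordered_map in place of LLVM's StringMap:

#include <string>
#include <unordered_map>

class ToyStringPool {
  std::unordered_map<std::string, unsigned> Pool;
public:
  // Returns the existing index for S, or assigns the next one in
  // first-insertion order, as DwarfStringPool::getEntry does.
  unsigned getIndex(const std::string &S) {
    auto Result = Pool.emplace(S, static_cast<unsigned>(Pool.size()));
    return Result.first->second;
  }
};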
+// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_STRINGPOOL_H__ +#define CODEGEN_ASMPRINTER_STRINGPOOL_H__ + +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Allocator.h" + +#include <utility> + +namespace llvm { + +class MCSymbol; +class MCSection; +class StringRef; + +// Collection of strings for this unit and assorted symbols. +// A String->Symbol mapping of strings used by indirect +// references. +class DwarfStringPool { + StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> Pool; + StringRef Prefix; + MCSymbol *SectionSymbol; + +public: + DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix) + : Pool(A), Prefix(Prefix), SectionSymbol(Asm.GetTempSymbol(Prefix)) {} + + void emit(AsmPrinter &Asm, const MCSection *StrSection, + const MCSection *OffsetSection = nullptr, + const MCSymbol *StrSecSym = nullptr); + + /// \brief Returns the entry into the start of the pool. + MCSymbol *getSectionSymbol(); + + /// \brief Returns an entry into the string pool with the given + /// string text. + MCSymbol *getSymbol(AsmPrinter &Asm, StringRef Str); + + /// \brief Returns the index into the string pool with the given + /// string text. + unsigned getIndex(AsmPrinter &Asm, StringRef Str); + + bool empty() const { return Pool.empty(); } +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 50f5cc9..9538bee 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===// +//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===// // // The LLVM Compiler Infrastructure // @@ -11,52 +11,80 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dwarfdebug" - -#include "DwarfCompileUnit.h" +#include "DwarfUnit.h" #include "DwarfAccelTable.h" #include "DwarfDebug.h" #include "llvm/ADT/APFloat.h" -#include "llvm/DIBuilder.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Target/Mangler.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -/// CompileUnit - Compile unit constructor. -CompileUnit::CompileUnit(unsigned UID, DIE *D, DICompileUnit Node, - AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU) - : UniqueID(UID), Node(Node), CUDie(D), Asm(A), DD(DW), DU(DWU), - IndexTyDie(0), DebugInfoOffset(0) { +#define DEBUG_TYPE "dwarfdebug" + +static cl::opt<bool> +GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, + cl::desc("Generate DWARF4 type units."), + cl::init(false)); + +/// Unit - Unit constructor. 
+DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node, + AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) + : UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A), + DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr), + Skeleton(nullptr) { + assert(UnitTag == dwarf::DW_TAG_compile_unit || + UnitTag == dwarf::DW_TAG_type_unit); DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); - insertDIE(Node, D); } -/// ~CompileUnit - Destructor for compile unit. -CompileUnit::~CompileUnit() { +DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node, + AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) + : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU) { + insertDIE(Node, &getUnitDie()); +} + +DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU, + MCDwarfDwoLineTable *SplitLineTable) + : DwarfUnit(UID, dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), + CU(CU), SplitLineTable(SplitLineTable) { + if (SplitLineTable) + addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0); +} + +/// ~Unit - Destructor for compile unit. +DwarfUnit::~DwarfUnit() { for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) DIEBlocks[j]->~DIEBlock(); + for (unsigned j = 0, M = DIELocs.size(); j < M; ++j) + DIELocs[j]->~DIELoc(); } /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug /// information entry. -DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { +DIEEntry *DwarfUnit::createDIEEntry(DIE &Entry) { DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); return Value; } /// getDefaultLowerBound - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. -int64_t CompileUnit::getDefaultLowerBound() const { +int64_t DwarfUnit::getDefaultLowerBound() const { switch (getLanguage()) { default: break; @@ -100,17 +128,23 @@ int64_t CompileUnit::getDefaultLowerBound() const { /// Check whether the DIE for this MDNode can be shared across CUs. static bool isShareableAcrossCUs(DIDescriptor D) { - // When the MDNode can be part of the type system, the DIE can be - // shared across CUs. - return D.isType() || - (D.isSubprogram() && !DISubprogram(D).isDefinition()); + // When the MDNode can be part of the type system, the DIE can be shared + // across CUs. + // Combining type units and cross-CU DIE sharing is lower value (since + // cross-CU DIE sharing is used in LTO and removes type redundancy at that + // level already) but may be implementable for some value in projects + // building multiple independent libraries with LTO and then linking those + // together. + return (D.isType() || + (D.isSubprogram() && !DISubprogram(D).isDefinition())) && + !GenerateDwarfTypeUnits; } /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. We delegate the request to DwarfDebug /// when the DIE for this MDNode can be shared across CUs. The mappings /// will be kept in DwarfDebug for shareable DIEs. -DIE *CompileUnit::getDIE(DIDescriptor D) const { +DIE *DwarfUnit::getDIE(DIDescriptor D) const { if (isShareableAcrossCUs(D)) return DD->getDIE(D); return MDNodeToDieMap.lookup(D); @@ -119,7 +153,7 @@ DIE *CompileUnit::getDIE(DIDescriptor D) const { /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug /// when the DIE for this MDNode can be shared across CUs. The mappings /// will be kept in DwarfDebug for shareable DIEs. 
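getDIE above chooses between two maps: nodes that can be shared across CUs (types and subprogram declarations, unless type units are enabled) resolve through the module-level map owned by DwarfDebug, and everything else through the unit-local map. A standalone C++ sketch of that two-level lookup, with toy integer keys and values in place of MDNode and DIE:

#include <map>

struct ModuleMaps { std::map<int, int> SharedDIEs; }; // plays DwarfDebug's role

struct UnitSketch {
  ModuleMaps &Module;
  std::map<int, int> Local; // plays the role of MDNodeToDieMap
  bool TypeUnitsEnabled = false;
  bool shareable(int Node) const { // fake predicate; "types" are even here
    return Node % 2 == 0 && !TypeUnitsEnabled;
  }
  const int *getDIE(int Node) const {
    const auto &Map = shareable(Node) ? Module.SharedDIEs : Local;
    auto It = Map.find(Node);
    return It == Map.end() ? nullptr : &It->second;
  }
};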
-void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) { +void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) { if (isShareableAcrossCUs(Desc)) { DD->insertDIE(Desc, D); return; @@ -128,40 +162,40 @@ void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) { } /// addFlag - Add a flag that is true. -void CompileUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) { +void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { if (DD->getDwarfVersion() >= 4) - Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); + Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); else - Die->addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); + Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); } /// addUInt - Add an unsigned integer attribute data and value. /// -void CompileUnit::addUInt(DIE *Die, dwarf::Attribute Attribute, - Optional<dwarf::Form> Form, uint64_t Integer) { +void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, *Form, Value); + Die.addValue(Attribute, *Form, Value); } -void CompileUnit::addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer) { +void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) { addUInt(Block, (dwarf::Attribute)0, Form, Integer); } /// addSInt - Add an signed integer attribute data and value. /// -void CompileUnit::addSInt(DIE *Die, dwarf::Attribute Attribute, - Optional<dwarf::Form> Form, int64_t Integer) { +void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer) { if (!Form) Form = DIEInteger::BestForm(true, Integer); DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, *Form, Value); + Die.addValue(Attribute, *Form, Value); } -void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, - int64_t Integer) { +void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, + int64_t Integer) { addSInt(Die, (dwarf::Attribute)0, Form, Integer); } @@ -170,67 +204,67 @@ void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, /// more predictable sizes. In the case of split dwarf we emit an index /// into another table which gets us the static offset into the string /// table. 
-void CompileUnit::addString(DIE *Die, dwarf::Attribute Attribute, - StringRef String) { - DIEValue *Value; - dwarf::Form Form; - if (!DD->useSplitDwarf()) { - MCSymbol *Symb = DU->getStringPoolEntry(String); - if (Asm->needsRelocationsForDwarfStringPool()) - Value = new (DIEValueAllocator) DIELabel(Symb); - else { - MCSymbol *StringPool = DU->getStringPoolSym(); - Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); - } - Form = dwarf::DW_FORM_strp; - } else { - unsigned idx = DU->getStringPoolIndex(String); - Value = new (DIEValueAllocator) DIEInteger(idx); - Form = dwarf::DW_FORM_GNU_str_index; - } +void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, + StringRef String) { + + if (!DD->useSplitDwarf()) + return addLocalString(Die, Attribute, String); + + unsigned idx = DU->getStringPool().getIndex(*Asm, String); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); - Die->addValue(Attribute, Form, Str); + Die.addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str); } /// addLocalString - Add a string attribute data and value. This is guaranteed /// to be in the local string pool instead of indirected. -void CompileUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute, - StringRef String) { - MCSymbol *Symb = DU->getStringPoolEntry(String); +void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute, + StringRef String) { + MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String); DIEValue *Value; - if (Asm->needsRelocationsForDwarfStringPool()) + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) Value = new (DIEValueAllocator) DIELabel(Symb); else { - MCSymbol *StringPool = DU->getStringPoolSym(); + MCSymbol *StringPool = DU->getStringPool().getSectionSymbol(); Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); } - Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); + DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); + Die.addValue(Attribute, dwarf::DW_FORM_strp, Str); } /// addExpr - Add a Dwarf expression attribute data and value. /// -void CompileUnit::addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr) { +void DwarfUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) { DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); - Die->addValue((dwarf::Attribute)0, Form, Value); + Die.addValue((dwarf::Attribute)0, Form, Value); +} + +/// addLocationList - Add a Dwarf loclistptr attribute data and value. +/// +void DwarfUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, + unsigned Index) { + DIEValue *Value = new (DIEValueAllocator) DIELocList(Index); + dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4; + Die.addValue(Attribute, Form, Value); } /// addLabel - Add a Dwarf label attribute data and value. /// -void CompileUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, - dwarf::Form Form, const MCSymbol *Label) { +void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, + const MCSymbol *Label) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, Form, Value); + Die.addValue(Attribute, Form, Value); } -void CompileUnit::addLabel(DIEBlock *Die, dwarf::Form Form, - const MCSymbol *Label) { +void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) { addLabel(Die, (dwarf::Attribute)0, Form, Label); } /// addSectionLabel - Add a Dwarf section label attribute data and value. 
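addString above selects the string form by configuration: under split DWARF the unit records a pool index (DW_FORM_GNU_str_index); otherwise it records a .debug_str reference (DW_FORM_strp), emitted as a label where the assembler supports cross-section relocations and as a delta from the pool's start symbol where it does not. A compact C++ restatement of that decision, with illustrative enum names:

enum class StrRef { GnuStrIndex, StrpLabel, StrpDelta };

// Mirrors the branch structure of DwarfUnit::addString/addLocalString.
StrRef chooseStringForm(bool UseSplitDwarf, bool DwarfUsesRelocations) {
  if (UseSplitDwarf)
    return StrRef::GnuStrIndex; // small index into the .dwo string pool
  return DwarfUsesRelocations
             ? StrRef::StrpLabel  // DIELabel pointing into .debug_str
             : StrRef::StrpDelta; // DIEDelta from the string pool's start
}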
/// -void CompileUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute, - const MCSymbol *Label) { +void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { if (DD->getDwarfVersion() >= 4) addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label); else @@ -239,8 +273,8 @@ void CompileUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute, /// addSectionOffset - Add an offset into a section attribute data and value. /// -void CompileUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute, - uint64_t Integer) { +void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, + uint64_t Integer) { if (DD->getDwarfVersion() >= 4) addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); else @@ -250,104 +284,143 @@ void CompileUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute, /// addLabelAddress - Add a dwarf label attribute data and value using /// DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, - MCSymbol *Label) { +void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + + if (!DD->useSplitDwarf()) + return addLocalLabelAddress(Die, Attribute, Label); + if (Label) DD->addArangeLabel(SymbolCU(this, Label)); - if (!DD->useSplitDwarf()) { - if (Label != NULL) { - DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); - } else { - DIEValue *Value = new (DIEValueAllocator) DIEInteger(0); - Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); - } - } else { - unsigned idx = DU->getAddrPoolIndex(Label); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); - } + unsigned idx = DD->getAddressPool().getIndex(Label); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); +} + +void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, + dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + Die.addValue(Attribute, dwarf::DW_FORM_addr, + Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label) + : new (DIEValueAllocator) DIEInteger(0)); +} + +unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { + // If we print assembly, we can't separate .file entries according to + // compile units. Thus all files will belong to the default compile unit. + + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + return Asm->OutStreamer.EmitDwarfFileDirective( + 0, DirName, FileName, + Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID()); +} + +unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { + return SplitLineTable ? SplitLineTable->getFile(DirName, FileName) + : getCU().getOrCreateSourceID(FileName, DirName); } /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. 
/// -void CompileUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) { - DD->addArangeLabel(SymbolCU(this, Sym)); +void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { if (!DD->useSplitDwarf()) { addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Die, dwarf::DW_FORM_udata, Sym); } else { addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); - addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, + DD->getAddressPool().getIndex(Sym)); } } /// addSectionDelta - Add a section label delta attribute data and value. /// -void CompileUnit::addSectionDelta(DIE *Die, dwarf::Attribute Attribute, - const MCSymbol *Hi, const MCSymbol *Lo) { +void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - if (DD->getDwarfVersion() >= 4) - Die->addValue(Attribute, dwarf::DW_FORM_sec_offset, Value); - else - Die->addValue(Attribute, dwarf::DW_FORM_data4, Value); + Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + Value); +} + +void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die.addValue(Attribute, dwarf::DW_FORM_data4, Value); } /// addDIEEntry - Add a DIE attribute data and value. /// -void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, - DIE *Entry) { +void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) { addDIEEntry(Die, Attribute, createDIEEntry(Entry)); } -void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, - DIEEntry *Entry) { - const DIE *DieCU = Die->getCompileUnitOrNull(); - const DIE *EntryCU = Entry->getEntry()->getCompileUnitOrNull(); +void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) { + // Flag the type unit reference as a declaration so that if it contains + // members (implicit special members, static data member definitions, member + // declarations for definitions in this CU, etc) consumers don't get confused + // and think this is a full definition. + addFlag(Die, dwarf::DW_AT_declaration); + + Die.addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8, + new (DIEValueAllocator) DIETypeSignature(Type)); +} + +void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, + DIEEntry *Entry) { + const DIE *DieCU = Die.getUnitOrNull(); + const DIE *EntryCU = Entry->getEntry().getUnitOrNull(); if (!DieCU) // We assume that Die belongs to this CU, if it is not linked to any CU yet. - DieCU = getCUDie(); + DieCU = &getUnitDie(); if (!EntryCU) - EntryCU = getCUDie(); - Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4 - : dwarf::DW_FORM_ref_addr, - Entry); + EntryCU = &getUnitDie(); + Die.addValue(Attribute, + EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + Entry); } /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. 
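The form logic in addDIEEntry above comes down to whether the referring DIE and its target share a unit: within one unit the compact CU-relative DW_FORM_ref4 suffices, across units the section-relative DW_FORM_ref_addr is required. Reduced to invented stand-in types:

enum RefForm { Ref4, RefAddr }; // DW_FORM_ref4 vs. DW_FORM_ref_addr

struct UnitModel {};
struct DieModel { const UnitModel *Owner; };

RefForm pickRefForm(const DieModel &Referrer, const DieModel &Target) {
  // The real code treats a DIE with no owner yet as belonging to the
  // current unit before making this comparison.
  return Referrer.Owner == Target.Owner ? Ref4 : RefAddr;
}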
-DIE *CompileUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { - DIE *Die = new DIE(Tag); - Parent.addChild(Die); +DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { + assert(Tag != dwarf::DW_TAG_auto_variable && + Tag != dwarf::DW_TAG_arg_variable); + Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag)); + DIE &Die = *Parent.getChildren().back(); if (N) - insertDIE(N, Die); + insertDIE(N, &Die); return Die; } /// addBlock - Add block data. /// -void CompileUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, - DIEBlock *Block) { +void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) { + Loc->ComputeSize(Asm); + DIELocs.push_back(Loc); // Memoize so we can call the destructor later on. + Die.addValue(Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc); +} + +void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, + DIEBlock *Block) { Block->ComputeSize(Asm); DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. - Die->addValue(Attribute, Block->BestForm(), Block); + Die.addValue(Attribute, Block->BestForm(), Block); } /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { - // Verify variable. - if (!V.isVariable()) - return; - - unsigned Line = V.getLineNumber(); +void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, + StringRef Directory) { if (Line == 0) return; - unsigned FileID = - DD->getOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory(), getUniqueID()); + + unsigned FileID = getOrCreateSourceID(File, Directory); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); addUInt(Die, dwarf::DW_AT_decl_line, None, Line); @@ -355,98 +428,59 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { - // Verify global variable. - if (!G.isGlobalVariable()) - return; +void DwarfUnit::addSourceLine(DIE &Die, DIVariable V) { + assert(V.isVariable()); - unsigned Line = G.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = - DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(), + V.getContext().getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { - // Verify subprogram. - if (!SP.isSubprogram()) - return; +void DwarfUnit::addSourceLine(DIE &Die, DIGlobalVariable G) { + assert(G.isGlobalVariable()); - // If the line number is 0, don't add it. - unsigned Line = SP.getLineNumber(); - if (Line == 0) - return; + addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory()); +} - unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), SP.getDirectory(), - getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +/// addSourceLine - Add location information to specified debug information +/// entry. 
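createAndAddDIE above also changes ownership: the parent now owns each child through a unique_ptr and callers only ever see references. The same pattern in isolation, with Node as an invented stand-in for DIE:

#include <memory>
#include <vector>

struct Node {
  unsigned Tag;
  std::vector<std::unique_ptr<Node>> Children;
  explicit Node(unsigned T) : Tag(T) {}
  // Take ownership; the returned reference stays valid as long as the
  // parent does, so callers never delete children themselves.
  Node &addChild(std::unique_ptr<Node> Child) {
    Children.push_back(std::move(Child));
    return *Children.back();
  }
};

Node &createAndAdd(unsigned Tag, Node &Parent) {
  return Parent.addChild(std::make_unique<Node>(Tag));
}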
+void DwarfUnit::addSourceLine(DIE &Die, DISubprogram SP) { + assert(SP.isSubprogram()); + + addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { - // Verify type. - if (!Ty.isType()) - return; +void DwarfUnit::addSourceLine(DIE &Die, DIType Ty) { + assert(Ty.isType()); - unsigned Line = Ty.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), Ty.getDirectory(), - getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { - // Verify type. - if (!Ty.isObjCProperty()) - return; +void DwarfUnit::addSourceLine(DIE &Die, DIObjCProperty Ty) { + assert(Ty.isObjCProperty()); - unsigned Line = Ty.getLineNumber(); - if (Line == 0) - return; DIFile File = Ty.getFile(); - unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), - File.getDirectory(), getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addSourceLine(Die, Ty.getLineNumber(), File.getFilename(), + File.getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { - // Verify namespace. - if (!NS.Verify()) - return; +void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) { + assert(NS.Verify()); - unsigned Line = NS.getLineNumber(); - if (Line == 0) - return; - StringRef FN = NS.getFilename(); - - unsigned FileID = - DD->getOrCreateSourceID(FN, NS.getDirectory(), getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory()); } /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. -void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, - MachineLocation Location) { +void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, + MachineLocation Location) { if (DV.variableHasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); else if (DV.isBlockByrefVariable()) @@ -457,20 +491,53 @@ void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, } /// addRegisterOp - Add register operand. -void CompileUnit::addRegisterOp(DIEBlock *TheDie, unsigned Reg) { +void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned DWReg = RI->getDwarfRegNum(Reg, false); + int DWReg = RI->getDwarfRegNum(Reg, false); + bool isSubRegister = DWReg < 0; + + unsigned Idx = 0; + + // Go up the super-register chain until we hit a valid dwarf register number. 
+ for (MCSuperRegIterator SR(Reg, RI); SR.isValid() && DWReg < 0; ++SR) { + DWReg = RI->getDwarfRegNum(*SR, false); + if (DWReg >= 0) + Idx = RI->getSubRegIndex(*SR, Reg); + } + + if (DWReg < 0) { + DEBUG(dbgs() << "Invalid Dwarf register number.\n"); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_nop); + return; + } + + // Emit register if (DWReg < 32) addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); else { addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } + + // Emit Mask + if (isSubRegister) { + unsigned Size = RI->getSubRegIdxSize(Idx); + unsigned Offset = RI->getSubRegIdxOffset(Idx); + if (Offset > 0) { + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece); + addUInt(TheDie, dwarf::DW_FORM_data1, Size); + addUInt(TheDie, dwarf::DW_FORM_data1, Offset); + } else { + unsigned ByteSize = Size / 8; // Assuming 8 bits per byte. + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_piece); + addUInt(TheDie, dwarf::DW_FORM_data1, ByteSize); + } + } } /// addRegisterOffset - Add register offset. -void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg, - int64_t Offset) { +void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, + int64_t Offset) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); @@ -488,59 +555,59 @@ void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg, /// addAddress - Add an address attribute to a die based on the location /// provided. -void CompileUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, - const MachineLocation &Location, bool Indirect) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); +void DwarfUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); if (Location.isReg() && !Indirect) - addRegisterOp(Block, Location.getReg()); + addRegisterOp(*Loc, Location.getReg()); else { - addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); if (Indirect && !Location.isReg()) { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } } // Now attach the location information to the DIE. - addBlock(Die, Attribute, Block); + addBlock(Die, Attribute, Loc); } /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable -/// given the extra address information encoded in the DIVariable, starting from -/// the starting location. Add the DWARF information to the die. +/// given the extra address information encoded in the DbgVariable, starting +/// from the starting location. Add the DWARF information to the die. 
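When addRegisterOp above meets a register with no DWARF number of its own, it climbs the super-register chain and then narrows the location with a piece operator. The three expression shapes it can produce, sketched standalone (opcode values are the DWARF spec's; the operands that are really ULEB128-encoded are flattened to single bytes here for brevity):

#include <cstdint>
#include <vector>

enum : uint8_t {
  DW_OP_reg0 = 0x50, DW_OP_regx = 0x90,
  DW_OP_piece = 0x93, DW_OP_bit_piece = 0x9d
};

std::vector<uint8_t> regLoc(unsigned DWReg, unsigned SubBits = 0,
                            unsigned SubOffset = 0) {
  std::vector<uint8_t> Ops;
  if (DWReg < 32) {
    Ops.push_back(uint8_t(DW_OP_reg0 + DWReg)); // compact single-byte form
  } else {
    Ops.push_back(DW_OP_regx);
    Ops.push_back(uint8_t(DWReg));
  }
  if (SubBits) { // value lives in a piece of the super-register
    if (SubOffset) {
      Ops.push_back(DW_OP_bit_piece);
      Ops.push_back(uint8_t(SubBits));
      Ops.push_back(uint8_t(SubOffset));
    } else {
      Ops.push_back(DW_OP_piece);
      Ops.push_back(uint8_t(SubBits / 8)); // piece counts bytes, not bits
    }
  }
  return Ops;
}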
/// -void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, - dwarf::Attribute Attribute, - const MachineLocation &Location) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); +void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (Location.isReg()) { if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - addRegisterOffset(Block, Location.getReg(), DV.getAddrElement(1)); + addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1)); i = 2; } else - addRegisterOp(Block, Location.getReg()); + addRegisterOp(*Loc, Location.getReg()); } else - addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); for (; i < N; ++i) { uint64_t Element = DV.getAddrElement(i); if (Element == DIBuilder::OpPlus) { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { if (!Location.isReg()) - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } else llvm_unreachable("unknown DIBuilder Opcode"); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, Block); + addBlock(Die, Attribute, Loc); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -603,9 +670,9 @@ void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, /// starting location. Add the DWARF information to the die. For /// more information, read large comment just above here. /// -void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, - dwarf::Attribute Attribute, - const MachineLocation &Location) { +void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location) { DIType Ty = DV.getType(); DIType TmpTy = Ty; uint16_t Tag = Ty.getTag(); @@ -642,71 +709,81 @@ void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, // Decode the original location, and use that as the start of the byref // variable's location. - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); if (Location.isReg()) - addRegisterOp(Block, Location.getReg()); + addRegisterOp(*Loc, Location.getReg()); else - addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. 
if (forwardingFieldOffset > 0) { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, dwarf::DW_FORM_udata, forwardingFieldOffset); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*Loc, dwarf::DW_FORM_udata, forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, dwarf::DW_FORM_udata, varFieldOffset); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*Loc, dwarf::DW_FORM_udata, varFieldOffset); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, Block); -} - -/// isTypeSigned - Return true if the type is signed. -static bool isTypeSigned(DwarfDebug *DD, DIType Ty, int *SizeInBits) { - if (Ty.isDerivedType()) - return isTypeSigned(DD, DD->resolve(DIDerivedType(Ty).getTypeDerivedFrom()), - SizeInBits); - if (Ty.isBasicType()) - if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed || - DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { - *SizeInBits = Ty.getSizeInBits(); - return true; - } - return false; + addBlock(Die, Attribute, Loc); } /// Return true if type encoding is unsigned. static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { DIDerivedType DTy(Ty); - if (DTy.isDerivedType()) - return isUnsignedDIType(DD, DD->resolve(DTy.getTypeDerivedFrom())); - - DIBasicType BTy(Ty); - if (BTy.isBasicType()) { - unsigned Encoding = BTy.getEncoding(); - if (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_boolean) + if (DTy.isDerivedType()) { + dwarf::Tag T = (dwarf::Tag)Ty.getTag(); + // Encode pointer constants as unsigned bytes. This is used at least for + // null pointer constant emission. + // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed + // here, but accept them for now due to a bug in SROA producing bogus + // dbg.values. + if (T == dwarf::DW_TAG_pointer_type || + T == dwarf::DW_TAG_ptr_to_member_type || + T == dwarf::DW_TAG_reference_type || + T == dwarf::DW_TAG_rvalue_reference_type) return true; + assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || + T == dwarf::DW_TAG_volatile_type || + T == dwarf::DW_TAG_restrict_type || + T == dwarf::DW_TAG_enumeration_type); + if (DITypeRef Deriv = DTy.getTypeDerivedFrom()) + return isUnsignedDIType(DD, DD->resolve(Deriv)); + // FIXME: Enums without a fixed underlying type have unknown signedness + // here, leading to incorrectly emitted constants. 
+ assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type); + return false; } - return false; + + DIBasicType BTy(Ty); + assert(BTy.isBasicType()); + unsigned Encoding = BTy.getEncoding(); + assert((Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_signed || + Encoding == dwarf::DW_ATE_signed_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean) && + "Unsupported encoding"); + return (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean); } /// If this type is derived from a base type then return base type size. @@ -720,8 +797,9 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); - // If this type is not derived from any type then take conservative approach. - if (!BaseType.isValid()) + // If this type is not derived from any type or the type is a declaration then + // take conservative approach. + if (!BaseType.isValid() || BaseType.isForwardDecl()) return Ty.getSizeInBits(); // If this is a derived type, go ahead and get the base type, unless it's a @@ -737,47 +815,8 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { return BaseType.getSizeInBits(); } -/// addConstantValue - Add constant value entry in variable DIE. -void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, - DIType Ty) { - // FIXME: This is a bit conservative/simple - it emits negative values at - // their maximum bit width which is a bit unfortunate (& doesn't prefer - // udata/sdata over dataN as suggested by the DWARF spec) - assert(MO.isImm() && "Invalid machine operand!"); - int SizeInBits = -1; - bool SignedConstant = isTypeSigned(DD, Ty, &SizeInBits); - dwarf::Form Form; - - // If we're a signed constant definitely use sdata. - if (SignedConstant) { - addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, MO.getImm()); - return; - } - - // Else use data for now unless it's larger than we can deal with. - switch (SizeInBits) { - case 8: - Form = dwarf::DW_FORM_data1; - break; - case 16: - Form = dwarf::DW_FORM_data2; - break; - case 32: - Form = dwarf::DW_FORM_data4; - break; - case 64: - Form = dwarf::DW_FORM_data8; - break; - default: - Form = dwarf::DW_FORM_udata; - addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); - return; - } - addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); -} - /// addConstantFPValue - Add constant value entry in variable DIE. -void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { +void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); @@ -794,55 +833,47 @@ void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { // Output the constant to DWARF one byte at a time. for (; Start != Stop; Start += Incr) - addUInt(Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); + addUInt(*Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addConstantFPValue - Add constant value entry in variable DIE. -void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { +void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) { // Pass this down to addConstantValue as an unsigned bag of bits. 
addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); } /// addConstantValue - Add constant value entry in variable DIE. -void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, - bool Unsigned) { - addConstantValue(Die, CI->getValue(), Unsigned); +void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty) { + addConstantValue(Die, CI->getValue(), Ty); +} + +/// addConstantValue - Add constant value entry in variable DIE. +void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO, + DIType Ty) { + assert(MO.isImm() && "Invalid machine operand!"); + + addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm()); +} + +void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) { + // FIXME: This is a bit conservative/simple - it emits negative values always + // sign extended to 64 bits rather than minimizing the number of bytes. + addUInt(Die, dwarf::DW_AT_const_value, + Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val); +} + +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, DIType Ty) { + addConstantValue(Die, Val, isUnsignedDIType(DD, Ty)); } // addConstantValue - Add constant value entry in variable DIE. -void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { - // If we're a signed constant definitely use sdata. - if (!Unsigned) { - addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - Val.getSExtValue()); - return; - } - - // Else use data for now unless it's larger than we can deal with. - dwarf::Form Form; - switch (CIBitWidth) { - case 8: - Form = dwarf::DW_FORM_data1; - break; - case 16: - Form = dwarf::DW_FORM_data2; - break; - case 32: - Form = dwarf::DW_FORM_data4; - break; - case 64: - Form = dwarf::DW_FORM_data8; - break; - default: - addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - Val.getZExtValue()); - return; - } - addUInt(Die, dwarf::DW_AT_const_value, Form, Val.getZExtValue()); + addConstantValue(Die, Unsigned, + Unsigned ? Val.getZExtValue() : Val.getSExtValue()); return; } @@ -861,14 +892,14 @@ void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { c = Ptr64[i / 8] >> (8 * (i & 7)); else c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); - addUInt(Block, dwarf::DW_FORM_data1, c); + addUInt(*Block, dwarf::DW_FORM_data1, c); } addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addTemplateParams - Add template parameters into buffer. -void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { +void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { // Add template parameters. for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { DIDescriptor Element = TParams.getElement(i); @@ -882,9 +913,9 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { } /// getOrCreateContextDIE - Get context owner's DIE. 
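For constants wider than 64 bits, the APInt path above serializes the raw words one byte at a time, walking forward on little-endian targets and backward on big-endian ones. The same loop over a plain word array; this invented helper assumes Words covers at least NumBytes bytes:

#include <cstdint>
#include <vector>

std::vector<uint8_t> emitWideConst(const std::vector<uint64_t> &Words,
                                   unsigned NumBytes, bool LittleEndian) {
  std::vector<uint8_t> Bytes;
  for (unsigned i = 0; i != NumBytes; ++i) {
    unsigned Idx = LittleEndian ? i : NumBytes - 1 - i; // source byte to copy
    Bytes.push_back(uint8_t(Words[Idx / 8] >> (8 * (Idx & 7))));
  }
  return Bytes;
}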
-DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) { +DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) { if (!Context || Context.isFile()) - return getCUDie(); + return &getUnitDie(); if (Context.isType()) return getOrCreateTypeDIE(DIType(Context)); if (Context.isNameSpace()) @@ -894,37 +925,72 @@ DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) { return getDIE(Context); } +DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) { + DIScope Context = resolve(Ty.getContext()); + DIE *ContextDIE = getOrCreateContextDIE(Context); + + if (DIE *TyDIE = getDIE(Ty)) + return TyDIE; + + // Create new type. + DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + + constructTypeDIE(TyDIE, Ty); + + updateAcceleratorTables(Context, Ty, TyDIE); + return &TyDIE; +} + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. -DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { +DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (!TyNode) - return NULL; + return nullptr; DIType Ty(TyNode); assert(Ty.isType()); + assert(Ty == resolve(Ty.getRef()) && + "type was not uniqued, possible ODR violation."); + + // DW_TAG_restrict_type is not supported in DWARF2 + if (Ty.getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) + return getOrCreateTypeDIE(resolve(DIDerivedType(Ty).getTypeDerivedFrom())); // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(resolve(Ty.getContext())); + DIScope Context = resolve(Ty.getContext()); + DIE *ContextDIE = getOrCreateContextDIE(Context); assert(ContextDIE); - DIE *TyDIE = getDIE(Ty); - if (TyDIE) + if (DIE *TyDIE = getDIE(Ty)) return TyDIE; // Create new type. - TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + + updateAcceleratorTables(Context, Ty, TyDIE); if (Ty.isBasicType()) - constructTypeDIE(*TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) - constructTypeDIE(*TyDIE, DICompositeType(Ty)); - else { + constructTypeDIE(TyDIE, DIBasicType(Ty)); + else if (Ty.isCompositeType()) { + DICompositeType CTy(Ty); + if (GenerateDwarfTypeUnits && !Ty.isForwardDecl()) + if (MDString *TypeId = CTy.getIdentifier()) { + DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); + // Skip updating the accelerator tables since this is not the full type. + return &TyDIE; + } + constructTypeDIE(TyDIE, CTy); + } else { assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(*TyDIE, DIDerivedType(Ty)); + constructTypeDIE(TyDIE, DIDerivedType(Ty)); } - // If this is a named finished type then include it in the list of types - // for the accelerator tables. + + return &TyDIE; +} + +void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty, + const DIE &TyDIE) { if (!Ty.getName().empty() && !Ty.isForwardDecl()) { bool IsImplementation = 0; if (Ty.isCompositeType()) { @@ -934,14 +1000,18 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete(); } unsigned Flags = IsImplementation ? 
dwarf::DW_FLAG_type_implementation : 0; - addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); - } + DD->addAccelType(Ty.getName(), TyDIE, Flags); - return TyDIE; + if ((!Context || Context.isCompileUnit() || Context.isFile() || + Context.isNameSpace()) && + getCUNode().getEmissionKind() != DIBuilder::LineTablesOnly) + GlobalTypes[getParentContextString(Context) + Ty.getName().str()] = + &TyDIE; + } } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) { +void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) { assert(Ty && "Trying to add a type that doesn't exist?"); // Check for pre-existence. @@ -956,61 +1026,17 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) { DIE *Buffer = getOrCreateTypeDIE(Ty); // Set up proxy. - Entry = createDIEEntry(Buffer); + Entry = createDIEEntry(*Buffer); insertDIEEntry(Ty, Entry); addDIEEntry(Entity, Attribute, Entry); - - // If this is a complete composite type then include it in the - // list of global types. - addGlobalType(Ty); -} - -// Accelerator table mutators - add each name along with its companion -// DIE to the proper table while ensuring that the name that we're going -// to reference is in the string table. We do this since the names we -// add may not only be identical to the names in the DIE. -void CompileUnit::addAccelName(StringRef Name, DIE *Die) { - DU->getStringPoolEntry(Name); - std::vector<DIE *> &DIEs = AccelNames[Name]; - DIEs.push_back(Die); -} - -void CompileUnit::addAccelObjC(StringRef Name, DIE *Die) { - DU->getStringPoolEntry(Name); - std::vector<DIE *> &DIEs = AccelObjC[Name]; - DIEs.push_back(Die); -} - -void CompileUnit::addAccelNamespace(StringRef Name, DIE *Die) { - DU->getStringPoolEntry(Name); - std::vector<DIE *> &DIEs = AccelNamespace[Name]; - DIEs.push_back(Die); -} - -void CompileUnit::addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { - DU->getStringPoolEntry(Name); - std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name]; - DIEs.push_back(Die); } /// addGlobalName - Add a new global name to the compile unit. -void CompileUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) { +void DwarfUnit::addGlobalName(StringRef Name, DIE &Die, DIScope Context) { + if (getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly) + return; std::string FullName = getParentContextString(Context) + Name.str(); - GlobalNames[FullName] = Die; -} - -/// addGlobalType - Add a new global type to the compile unit. -/// -void CompileUnit::addGlobalType(DIType Ty) { - DIScope Context = resolve(Ty.getContext()); - if (!Ty.getName().empty() && !Ty.isForwardDecl() && - (!Context || Context.isCompileUnit() || Context.isFile() || - Context.isNameSpace())) - if (DIEEntry *Entry = getDIEEntry(Ty)) { - std::string FullName = - getParentContextString(Context) + Ty.getName().str(); - GlobalTypes[FullName] = Entry->getEntry(); - } + GlobalNames[FullName] = &Die; } /// getParentContextString - Walks the metadata parent chain in a language @@ -1018,7 +1044,7 @@ void CompileUnit::addGlobalType(DIType Ty) { /// it as a string. This is done at the metadata level because DIEs may /// not currently have been added to the parent context and walking the /// DIEs looking for names is more expensive than walking the metadata. 
-std::string CompileUnit::getParentContextString(DIScope Context) const { +std::string DwarfUnit::getParentContextString(DIScope Context) const { if (!Context) return ""; @@ -1045,6 +1071,8 @@ std::string CompileUnit::getParentContextString(DIScope Context) const { I != E; ++I) { DIScope Ctx = *I; StringRef Name = Ctx.getName(); + if (Name.empty() && Ctx.isNameSpace()) + Name = "(anonymous namespace)"; if (!Name.empty()) { CS += Name; CS += "::"; @@ -1053,43 +1081,27 @@ std::string CompileUnit::getParentContextString(DIScope Context) const { return CS; } -/// addPubTypes - Add subprogram argument types for pubtypes section. -void CompileUnit::addPubTypes(DISubprogram SP) { - DICompositeType SPTy = SP.getType(); - uint16_t SPTag = SPTy.getTag(); - if (SPTag != dwarf::DW_TAG_subroutine_type) - return; - - DIArray Args = SPTy.getTypeArray(); - for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { - DIType ATy(Args.getElement(i)); - if (!ATy.isType()) - continue; - addGlobalType(ATy); - } -} - /// constructTypeDIE - Construct basic type die from DIBasicType. -void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { // Get core information. StringRef Name = BTy.getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, Name); + addString(Buffer, dwarf::DW_AT_name, Name); // An unspecified type only has a name attribute. if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) return; - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); } /// constructTypeDIE - Construct derived type die from DIDerivedType. -void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. StringRef Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; @@ -1098,74 +1110,43 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Map to main type, void will not have a type. DIType FromTy = resolve(DTy.getTypeDerivedFrom()); if (FromTy) - addType(&Buffer, FromTy); + addType(Buffer, FromTy); // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, Name); + addString(Buffer, dwarf::DW_AT_name, Name); // Add size if non-zero (derived types might be zero-sized.) if (Size && Tag != dwarf::DW_TAG_pointer_type) - addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, - getOrCreateTypeDIE(resolve(DTy.getClassType()))); + addDIEEntry(Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(resolve(DTy.getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. if (!DTy.isForwardDecl()) - addSourceLine(&Buffer, DTy); + addSourceLine(Buffer, DTy); } /// constructSubprogramArguments - Construct function argument DIEs. 
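getParentContextString above builds the qualifier by collecting scopes outward and then printing them innermost-last; with this change an unnamed namespace now contributes "(anonymous namespace)" instead of silently vanishing. Reduced to plain strings:

#include <string>
#include <vector>

struct ScopeName { std::string Name; bool IsNamespace; };

// Scopes ordered outermost first, e.g. {{"std", true}, {"", true}}.
std::string contextString(const std::vector<ScopeName> &Scopes) {
  std::string CS;
  for (const ScopeName &S : Scopes) {
    std::string Name = S.Name;
    if (Name.empty() && S.IsNamespace)
      Name = "(anonymous namespace)";
    if (!Name.empty()) { // unnamed non-namespace scopes still drop out
      CS += Name;
      CS += "::";
    }
  }
  return CS; // e.g. "std::(anonymous namespace)::"
}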
-void CompileUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) { - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIDescriptor Ty = Args.getElement(i); - if (Ty.isUnspecifiedParameter()) { - assert(i == N-1 && "ellipsis must be the last argument"); - createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); - } else { - DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); - addType(Arg, DIType(Ty)); - if (DIType(Ty).isArtificial()) - addFlag(Arg, dwarf::DW_AT_artificial); - } +void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) { + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIDescriptor Ty = Args.getElement(i); + if (Ty.isUnspecifiedParameter()) { + assert(i == N-1 && "Unspecified parameter must be the last argument"); + createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); + } else { + DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); + addType(Arg, DIType(Ty)); + if (DIType(Ty).isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); } -} - -/// Return true if the type is appropriately scoped to be contained inside -/// its own type unit. -static bool isTypeUnitScoped(DIType Ty, const DwarfDebug *DD) { - DIScope Parent = DD->resolve(Ty.getContext()); - while (Parent) { - // Don't generate a hash for anything scoped inside a function. - if (Parent.isSubprogram()) - return false; - Parent = DD->resolve(Parent.getContext()); - } - return true; -} - -/// Return true if the type should be split out into a type unit. -static bool shouldCreateTypeUnit(DICompositeType CTy, const DwarfDebug *DD) { - uint16_t Tag = CTy.getTag(); - - switch (Tag) { - case dwarf::DW_TAG_structure_type: - case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_enumeration_type: - case dwarf::DW_TAG_class_type: - // If this is a class, structure, union, or enumeration type - // that is a definition (not a declaration), and not scoped - // inside a function then separate this out as a type unit. - return !CTy.isForwardDecl() && isTypeUnitScoped(CTy, DD); - default: - return false; } } /// constructTypeDIE - Construct type DIE from DICompositeType. -void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { - // Get core information. +void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { + // Add name if not anonymous or intermediate type. 
StringRef Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; @@ -1183,7 +1164,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIArray Elements = CTy.getTypeArray(); DIType RTy(Elements.getElement(0)); if (RTy) - addType(&Buffer, RTy); + addType(Buffer, RTy); bool isPrototyped = true; if (Elements.getNumElements() == 2 && @@ -1198,7 +1179,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (isPrototyped && (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addFlag(&Buffer, dwarf::DW_AT_prototyped); + addFlag(Buffer, dwarf::DW_AT_prototyped); + + if (CTy.isLValueReference()) + addFlag(Buffer, dwarf::DW_AT_reference); + + if (CTy.isRValueReference()) + addFlag(Buffer, dwarf::DW_AT_rvalue_reference); } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: @@ -1207,25 +1194,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIArray Elements = CTy.getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); - DIE *ElemDie = NULL; - if (Element.isSubprogram()) { - DISubprogram SP(Element); - ElemDie = getOrCreateSubprogramDIE(SP); - if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_private); - else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_public); - if (SP.isExplicit()) - addFlag(ElemDie, dwarf::DW_AT_explicit); - } else if (Element.isDerivedType()) { + if (Element.isSubprogram()) + getOrCreateSubprogramDIE(DISubprogram(Element)); + else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); if (DDTy.getTag() == dwarf::DW_TAG_friend) { - ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); + DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), dwarf::DW_AT_friend); } else if (DDTy.isStaticMember()) { @@ -1235,10 +1209,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); - ElemDie = createAndAddDIE(Property.getTag(), Buffer); + DIE &ElemDie = createAndAddDIE(Property.getTag(), Buffer); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); - addType(ElemDie, Property.getType()); + if (Property.getType()) + addType(ElemDie, Property.getType()); addSourceLine(ElemDie, Property); StringRef GetterName = Property.getObjCPropertyGetterName(); if (!GetterName.empty()) @@ -1273,15 +1248,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isAppleBlockExtension()) - addFlag(&Buffer, dwarf::DW_AT_APPLE_block); + addFlag(Buffer, dwarf::DW_AT_APPLE_block); DICompositeType ContainingType(resolve(CTy.getContainingType())); if (ContainingType) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, - getOrCreateTypeDIE(ContainingType)); + addDIEEntry(Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(ContainingType)); if (CTy.isObjcClassComplete()) - addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); + addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. 
// FIXME: The support isn't in the metadata for this yet. @@ -1297,7 +1272,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, Name); + addString(Buffer, dwarf::DW_AT_name, Name); if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || @@ -1305,37 +1280,32 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add size if non-zero (derived types might be zero-sized.) // TODO: Do we care about size for enum forward declarations? if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); else if (!CTy.isForwardDecl()) // Add zero size if it is not a forward declaration. - addUInt(&Buffer, dwarf::DW_AT_byte_size, None, 0); + addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0); // If we're a forward decl, say so. if (CTy.isForwardDecl()) - addFlag(&Buffer, dwarf::DW_AT_declaration); + addFlag(Buffer, dwarf::DW_AT_declaration); // Add source line info if available. if (!CTy.isForwardDecl()) - addSourceLine(&Buffer, CTy); + addSourceLine(Buffer, CTy); // No harm in adding the runtime language to the declaration. unsigned RLang = CTy.getRunTimeLang(); if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, + addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); } - // If this is a type applicable to a type unit it then add it to the - // list of types we'll compute a hash for later. - if (shouldCreateTypeUnit(CTy, DD)) - DD->addTypeUnitType(&Buffer); } /// constructTemplateTypeParameterDIE - Construct new DIE for the given /// DITemplateTypeParameter. -void -CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer, - DITemplateTypeParameter TP) { - DIE *ParamDIE = +void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP) { + DIE &ParamDIE = createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); // Add the type if it exists, it could be void and therefore no type. if (TP.getType()) @@ -1347,9 +1317,9 @@ CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer, /// constructTemplateValueParameterDIE - Construct new DIE for the given /// DITemplateValueParameter. void -CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, - DITemplateValueParameter VP) { - DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer); +DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter VP) { + DIE &ParamDIE = createAndAddDIE(VP.getTag(), Buffer); // Add the type if there is one, template template and template parameter // packs will not have a type. 
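The byte-size handling for the aggregate tags above distinguishes three cases: a nonzero size, a complete type that happens to be zero-sized (which still gets an explicit DW_AT_byte_size of 0), and a forward declaration, which gets DW_AT_declaration and no size. The same decision in miniature:

#include <cstdio>

void describeComposite(unsigned long long SizeInBits, bool ForwardDecl) {
  unsigned long long Bytes = SizeInBits >> 3;
  if (Bytes)
    std::printf("DW_AT_byte_size = %llu\n", Bytes);
  else if (!ForwardDecl)
    std::printf("DW_AT_byte_size = 0\n"); // complete but empty type
  if (ForwardDecl)
    std::printf("DW_AT_declaration\n");
}

int main() { describeComposite(0, true); } // prints only DW_AT_declaration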
@@ -1359,17 +1329,16 @@ CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); if (Value *Val = VP.getValue()) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) - addConstantValue(ParamDIE, CI, - isUnsignedDIType(DD, resolve(VP.getType()))); + addConstantValue(ParamDIE, CI, resolve(VP.getType())); else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addOpAddress(Block, Asm->getSymbol(GV)); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + addOpAddress(*Loc, Asm->getSymbol(GV)); // Emit DW_OP_stack_value to use the address as the immediate value of the // parameter, rather than a pointer to it. - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); - addBlock(ParamDIE, dwarf::DW_AT_location, Block); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(ParamDIE, dwarf::DW_AT_location, Loc); } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { assert(isa<MDString>(Val)); addString(ParamDIE, dwarf::DW_AT_GNU_template_name, @@ -1377,73 +1346,98 @@ CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { assert(isa<MDNode>(Val)); DIArray A(cast<MDNode>(Val)); - addTemplateParams(*ParamDIE, A); + addTemplateParams(ParamDIE, A); } } } /// getOrCreateNameSpace - Create a DIE for DINameSpace. -DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { +DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); - DIE *NDie = getDIE(NS); - if (NDie) + if (DIE *NDie = getDIE(NS)) return NDie; - NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); + DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); - if (!NS.getName().empty()) { + StringRef Name = NS.getName(); + if (!Name.empty()) addString(NDie, dwarf::DW_AT_name, NS.getName()); - addAccelNamespace(NS.getName(), NDie); - addGlobalName(NS.getName(), NDie, NS.getContext()); - } else - addAccelNamespace("(anonymous namespace)", NDie); + else + Name = "(anonymous namespace)"; + DD->addAccelNamespace(Name, NDie); + addGlobalName(Name, NDie, NS.getContext()); addSourceLine(NDie, NS); - return NDie; + return &NDie; } /// getOrCreateSubprogramDIE - Create new DIE using SP. -DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { +DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE (as is the case for member function // declarations). DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); - DIE *SPDie = getDIE(SP); - if (SPDie) + if (DIE *SPDie = getDIE(SP)) return SPDie; - DISubprogram SPDecl = SP.getFunctionDeclaration(); - if (SPDecl.isSubprogram()) + if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { // Add subprogram definitions to the CU die directly. - ContextDIE = CUDie.get(); + ContextDIE = &getUnitDie(); + // Build the decl now to ensure it precedes the definition. + getOrCreateSubprogramDIE(SPDecl); + } // DW_TAG_inlined_subroutine may refer to this DIE. 
- SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); + DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); - DIE *DeclDie = NULL; - if (SPDecl.isSubprogram()) - DeclDie = getOrCreateSubprogramDIE(SPDecl); + // Stop here and fill this in later, depending on whether or not this + // subprogram turns out to have inlined instances or not. + if (SP.isDefinition()) + return &SPDie; - // Add function template parameters. - addTemplateParams(*SPDie, SP.getTemplateParams()); - - // If this DIE is going to refer declaration info using AT_specification - // then there is no need to add other attributes. - if (DeclDie) { - // Refer function declaration directly. - addDIEEntry(SPDie, dwarf::DW_AT_specification, DeclDie); + applySubprogramAttributes(SP, SPDie); + return &SPDie; +} - return SPDie; +void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) { + DISubprogram SPDecl = SP.getFunctionDeclaration(); + DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext()); + applySubprogramAttributes(SP, SPDie); + addGlobalName(SP.getName(), SPDie, Context); +} + +void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { + DIE *DeclDie = nullptr; + StringRef DeclLinkageName; + if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + DeclDie = getDIE(SPDecl); + assert(DeclDie && "This DIE should've already been constructed when the " + "definition DIE was created in " + "getOrCreateSubprogramDIE"); + DeclLinkageName = SPDecl.getLinkageName(); } - // Add the linkage name if we have one. + // Add function template parameters. + addTemplateParams(SPDie, SP.getTemplateParams()); + + // Add the linkage name if we have one and it isn't in the Decl. StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty()) + assert(((LinkageName.empty() || DeclLinkageName.empty()) || + LinkageName == DeclLinkageName) && + "decl has a linkage name and it is different"); + if (!LinkageName.empty() && DeclLinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, GlobalValue::getRealLinkageName(LinkageName)); + if (DeclDie) { + // Refer to the function declaration where all the other attributes will be + // found. + addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); + return; + } + // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) addString(SPDie, dwarf::DW_AT_name, SP.getName()); @@ -1471,12 +1465,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { unsigned VK = SP.getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); - DIEBlock *Block = getDIEBlock(); - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + DIELoc *Block = getDIELoc(); + addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(*Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); ContainingTypeMap.insert( - std::make_pair(SPDie, resolve(SP.getContainingType()))); + std::make_pair(&SPDie, resolve(SP.getContainingType()))); } if (!SP.isDefinition()) { @@ -1484,7 +1478,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. 
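applySubprogramAttributes above keeps a definition DIE lean whenever a separate declaration exists: apart from template parameters and a linkage name the declaration lacks, every attribute lives on the declaration, and the definition simply points back at it through DW_AT_specification. A deliberately minimal model of that relationship:

#include <string>

// Invented stand-ins: the declaration carries the attributes, the
// definition carries only the DW_AT_specification back-pointer.
struct FnDie {
  std::string Name;                     // populated on the declaration
  const FnDie *Specification = nullptr; // populated on the definition
};

FnDie makeDefinition(const FnDie &Decl) {
  FnDie Def;
  Def.Specification = &Decl; // consumers look through this for Name etc.
  return Def;
}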
- constructSubprogramArguments(*SPDie, Args); + constructSubprogramArguments(SPDie, Args); } if (SP.isArtificial()) @@ -1500,7 +1494,35 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); } - return SPDie; + if (SP.isLValueReference()) + addFlag(SPDie, dwarf::DW_AT_reference); + + if (SP.isRValueReference()) + addFlag(SPDie, dwarf::DW_AT_rvalue_reference); + + if (SP.isProtected()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + + if (SP.isExplicit()) + addFlag(SPDie, dwarf::DW_AT_explicit); +} + +void DwarfUnit::applyVariableAttributes(const DbgVariable &Var, + DIE &VariableDie) { + StringRef Name = Var.getName(); + if (!Name.empty()) + addString(VariableDie, dwarf::DW_AT_name, Name); + addSourceLine(VariableDie, Var.getVariable()); + addType(VariableDie, Var.getType()); + if (Var.isArtificial()) + addFlag(VariableDie, dwarf::DW_AT_artificial); } // Return const expression if value is a GEP to access merged global @@ -1510,42 +1532,40 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) { const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); if (!CE || CE->getNumOperands() != 3 || CE->getOpcode() != Instruction::GetElementPtr) - return NULL; + return nullptr; // First operand points to a global struct. Value *Ptr = CE->getOperand(0); if (!isa<GlobalValue>(Ptr) || !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType())) - return NULL; + return nullptr; // Second operand is zero. const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); if (!CI || !CI->isZero()) - return NULL; + return nullptr; // Third operand is offset. if (!isa<ConstantInt>(CE->getOperand(2))) - return NULL; + return nullptr; return CE; } /// createGlobalVariableDIE - create global variable DIE. -void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { - +void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { // Check for pre-existence. if (getDIE(GV)) return; - if (!GV.isGlobalVariable()) - return; + assert(GV.isGlobalVariable()); DIScope GVContext = GV.getContext(); - DIType GTy = GV.getType(); + DIType GTy = DD->resolve(GV.getType()); // If this is a static data member definition, some attributes belong // to the declaration DIE. - DIE *VariableDIE = NULL; + DIE *VariableDIE = nullptr; bool IsStaticMember = false; DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration(); if (SDMDecl.Verify()) { @@ -1563,64 +1583,66 @@ void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { DIE *ContextDIE = getOrCreateContextDIE(GVContext); // Add to map. - VariableDIE = createAndAddDIE(GV.getTag(), *ContextDIE, GV); + VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV); // Add name and type. - addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); - addType(VariableDIE, GTy); + addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addType(*VariableDIE, GTy); // Add scoping info. if (!GV.isLocalToUnit()) - addFlag(VariableDIE, dwarf::DW_AT_external); + addFlag(*VariableDIE, dwarf::DW_AT_external); // Add line number info. - addSourceLine(VariableDIE, GV); + addSourceLine(*VariableDIE, GV); } // Add location. 
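The accessibility block that moved into applySubprogramAttributes above now always emits exactly one of three DW_ACCESS values, defaulting to public when neither flag is set. Sketched with the spec's constants:

enum Access : unsigned char {
  DW_ACCESS_public = 0x01,
  DW_ACCESS_protected = 0x02,
  DW_ACCESS_private = 0x03
};

Access accessOf(bool IsProtected, bool IsPrivate) {
  if (IsProtected) return DW_ACCESS_protected;
  if (IsPrivate)   return DW_ACCESS_private;
  return DW_ACCESS_public;
}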
bool addToAccelTable = false; - DIE *VariableSpecDIE = NULL; - bool isGlobalVariable = GV.getGlobal() != NULL; + DIE *VariableSpecDIE = nullptr; + bool isGlobalVariable = GV.getGlobal() != nullptr; if (isGlobalVariable) { addToAccelTable = true; - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); if (GV.getGlobal()->isThreadLocal()) { // FIXME: Make this work with -gsplit-dwarf. unsigned PointerSize = Asm->getDataLayout().getPointerSize(); assert((PointerSize == 4 || PointerSize == 8) && "Add support for other sizes if necessary"); - const MCExpr *Expr = - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym); // Based on GCC's support for TLS: if (!DD->useSplitDwarf()) { // 1) Start with a constNu of the appropriate pointer size - addUInt(Block, dwarf::DW_FORM_data1, + addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); // 2) containing the (relocated) offset of the TLS variable // within the module's TLS block. - addExpr(Block, dwarf::DW_FORM_udata, Expr); + addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); } else { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(Block, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr)); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); } // 3) followed by a custom OP to make the debugger do a TLS lookup. - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); - } else - addOpAddress(Block, Sym); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + } else { + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); + } // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) { // Create specification DIE. - VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *CUDie); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, Block); + VariableSpecDIE = &createAndAddDIE(dwarf::DW_TAG_variable, UnitDie); + addDIEEntry(*VariableSpecDIE, dwarf::DW_AT_specification, *VariableDIE); + addBlock(*VariableSpecDIE, dwarf::DW_AT_location, Loc); // A static member's declaration is already flagged as such. if (!SDMDecl.Verify()) - addFlag(VariableDIE, dwarf::DW_AT_declaration); + addFlag(*VariableDIE, dwarf::DW_AT_declaration); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, Block); + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); } // Add the linkage name. StringRef LinkageName = GV.getLinkageName(); @@ -1628,9 +1650,10 @@ void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and // TAG_variable. - addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE - : VariableDIE, - dwarf::DW_AT_MIPS_linkage_name, + addString(IsStaticMember && VariableSpecDIE ? *VariableSpecDIE + : *VariableDIE, + DD->getDwarfVersion() >= 4 ? 
dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, GlobalValue::getRealLinkageName(LinkageName)); } else if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) { @@ -1638,41 +1661,41 @@ void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { // emitting AT_const_value multiple times, we only add AT_const_value when // it is not a static member. if (!IsStaticMember) - addConstantValue(VariableDIE, CI, isUnsignedDIType(DD, GTy)); + addConstantValue(*VariableDIE, CI, GTy); } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) { addToAccelTable = true; // GV is a merged global. - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); Value *Ptr = CE->getOperand(0); - addOpAddress(Block, Asm->getSymbol(cast<GlobalValue>(Ptr))); - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr)); + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); - addUInt(Block, dwarf::DW_FORM_udata, + addUInt(*Loc, dwarf::DW_FORM_udata, Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, Block); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); } if (addToAccelTable) { - DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE; - addAccelName(GV.getName(), AddrDIE); + DIE &AddrDIE = VariableSpecDIE ? *VariableSpecDIE : *VariableDIE; + DD->addAccelName(GV.getName(), AddrDIE); // If the linkage name is different than the name, go ahead and output // that as well into the name table. if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) - addAccelName(GV.getLinkageName(), AddrDIE); + DD->addAccelName(GV.getLinkageName(), AddrDIE); } - if (!GV.isLocalToUnit()) - addGlobalName(GV.getName(), VariableSpecDIE ? VariableSpecDIE : VariableDIE, - GV.getContext()); + addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE, + GV.getContext()); } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, - DIE *IndexTy) { - DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy); +void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { + DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy); // The LowerBound value defines the lower bounds which is typically zero for // C/C++. The Count value is the number of elements. Values are 64 bit. If @@ -1695,24 +1718,24 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { +void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { if (CTy.isVector()) - addFlag(&Buffer, dwarf::DW_AT_GNU_vector); + addFlag(Buffer, dwarf::DW_AT_GNU_vector); // Emit the element type. - addType(&Buffer, resolve(CTy.getTypeDerivedFrom())); + addType(Buffer, resolve(CTy.getTypeDerivedFrom())); // Get an anonymous type for index type. 
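As a concrete illustration of the subrange handling (editorial, assuming the usual front-end metadata for C), "int a[10]" arrives with LowerBound = 0 and Count = 10, producing a DW_TAG_subrange_type child with DW_AT_type pointing at the shared index type created just below and DW_AT_upper_bound = 9:

  #include <cstdint>

  // Upper bound as emitted for a fixed-size array. A Count of -1 marks an
  // array of unknown size (e.g. "extern int b[];"), for which no bound
  // attribute is emitted at all.
  int64_t arrayUpperBound(int64_t LowerBound, int64_t Count) {
    return LowerBound + Count - 1; // only meaningful when Count >= 1
  }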
// FIXME: This type should be passed down from the front end // as different languages may have different sizes for indexes. DIE *IdxTy = getIndexTyDie(); if (!IdxTy) { - // Construct an anonymous type for index type. - IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *CUDie.get()); - addString(IdxTy, dwarf::DW_AT_name, "int"); - addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int32_t)); - addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_signed); + // Construct an integer type to use for indexes. + IdxTy = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie); + addString(*IdxTy, dwarf::DW_AT_name, "sizetype"); + addUInt(*IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); + addUInt(*IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_unsigned); setIndexTyDie(IdxTy); } @@ -1726,66 +1749,60 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { } /// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. -void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { +void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { DIArray Elements = CTy.getTypeArray(); // Add enumerators to enumeration type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIEnumerator Enum(Elements.getElement(i)); if (Enum.isEnumerator()) { - DIE *Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); + DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); StringRef Name = Enum.getName(); addString(Enumerator, dwarf::DW_AT_name, Name); int64_t Value = Enum.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + Value); } } DIType DTy = resolve(CTy.getTypeDerivedFrom()); if (DTy) { - addType(&Buffer, DTy); - addFlag(&Buffer, dwarf::DW_AT_enum_class); + addType(Buffer, DTy); + addFlag(Buffer, dwarf::DW_AT_enum_class); } } /// constructContainingTypeDIEs - Construct DIEs for types that contain /// vtables. -void CompileUnit::constructContainingTypeDIEs() { +void DwarfUnit::constructContainingTypeDIEs() { for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); CI != CE; ++CI) { - DIE *SPDie = CI->first; + DIE &SPDie = *CI->first; DIDescriptor D(CI->second); if (!D) continue; DIE *NDie = getDIE(D); if (!NDie) continue; - addDIEEntry(SPDie, dwarf::DW_AT_containing_type, NDie); + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, *NDie); } } /// constructVariableDIE - Construct a DIE for the given DbgVariable. -DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { - StringRef Name = DV.getName(); +std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV, + bool Abstract) { + auto D = constructVariableDIEImpl(DV, Abstract); + DV.setDIE(*D); + return D; +} +std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract) { // Define variable debug information entry. - DIE *VariableDie = new DIE(DV.getTag()); - DbgVariable *AbsVar = DV.getAbstractVariable(); - DIE *AbsDIE = AbsVar ? 
AbsVar->getDIE() : NULL; - if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, AbsDIE); - else { - if (!Name.empty()) - addString(VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(VariableDie, DV.getVariable()); - addType(VariableDie, DV.getType()); - } - - if (DV.isArtificial()) - addFlag(VariableDie, dwarf::DW_AT_artificial); + auto VariableDie = make_unique<DIE>(DV.getTag()); - if (isScopeAbstract) { - DV.setDIE(VariableDie); + if (Abstract) { + applyVariableAttributes(DV, *VariableDie); return VariableDie; } @@ -1793,9 +1810,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { unsigned Offset = DV.getDotDebugLocOffset(); if (Offset != ~0U) { - addSectionLabel(VariableDie, dwarf::DW_AT_location, - Asm->GetTempSymbol("debug_loc", Offset)); - DV.setDIE(VariableDie); + addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); return VariableDie; } @@ -1808,38 +1823,36 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { if (DVInsn->getOperand(1).isImm()) { MachineLocation Location(RegOp.getReg(), DVInsn->getOperand(1).getImm()); - addVariableAddress(DV, VariableDie, Location); + addVariableAddress(DV, *VariableDie, Location); } else if (RegOp.getReg()) - addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg())); + addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg())); } else if (DVInsn->getOperand(0).isImm()) - addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType()); + addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); else if (DVInsn->getOperand(0).isFPImm()) - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + addConstantFPValue(*VariableDie, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isCImm()) - addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(), - isUnsignedDIType(DD, DV.getType())); + addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(), + DV.getType()); - DV.setDIE(VariableDie); return VariableDie; - } else { - // .. else use frame index. - int FI = DV.getFrameIndex(); - if (FI != ~0) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, VariableDie, Location); - } } - DV.setDIE(VariableDie); + // .. else use frame index. + int FI = DV.getFrameIndex(); + if (FI != ~0) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + MachineLocation Location(FrameReg, Offset); + addVariableAddress(DV, *VariableDie, Location); + } + return VariableDie; } /// constructMemberDIE - Construct member DIE from DIDerivedType. -void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { - DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer); +void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { + DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer); StringRef Name = DT.getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); @@ -1854,14 +1867,14 @@ void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { // expression to extract appropriate offset from vtable. 
// BaseAddr = ObAddr + *((*ObAddr) - Offset) - DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc(); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); } else { @@ -1870,10 +1883,9 @@ void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { uint64_t OffsetInBytes; if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, None, - getBaseTypeSize(DD, DT) >> 3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, None, DT.getSizeInBits()); + // Handle bitfield, assume bytes are 8 bits. + addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); + addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); uint64_t Offset = DT.getOffsetInBits(); uint64_t AlignMask = ~(DT.getAlignInBits() - 1); @@ -1886,7 +1898,7 @@ void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { Offset = FieldSize - (Offset + Size); addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); - // Here WD_AT_data_member_location points to the anonymous + // Here DW_AT_data_member_location points to the anonymous // field that includes this bit field. OffsetInBytes = FieldOffset >> 3; } else @@ -1894,9 +1906,9 @@ void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { OffsetInBytes = DT.getOffsetInBits() >> 3; if (DD->getDwarfVersion() <= 2) { - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); + DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc(); + addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); } else addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, @@ -1920,17 +1932,17 @@ void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { // Objective-C properties. if (MDNode *PNode = DT.getObjCProperty()) if (DIEEntry *PropertyDie = getDIEEntry(PNode)) - MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, - PropertyDie); + MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, + PropertyDie); if (DT.isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); } /// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. 
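To make the virtual-base location expression above concrete, here is an editorial evaluator (not part of the patch) that mirrors the six opcodes one by one; 'Load' stands in for DW_OP_deref's read of target memory:

  #include <cstdint>
  #include <vector>

  // Computes BaseAddr = ObAddr + *((*ObAddr) - Offset), tracking the DWARF
  // expression stack exactly as the emitted opcodes would.
  template <typename LoadFn>
  uint64_t virtualBaseAddr(uint64_t ObAddr, uint64_t Offset, LoadFn Load) {
    std::vector<uint64_t> S{ObAddr};
    S.push_back(S.back());           // DW_OP_dup    : ObAddr ObAddr
    S.back() = Load(S.back());       // DW_OP_deref  : ObAddr vtable-ptr
    S.push_back(Offset);             // DW_OP_constu : ... Offset
    uint64_t O = S.back(); S.pop_back();
    S.back() -= O;                   // DW_OP_minus  : ObAddr (vtable-ptr - Offset)
    S.back() = Load(S.back());       // DW_OP_deref  : ObAddr vbase-offset
    uint64_t V = S.back(); S.pop_back();
    S.back() += V;                   // DW_OP_plus   : BaseAddr
    return S.back();
  }

The little-endian bitfield flip above is similar arithmetic: for a 32-bit storage unit (FieldSize = 32), a member with Offset = 0 and Size = 3 gets DW_AT_bit_offset = 32 - (0 + 3) = 29, since DWARF counts bit offsets from the most significant bit of the storage unit.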
-DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { +DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { if (!DT.Verify()) - return NULL; + return nullptr; // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. @@ -1938,11 +1950,10 @@ DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { assert(dwarf::isType(ContextDIE->getTag()) && "Static member should belong to a type."); - DIE *StaticMemberDIE = getDIE(DT); - if (StaticMemberDIE) + if (DIE *StaticMemberDIE = getDIE(DT)) return StaticMemberDIE; - StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + DIE &StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); DIType Ty = resolve(DT.getTypeDerivedFrom()); @@ -1965,20 +1976,95 @@ DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { dwarf::DW_ACCESS_public); if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant())) - addConstantValue(StaticMemberDIE, CI, isUnsignedDIType(DD, Ty)); + addConstantValue(StaticMemberDIE, CI, Ty); if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant())) addConstantFPValue(StaticMemberDIE, CFP); - return StaticMemberDIE; + return &StaticMemberDIE; } -void CompileUnit::emitHeader(const MCSection *ASection, - const MCSymbol *ASectionSym) { +void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { Asm->OutStreamer.AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), - ASectionSym); + // We share one abbreviations table across all units so it's always at the + // start of the section. Use a relocatable offset where needed to ensure + // linking doesn't invalidate that offset. + if (ASectionSym) + Asm->EmitSectionOffset(ASectionSym, ASectionSym); + else + // Use a constant value when no symbol is provided. + Asm->EmitInt32(0); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } + +void DwarfUnit::addRange(RangeSpan Range) { + // Only add a range for this unit if we're emitting full debug. + if (getCUNode().getEmissionKind() == DIBuilder::FullDebug) { + // If we have no current ranges just add the range and return, otherwise, + // check the current section and CU against the previous section and CU we + // emitted into and the subprogram was contained within. If these are the + // same then extend our current range, otherwise add this as a new range. + if (CURanges.size() == 0 || + this != DD->getPrevCU() || + Asm->getCurrentSection() != DD->getPrevSection()) { + CURanges.push_back(Range); + return; + } + + assert(&(CURanges.back().getEnd()->getSection()) == + &(Range.getEnd()->getSection()) && + "We can only append to a range in the same section!"); + CURanges.back().setEnd(Range.getEnd()); + } +} + +void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { + // Define start line table label for each Compile Unit. + MCSymbol *LineTableStartSym = + Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); + + stmtListIndex = UnitDie.getValues().size(); + + // DW_AT_stmt_list is a offset of line number information for this + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. 
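The header emitted above has a fixed shape; this editorial sketch (assuming DWARF32 and little-endian output; names are not from the patch) spells out the bytes that getHeaderSize() counts. The initial length word is written separately by the caller, hence "not including the initial length field":

  #include <cstdint>
  #include <vector>

  std::vector<uint8_t> cuHeader(uint16_t Version, uint32_t AbbrevOffset,
                                uint8_t AddrSize) {
    std::vector<uint8_t> H;
    auto le = [&H](uint64_t V, int Bytes) { // little-endian append
      for (int i = 0; i < Bytes; ++i)
        H.push_back(uint8_t(V >> (8 * i)));
    };
    le(Version, 2);      // DWARF version number
    le(AbbrevOffset, 4); // offset into the abbreviations section (0 if unlinked)
    le(AddrSize, 1);     // address size in bytes
    return H;            // 7 bytes, matching getHeaderSize()
  }

A type unit appends an 8-byte type signature and a 4-byte type DIE offset to this, which is exactly what DwarfTypeUnit::getHeaderSize() adds later in the patch.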
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym); + else + addSectionDelta(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, + DwarfLineSectionSym); +} + +void DwarfCompileUnit::applyStmtList(DIE &D) { + D.addValue(dwarf::DW_AT_stmt_list, + UnitDie.getAbbrev().getData()[stmtListIndex].getForm(), + UnitDie.getValues()[stmtListIndex]); +} + +void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { + DwarfUnit::emitHeader(ASectionSym); + Asm->OutStreamer.AddComment("Type Signature"); + Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature)); + Asm->OutStreamer.AddComment("Type DIE Offset"); + // In a skeleton type unit there is no type DIE so emit a zero offset. + Asm->OutStreamer.EmitIntValue(Ty ? Ty->getOffset() : 0, + sizeof(Ty->getOffset())); +} + +void DwarfTypeUnit::initSection(const MCSection *Section) { + assert(!this->Section); + this->Section = Section; + // Since each type unit is contained in its own COMDAT section, the begin + // label and the section label are the same. Using the begin label emission in + // DwarfDebug to emit the section label as well is slightly subtle/sneaky, but + // the only other alternative of lazily constructing start-of-section labels + // and storing a mapping in DwarfDebug (or AsmPrinter). + this->SectionSym = this->LabelBegin = + Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + this->LabelEnd = + Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h new file mode 100644 index 0000000..b7b83b2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -0,0 +1,588 @@ +//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H + +#include "DIE.h" +#include "DwarfDebug.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCDwarf.h" + +namespace llvm { + +class MachineLocation; +class MachineOperand; +class ConstantInt; +class ConstantFP; +class DbgVariable; +class DwarfCompileUnit; + +// Data structure to hold a range for range lists. +class RangeSpan { +public: + RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {} + const MCSymbol *getStart() const { return Start; } + const MCSymbol *getEnd() const { return End; } + void setEnd(const MCSymbol *E) { End = E; } + +private: + const MCSymbol *Start, *End; +}; + +class RangeSpanList { +private: + // Index for locating within the debug_range section this particular span. + MCSymbol *RangeSym; + // List of ranges. 
+ SmallVector<RangeSpan, 2> Ranges; + +public: + RangeSpanList(MCSymbol *Sym) : RangeSym(Sym) {} + MCSymbol *getSym() const { return RangeSym; } + const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } + void addRange(RangeSpan Range) { Ranges.push_back(Range); } +}; + +//===----------------------------------------------------------------------===// +/// Unit - This dwarf writer support class manages information associated +/// with a source file. +class DwarfUnit { +protected: + /// UniqueID - a numeric ID unique among all CUs in the module + unsigned UniqueID; + + /// Node - MDNode for the compile unit. + DICompileUnit CUNode; + + /// Unit debug information entry. + DIE UnitDie; + + /// Offset of the UnitDie from beginning of debug info section. + unsigned DebugInfoOffset; + + /// Asm - Target of Dwarf emission. + AsmPrinter *Asm; + + // Holders for some common dwarf information. + DwarfDebug *DD; + DwarfFile *DU; + + /// IndexTyDie - An anonymous type for index type. Owned by UnitDie. + DIE *IndexTyDie; + + /// MDNodeToDieMap - Tracks the mapping of unit level debug information + /// variables to debug information entries. + DenseMap<const MDNode *, DIE *> MDNodeToDieMap; + + /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information + /// descriptors to debug information entries using a DIEEntry proxy. + DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; + + /// GlobalNames - A map of globally visible named entities for this unit. + StringMap<const DIE *> GlobalNames; + + /// GlobalTypes - A map of globally visible types for this unit. + StringMap<const DIE *> GlobalTypes; + + /// DIEBlocks - A list of all the DIEBlocks in use. + std::vector<DIEBlock *> DIEBlocks; + + /// DIELocs - A list of all the DIELocs in use. + std::vector<DIELoc *> DIELocs; + + /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that + /// need DW_AT_containing_type attribute. This attribute points to a DIE that + /// corresponds to the MDNode mapped with the subprogram DIE. + DenseMap<DIE *, const MDNode *> ContainingTypeMap; + + // List of ranges for a given compile unit. + SmallVector<RangeSpan, 1> CURanges; + + // List of range lists for a given compile unit, separate from the ranges for + // the CU itself. + SmallVector<RangeSpanList, 1> CURangeLists; + + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + + // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently. + DIEInteger *DIEIntegerOne; + + /// The section this unit will be emitted in. + const MCSection *Section; + + /// A label at the start of the non-dwo section related to this unit. + MCSymbol *SectionSym; + + /// The start of the unit within its section. + MCSymbol *LabelBegin; + + /// The end of the unit within its section. + MCSymbol *LabelEnd; + + /// Skeleton unit associated with this unit. + DwarfUnit *Skeleton; + + DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + +public: + virtual ~DwarfUnit(); + + /// Set the skeleton unit associated with this unit. + void setSkeleton(DwarfUnit &Skel) { Skeleton = &Skel; } + + /// Get the skeleton unit associated with this unit. + DwarfUnit *getSkeleton() const { return Skeleton; } + + /// Pass in the SectionSym even though we could recreate it in every compile + /// unit (type units will have actually distinct symbols once they're in + /// comdat sections). 
+ void initSection(const MCSection *Section, MCSymbol *SectionSym) { + assert(!this->Section); + this->Section = Section; + this->SectionSym = SectionSym; + this->LabelBegin = + Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + this->LabelEnd = + Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); + } + + const MCSection *getSection() const { + assert(Section); + return Section; + } + + /// If there's a skeleton then return the section symbol for the skeleton + /// unit, otherwise return the section symbol for this unit. + MCSymbol *getLocalSectionSym() const { + if (Skeleton) + return Skeleton->getSectionSym(); + return getSectionSym(); + } + + MCSymbol *getSectionSym() const { + assert(Section); + return SectionSym; + } + + /// If there's a skeleton then return the begin label for the skeleton unit, + /// otherwise return the local label for this unit. + MCSymbol *getLocalLabelBegin() const { + if (Skeleton) + return Skeleton->getLabelBegin(); + return getLabelBegin(); + } + + MCSymbol *getLabelBegin() const { + assert(Section); + return LabelBegin; + } + + MCSymbol *getLabelEnd() const { + assert(Section); + return LabelEnd; + } + + // Accessors. + unsigned getUniqueID() const { return UniqueID; } + uint16_t getLanguage() const { return CUNode.getLanguage(); } + DICompileUnit getCUNode() const { return CUNode; } + DIE &getUnitDie() { return UnitDie; } + const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; } + const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; } + + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } + void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } + + /// hasContent - Return true if this compile unit has something to write out. + bool hasContent() const { return !UnitDie.getChildren().empty(); } + + /// addRange - Add an address range to the list of ranges for this unit. + void addRange(RangeSpan Range); + + /// getRanges - Get the list of ranges for this unit. + const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; } + SmallVectorImpl<RangeSpan> &getRanges() { return CURanges; } + + /// addRangeList - Add an address range list to the list of range lists. + void addRangeList(RangeSpanList Ranges) { CURangeLists.push_back(Ranges); } + + /// getRangeLists - Get the vector of range lists. + const SmallVectorImpl<RangeSpanList> &getRangeLists() const { + return CURangeLists; + } + SmallVectorImpl<RangeSpanList> &getRangeLists() { return CURangeLists; } + + /// getParentContextString - Get a string containing the language specific + /// context for a global name. + std::string getParentContextString(DIScope Context) const; + + /// addGlobalName - Add a new global entity to the compile unit. + /// + void addGlobalName(StringRef Name, DIE &Die, DIScope Context); + + /// addAccelNamespace - Add a new name to the namespace accelerator table. + void addAccelNamespace(StringRef Name, const DIE &Die); + + /// getDIE - Returns the debug information entry map slot for the + /// specified debug variable. We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + DIE *getDIE(DIDescriptor D) const; + + /// getDIELoc - Returns a fresh newly allocated DIELoc. + DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc(); } + + /// insertDIE - Insert DIE into the map. 
We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + void insertDIE(DIDescriptor Desc, DIE *D); + + /// addFlag - Add a flag that is true to the DIE. + void addFlag(DIE &Die, dwarf::Attribute Attribute); + + /// addUInt - Add an unsigned integer attribute data and value. + void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + uint64_t Integer); + + void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer); + + /// addSInt - Add an signed integer attribute data and value. + void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + int64_t Integer); + + void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); + + /// addString - Add a string attribute data and value. + void addString(DIE &Die, dwarf::Attribute Attribute, const StringRef Str); + + /// addLocalString - Add a string attribute data and value. + void addLocalString(DIE &Die, dwarf::Attribute Attribute, + const StringRef Str); + + /// addExpr - Add a Dwarf expression attribute data and value. + void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); + + /// addLabel - Add a Dwarf label attribute data and value. + void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, + const MCSymbol *Label); + + void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); + + /// addLocationList - Add a Dwarf loclistptr attribute data and value. + void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index); + + /// addSectionLabel - Add a Dwarf section label attribute data and value. + /// + void addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addSectionOffset - Add an offset into a section attribute data and value. + /// + void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); + + /// addOpAddress - Add a dwarf op address data and value using the + /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. + void addOpAddress(DIELoc &Die, const MCSymbol *Label); + + /// addSectionDelta - Add a label delta attribute data and value. + void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + /// addLabelDelta - Add a label delta attribute data and value. + void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + /// addDIEEntry - Add a DIE attribute data and value. + void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); + + /// addDIEEntry - Add a DIE attribute data and value. + void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry *Entry); + + void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); + + /// addBlock - Add block data. + void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); + + /// addBlock - Add block data. + void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); + + /// addSourceLine - Add location information to specified debug information + /// entry. 
+ void addSourceLine(DIE &Die, unsigned Line, StringRef File, + StringRef Directory); + void addSourceLine(DIE &Die, DIVariable V); + void addSourceLine(DIE &Die, DIGlobalVariable G); + void addSourceLine(DIE &Die, DISubprogram SP); + void addSourceLine(DIE &Die, DIType Ty); + void addSourceLine(DIE &Die, DINameSpace NS); + void addSourceLine(DIE &Die, DIObjCProperty Ty); + + /// addAddress - Add an address attribute to a die based on the location + /// provided. + void addAddress(DIE &Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect = false); + + /// addConstantValue - Add constant value entry in variable DIE. + void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty); + void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty); + void addConstantValue(DIE &Die, const APInt &Val, DIType Ty); + void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); + + /// addConstantFPValue - Add constant value entry in variable DIE. + void addConstantFPValue(DIE &Die, const MachineOperand &MO); + void addConstantFPValue(DIE &Die, const ConstantFP *CFP); + + /// addTemplateParams - Add template parameters in buffer. + void addTemplateParams(DIE &Buffer, DIArray TParams); + + /// addRegisterOp - Add register operand. + void addRegisterOp(DIELoc &TheDie, unsigned Reg); + + /// addRegisterOffset - Add register offset. + void addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); + + /// addComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + void addComplexAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of addComplexAddress. + /// addBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use addComplexAddress instead. + void addBlockByrefAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location); + + /// addVariableAddress - Add DW_AT_location attribute for a + /// DbgVariable based on provided MachineLocation. + void addVariableAddress(const DbgVariable &DV, DIE &Die, + MachineLocation Location); + + /// addType - Add a new type attribute to the specified entity. This takes + /// and attribute parameter because DW_AT_friend attributes are also + /// type references. + void addType(DIE &Entity, DIType Ty, + dwarf::Attribute Attribute = dwarf::DW_AT_type); + + /// getOrCreateNameSpace - Create a DIE for DINameSpace. + DIE *getOrCreateNameSpace(DINameSpace NS); + + /// getOrCreateSubprogramDIE - Create new DIE using SP. + DIE *getOrCreateSubprogramDIE(DISubprogram SP); + + void applySubprogramAttributes(DISubprogram SP, DIE &SPDie); + void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie); + void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie); + + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the + /// given DIType. 
+ DIE *getOrCreateTypeDIE(const MDNode *N); + + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *createTypeDIE(DICompositeType Ty); + + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *getOrCreateContextDIE(DIScope Context); + + /// constructContainingTypeDIEs - Construct DIEs for types that contain + /// vtables. + void constructContainingTypeDIEs(); + + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV, + bool Abstract = false); + + /// constructSubprogramArguments - Construct function argument DIEs. + void constructSubprogramArguments(DIE &Buffer, DIArray Args); + + /// Create a DIE with the given Tag, add the DIE to its parent, and + /// call insertDIE if MD is not null. + DIE &createAndAddDIE(unsigned Tag, DIE &Parent, + DIDescriptor N = DIDescriptor()); + + /// Compute the size of a header for this unit, not including the initial + /// length field. + virtual unsigned getHeaderSize() const { + return sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + } + + /// Emit the header for this unit, not including the initial length field. + virtual void emitHeader(const MCSymbol *ASectionSym) const; + + virtual DwarfCompileUnit &getCU() = 0; + + /// constructTypeDIE - Construct type DIE from DICompositeType. + void constructTypeDIE(DIE &Buffer, DICompositeType CTy); + +protected: + /// getOrCreateStaticMemberDIE - Create new static data member DIE. + DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); + + /// Look up the source ID with the given directory and source file names. If + /// none currently exists, create a new ID and insert it in the line table. + virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; + +private: + /// \brief Construct a DIE for the given DbgVariable without initializing the + /// DbgVariable's DIE reference. + std::unique_ptr<DIE> constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract); + + /// constructTypeDIE - Construct basic type die from DIBasicType. + void constructTypeDIE(DIE &Buffer, DIBasicType BTy); + + /// constructTypeDIE - Construct derived type die from DIDerivedType. + void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); + + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. + void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); + + /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. + void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy); + + /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. + void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy); + + /// constructMemberDIE - Construct member DIE from DIDerivedType. + void constructMemberDIE(DIE &Buffer, DIDerivedType DT); + + /// constructTemplateTypeParameterDIE - Construct new DIE for the given + /// DITemplateTypeParameter. + void constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP); + + /// constructTemplateValueParameterDIE - Construct new DIE for the given + /// DITemplateValueParameter. + void constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter TVP); + + /// getLowerBoundDefault - Return the default lower bound for an array. If the + /// DWARF version doesn't handle the language, return -1. + int64_t getDefaultLowerBound() const; + + /// getDIEEntry - Returns the debug information entry for the specified + /// debug variable. 
+ DIEEntry *getDIEEntry(const MDNode *N) const { + return MDNodeToDIEEntryMap.lookup(N); + } + + /// insertDIEEntry - Insert debug information entry into the map. + void insertDIEEntry(const MDNode *N, DIEEntry *E) { + MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); + } + + // getIndexTyDie - Get an anonymous type for index type. + DIE *getIndexTyDie() { return IndexTyDie; } + + // setIndexTyDie - Set D as anonymous type for index which can be reused + // later. + void setIndexTyDie(DIE *D) { IndexTyDie = D; } + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE &Entry); + + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template <typename T> T resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); + } + + /// If this is a named finished type then include it in the list of types for + /// the accelerator tables. + void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE); +}; + +class DwarfCompileUnit : public DwarfUnit { + /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding + /// the need to search for it in applyStmtList. + unsigned stmtListIndex; + +public: + DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + + void initStmtList(MCSymbol *DwarfLineSectionSym); + + /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. + void applyStmtList(DIE &D); + + /// createGlobalVariableDIE - create global variable DIE. + void createGlobalVariableDIE(DIGlobalVariable GV); + + /// addLabelAddress - Add a dwarf label attribute data and value using + /// either DW_FORM_addr or DW_FORM_GNU_addr_index. + void addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addLocalLabelAddress - Add a dwarf label attribute data and value using + /// DW_FORM_addr only. + void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + DwarfCompileUnit &getCU() override { return *this; } + + unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; +}; + +class DwarfTypeUnit : public DwarfUnit { +private: + uint64_t TypeSignature; + const DIE *Ty; + DwarfCompileUnit &CU; + MCDwarfDwoLineTable *SplitLineTable; + +public: + DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU, + MCDwarfDwoLineTable *SplitLineTable = nullptr); + + void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; } + uint64_t getTypeSignature() const { return TypeSignature; } + void setType(const DIE *Ty) { this->Ty = Ty; } + + /// Emit the header for this unit, not including the initial length field. 
+ void emitHeader(const MCSymbol *ASectionSym) const override; + unsigned getHeaderSize() const override { + return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature + sizeof(uint32_t); // Type DIE Offset + } + void initSection(const MCSection *Section); + DwarfCompileUnit &getCU() override { return CU; } + +protected: + unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; +}; +} // end llvm namespace +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 7133458..73f62bf 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -1,4 +1,4 @@ -//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===// // // The LLVM Compiler Infrastructure // @@ -7,44 +7,31 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing DWARF exception info into asm files. +// This file contains support for writing exception info into assembly files. // //===----------------------------------------------------------------------===// -#include "DwarfException.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" +#include "EHStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Support/LEB128.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" + using namespace llvm; -DwarfException::DwarfException(AsmPrinter *A) - : Asm(A), MMI(Asm->MMI) {} +EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} -DwarfException::~DwarfException() {} +EHStreamer::~EHStreamer() {} -/// SharedTypeIds - How many leading type ids two landing pads have in common. -unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, - const LandingPadInfo *R) { +/// How many leading type ids two landing pads have in common. +unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L, + const LandingPadInfo *R) { const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; unsigned LSize = LIds.size(), RSize = RIds.size(); unsigned MinSize = LSize < RSize ? LSize : RSize; @@ -57,23 +44,10 @@ unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, return Count; } -/// PadLT - Order landing pads lexicographically by type id. -bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) { - const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; - unsigned LSize = LIds.size(), RSize = RIds.size(); - unsigned MinSize = LSize < RSize ? 
LSize : RSize; - - for (unsigned i = 0; i != MinSize; ++i) - if (LIds[i] != RIds[i]) - return LIds[i] < RIds[i]; - - return LSize < RSize; -} - -/// ComputeActionsTable - Compute the actions table and gather the first action -/// index for each landing pad site. -unsigned DwarfException:: -ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, +/// Compute the actions table and gather the first action index for each landing +/// pad site. +unsigned EHStreamer:: +computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, SmallVectorImpl<ActionEntry> &Actions, SmallVectorImpl<unsigned> &FirstActions) { @@ -108,20 +82,20 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, for (std::vector<unsigned>::const_iterator I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) { FilterOffsets.push_back(Offset); - Offset -= MCAsmInfo::getULEB128Size(*I); + Offset -= getULEB128Size(*I); } FirstActions.reserve(LandingPads.size()); int FirstAction = 0; unsigned SizeActions = 0; - const LandingPadInfo *PrevLPI = 0; + const LandingPadInfo *PrevLPI = nullptr; for (SmallVectorImpl<const LandingPadInfo *>::const_iterator I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) { const LandingPadInfo *LPI = *I; const std::vector<int> &TypeIds = LPI->TypeIds; - unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0; + unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0; unsigned SizeSiteActions = 0; if (NumShared < TypeIds.size()) { @@ -132,14 +106,12 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); PrevAction = Actions.size() - 1; - SizeAction = - MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) + - MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeAction = getSLEB128Size(Actions[PrevAction].NextAction) + + getSLEB128Size(Actions[PrevAction].ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!"); - SizeAction -= - MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeAction -= getSLEB128Size(Actions[PrevAction].ValueForTypeID); SizeAction += -Actions[PrevAction].NextAction; PrevAction = Actions[PrevAction].Previous; } @@ -150,10 +122,10 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, int TypeID = TypeIds[J]; assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; - unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID); + unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction); + SizeAction = SizeTypeID + getSLEB128Size(NextAction); SizeSiteActions += SizeAction; ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; @@ -181,9 +153,9 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, return SizeActions; } -/// CallToNoUnwindFunction - Return `true' if this is a call to a function -/// marked `nounwind'. Return `false' otherwise. -bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { +/// Return `true' if this is a call to a function marked `nounwind'. Return +/// `false' otherwise. 
+bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { assert(MI->isCall() && "This should be a call instruction!"); bool MarkedNoUnwind = false; @@ -195,7 +167,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { if (!MO.isGlobal()) continue; const Function *F = dyn_cast<Function>(MO.getGlobal()); - if (F == 0) continue; + if (!F) continue; if (SawFunc) { // Be conservative. If we have more than one function operand for this @@ -215,20 +187,19 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { return MarkedNoUnwind; } -/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke -/// has a try-range containing the call, a non-zero landing pad, and an -/// appropriate action. The entry for an ordinary call has a try-range -/// containing the call and zero for the landing pad and the action. Calls -/// marked 'nounwind' have no entry and must not be contained in the try-range -/// of any entry - they form gaps in the table. Entries must be ordered by -/// try-range address. -void DwarfException:: -ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, +/// Compute the call-site table. The entry for an invoke has a try-range +/// containing the call, a non-zero landing pad, and an appropriate action. The +/// entry for an ordinary call has a try-range containing the call and zero for +/// the landing pad and the action. Calls marked 'nounwind' have no entry and +/// must not be contained in the try-range of any entry - they form gaps in the +/// table. Entries must be ordered by try-range address. +void EHStreamer:: +computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, const RangeMapType &PadMap, const SmallVectorImpl<const LandingPadInfo *> &LandingPads, const SmallVectorImpl<unsigned> &FirstActions) { // The end label of the previous invoke or nounwind try-range. - MCSymbol *LastLabel = 0; + MCSymbol *LastLabel = nullptr; // Whether there is a potentially throwing instruction (currently this means // an ordinary call) between the end of the previous try-range and now. @@ -238,18 +209,16 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, bool PreviousIsInvoke = false; // Visit all instructions in order of address. - for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); - I != E; ++I) { - for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); - MI != E; ++MI) { - if (!MI->isLabel()) { - if (MI->isCall()) - SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI); + for (const auto &MBB : *Asm->MF) { + for (const auto &MI : MBB) { + if (!MI.isEHLabel()) { + if (MI.isCall()) + SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); continue; } // End of the previous try-range? - MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol(); + MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); if (BeginLabel == LastLabel) SawPotentiallyThrowing = false; @@ -269,7 +238,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // create a call-site entry with no landing pad for the region between the // try-ranges. 
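A small worked example of the resulting call-site entries may help (editorial; the labels are hypothetical). For a function whose body is, in address order, a nounwind call, an invoke landing at Llpad1, and then a plain may-throw call:

  struct CallSiteEntryExample {
    const char *Begin, *End; // try-range labels; nullptr = function start/end
    const char *Pad;         // landing pad label; nullptr = no landing pad
    unsigned Action;         // 1-based action index; 0 = no actions
  };
  static const CallSiteEntryExample Example[] = {
      // The nounwind call produces no entry at all -- a gap in the table.
      {"Ltmp0", "Ltmp1", "Llpad1", 1}, // the invoke's try-range
      {"Ltmp1", nullptr, nullptr, 0},  // trailing may-throw region, zero pad
  };

The zero-landing-pad entries exist so the personality routine keeps unwinding through those ranges rather than treating them as uncovered code.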
if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) { - CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 }; + CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; } @@ -319,12 +288,12 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // function may throw, create a call-site entry with no landing pad for the // region following the try-range. if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) { - CallSiteEntry Site = { LastLabel, 0, 0, 0 }; + CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; CallSites.push_back(Site); } } -/// EmitExceptionTable - Emit landing pads and actions. +/// Emit landing pads and actions. /// /// The general organization of the table is complex, but the basic concepts are /// easy. First there is a header which describes the location and organization @@ -344,7 +313,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, /// unwound and handling continues. /// 3. Type ID table contains references to all the C++ typeinfo for all /// catches in the function. This tables is reverse indexed base 1. -void DwarfException::EmitExceptionTable() { +void EHStreamer::emitExceptionTable() { const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); @@ -357,13 +326,17 @@ void DwarfException::EmitExceptionTable() { for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) LandingPads.push_back(&PadInfos[i]); - std::sort(LandingPads.begin(), LandingPads.end(), PadLT); + // Order landing pads lexicographically by type id. + std::sort(LandingPads.begin(), LandingPads.end(), + [](const LandingPadInfo *L, + const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; }); // Compute the actions table and gather the first action index for each // landing pad site. SmallVector<ActionEntry, 32> Actions; SmallVector<unsigned, 64> FirstActions; - unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions); + unsigned SizeActions = + computeActionsTable(LandingPads, Actions, FirstActions); // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). Ordinary calls do not, so appropriate @@ -381,7 +354,7 @@ void DwarfException::EmitExceptionTable() { // Compute the call-site table. SmallVector<CallSiteEntry, 64> CallSites; - ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions); + computeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions); // Final tallies. @@ -401,9 +374,9 @@ void DwarfException::EmitExceptionTable() { } for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { - CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action); + CallSiteTableLength += getULEB128Size(CallSites[i].Action); if (IsSJLJ) - CallSiteTableLength += MCAsmInfo::getULEB128Size(i); + CallSiteTableLength += getULEB128Size(i); } // Type infos. @@ -488,15 +461,14 @@ void DwarfException::EmitExceptionTable() { // We chose another solution: don't output padding inside the table like GCC // does, instead output it before the table. 
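Much of the length bookkeeping in this function leans on ULEB128 sizes. For reference, this is the encoding involved (editorial sketch, equivalent in spirit to the getULEB128Size/EmitULEB128 helpers but not a copy of them):

  #include <cstdint>
  #include <vector>

  // ULEB128: 7 payload bits per byte, least significant group first, high
  // bit set on every byte except the last.
  std::vector<uint8_t> encodeULEB128(uint64_t Value) {
    std::vector<uint8_t> Out;
    do {
      uint8_t Byte = Value & 0x7f;
      Value >>= 7;
      if (Value != 0)
        Byte |= 0x80; // more bytes follow
      Out.push_back(Byte);
    } while (Value != 0);
    return Out;
  }
  // encodeULEB128(624485) yields {0xe5, 0x8e, 0x26}.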
unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; - unsigned CallSiteTableLengthSize = - MCAsmInfo::getULEB128Size(CallSiteTableLength); + unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength); unsigned TTypeBaseOffset = sizeof(int8_t) + // Call site format CallSiteTableLengthSize + // Call site table length size CallSiteTableLength + // Call site table length SizeActions + // Actions size SizeTypes; - unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset); + unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset); unsigned TotalSize = sizeof(int8_t) + // LPStart format sizeof(int8_t) + // TType format @@ -583,10 +555,10 @@ void DwarfException::EmitExceptionTable() { Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); MCSymbol *BeginLabel = S.BeginLabel; - if (BeginLabel == 0) + if (!BeginLabel) BeginLabel = EHFuncBeginSym; MCSymbol *EndLabel = S.EndLabel; - if (EndLabel == 0) + if (!EndLabel) EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); @@ -671,12 +643,12 @@ void DwarfException::EmitExceptionTable() { Asm->EmitSLEB128(Action.NextAction); } - EmitTypeInfos(TTypeEncoding); + emitTypeInfos(TTypeEncoding); Asm->EmitAlignment(2); } -void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) { +void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); @@ -717,20 +689,18 @@ void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) { } } -/// EndModule - Emit all exception information that should come after the -/// content. -void DwarfException::EndModule() { +/// Emit all exception information that should come after the content. +void EHStreamer::endModule() { llvm_unreachable("Should be implemented"); } -/// BeginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. -void DwarfException::BeginFunction(const MachineFunction *MF) { +/// Gather pre-function exception information. Assumes it's being emitted +/// immediately after the function entry point. +void EHStreamer::beginFunction(const MachineFunction *MF) { llvm_unreachable("Should be implemented"); } -/// EndFunction - Gather and emit post-function exception information. -/// -void DwarfException::EndFunction() { +/// Gather and emit post-function exception information. +void EHStreamer::endFunction(const MachineFunction *) { llvm_unreachable("Should be implemented"); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h new file mode 100644 index 0000000..2b6ba78 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -0,0 +1,138 @@ +//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing exception info into assembly files. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H +#define LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H + +#include "AsmPrinterHandler.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { +struct LandingPadInfo; +class MachineModuleInfo; +class MachineInstr; +class MachineFunction; +class AsmPrinter; + +template <typename T> +class SmallVectorImpl; + +/// Emits exception handling directives. +class EHStreamer : public AsmPrinterHandler { +protected: + /// Target of directive emission. + AsmPrinter *Asm; + + /// Collected machine module information. + MachineModuleInfo *MMI; + + /// How many leading type ids two landing pads have in common. + static unsigned sharedTypeIDs(const LandingPadInfo *L, + const LandingPadInfo *R); + + /// Structure holding a try-range and the associated landing pad. + struct PadRange { + // The index of the landing pad. + unsigned PadIndex; + // The index of the begin and end labels in the landing pad's label lists. + unsigned RangeIndex; + }; + + typedef DenseMap<MCSymbol *, PadRange> RangeMapType; + + /// Structure describing an entry in the actions table. + struct ActionEntry { + int ValueForTypeID; // The value to write - may not be equal to the type id. + int NextAction; + unsigned Previous; + }; + + /// Structure describing an entry in the call-site table. + struct CallSiteEntry { + // The 'try-range' is BeginLabel .. EndLabel. + MCSymbol *BeginLabel; // zero indicates the start of the function. + MCSymbol *EndLabel; // zero indicates the end of the function. + + // The landing pad starts at PadLabel. + MCSymbol *PadLabel; // zero indicates that there is no landing pad. + unsigned Action; + }; + + /// Compute the actions table and gather the first action index for each + /// landing pad site. + unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions); + + /// Return `true' if this is a call to a function marked `nounwind'. Return + /// `false' otherwise. + bool callToNoUnwindFunction(const MachineInstr *MI); + + /// Compute the call-site table. The entry for an invoke has a try-range + /// containing the call, a non-zero landing pad and an appropriate action. + /// The entry for an ordinary call has a try-range containing the call and + /// zero for the landing pad and the action. Calls marked 'nounwind' have + /// no entry and must not be contained in the try-range of any entry - they + /// form gaps in the table. Entries must be ordered by try-range address. + + void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const RangeMapType &PadMap, + const SmallVectorImpl<const LandingPadInfo *> &LPs, + const SmallVectorImpl<unsigned> &FirstActions); + + /// Emit landing pads and actions. + /// + /// The general organization of the table is complex, but the basic concepts + /// are easy. First there is a header which describes the location and + /// organization of the three components that follow. + /// 1. The landing pad site information describes the range of code covered + /// by the try. In our case it's an accumulation of the ranges covered + /// by the invokes in the try. There is also a reference to the landing + /// pad that handles the exception once processed. Finally an index into + /// the actions table. + /// 2. The action table, in our case, is composed of pairs of type ids + /// and next action offset. 
Starting with the + /// landing pad site, each type id is checked for a match to the current + /// exception. If it matches then the exception and type id are passed + /// on to the landing pad. Otherwise the next action is looked up. This + /// chain is terminated with a next action of zero. If no type id is + /// found the frame is unwound and handling continues. + /// 3. Type id table contains references to all the C++ typeinfo for all + /// catches in the function. This table is reverse indexed, base 1. + void emitExceptionTable(); + + virtual void emitTypeInfos(unsigned TTypeEncoding); + +public: + EHStreamer(AsmPrinter *A); + virtual ~EHStreamer(); + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; + + // Unused. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + void beginInstruction(const MachineInstr *MI) override {} + void endInstruction() override {} +}; +} + +#endif + diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index a8fb66d..bfcbe6b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -14,8 +14,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/GCs.h" #include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" @@ -35,8 +35,8 @@ namespace { class ErlangGCPrinter : public GCMetadataPrinter { public: - void beginAssembly(AsmPrinter &AP); - void finishAssembly(AsmPrinter &AP); + void beginAssembly(AsmPrinter &AP) override; + void finishAssembly(AsmPrinter &AP) override; }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 98177c0..5a9ecd7 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -23,7 +24,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <cctype> @@ -33,8 +33,8 @@ namespace { class OcamlGCMetadataPrinter : public GCMetadataPrinter { public: - void beginAssembly(AsmPrinter &AP); - void finishAssembly(AsmPrinter &AP); + void beginAssembly(AsmPrinter &AP) override; + void finishAssembly(AsmPrinter &AP) override; }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 1561012..81285d5 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -20,6 +20,7 @@ #include
"llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -30,7 +31,6 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" @@ -38,20 +38,19 @@ using namespace llvm; Win64Exception::Win64Exception(AsmPrinter *A) - : DwarfException(A), - shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false) - {} + : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false), + shouldEmitMoves(false) {} Win64Exception::~Win64Exception() {} -/// EndModule - Emit all exception information that should come after the +/// endModule - Emit all exception information that should come after the /// content. -void Win64Exception::EndModule() { +void Win64Exception::endModule() { } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void Win64Exception::BeginFunction(const MachineFunction *MF) { +void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. @@ -73,22 +72,22 @@ void Win64Exception::BeginFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym); + Asm->OutStreamer.EmitWinCFIStartProc(Asm->CurrentFnSym); if (!shouldEmitPersonality) return; - MCSymbol *GCCHandlerSym = - Asm->GetExternalSymbolSymbol("_GCC_specific_handler"); - Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true); + const MCSymbol *PersHandlerSym = + TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); + Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); } -/// EndFunction - Gather and emit post-function exception information. +/// endFunction - Gather and emit post-function exception information. 
/// -void Win64Exception::EndFunction() { +void Win64Exception::endFunction(const MachineFunction *) { if (!shouldEmitPersonality && !shouldEmitMoves) return; @@ -99,16 +98,10 @@ void Win64Exception::EndFunction() { MMI->TidyLandingPads(); if (shouldEmitPersonality) { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); - Asm->OutStreamer.PushSection(); - Asm->OutStreamer.EmitWin64EHHandlerData(); - Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext), - 4); - EmitExceptionTable(); + Asm->OutStreamer.EmitWinEHHandlerData(); + emitExceptionTable(); Asm->OutStreamer.PopSection(); } - Asm->OutStreamer.EmitWin64EHEndProc(); + Asm->OutStreamer.EmitWinCFIEndProc(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp new file mode 100644 index 0000000..6a5c431 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -0,0 +1,335 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing line tables info into COFF files. +// +//===----------------------------------------------------------------------===// + +#include "WinCodeViewLineTables.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/COFF.h" + +namespace llvm { + +StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { + assert(S); + DIDescriptor D(S); + assert((D.isCompileUnit() || D.isFile() || D.isSubprogram() || + D.isLexicalBlockFile() || D.isLexicalBlock()) && + "Unexpected scope info"); + + DIScope Scope(S); + StringRef Dir = Scope.getDirectory(), + Filename = Scope.getFilename(); + char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; + if (Result) + return Result; + + // Clang emits directory and relative filename info into the IR, but CodeView + // operates on full paths. We could change Clang to emit full paths too, but + // that would increase the IR size and is probably not needed for other users. + // For now, just concatenate and canonicalize the path here. + std::string Filepath; + if (Filename.find(':') == 1) + Filepath = Filename; + else + Filepath = (Dir + Twine("\\") + Filename).str(); + + // Canonicalize the path. We have to do it textually because we may no longer + // have access to the file in the filesystem. + // First, replace all slashes with backslashes. + std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); + + // Replace all "\.\" with "\". + size_t Cursor = 0; + while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 2); + + // Replace all "\XXX\..\" with "\". Don't try too hard though as the original + // path should be well-formatted, e.g. start with a drive letter, etc. + Cursor = 0; + while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { + // Something's wrong if the path starts with "\..\", abort. + if (Cursor == 0) + break; + + size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); + if (PrevSlash == std::string::npos) + // Something's wrong, abort.
+ break; + + Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); + // The next ".." might be following the one we've just erased. + Cursor = PrevSlash; + } + + // Remove all duplicate backslashes. + Cursor = 0; + while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 1); + + Result = strdup(Filepath.c_str()); + return StringRef(Result); +} + +void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, + const MachineFunction *MF) { + const MDNode *Scope = DL.getScope(MF->getFunction()->getContext()); + if (!Scope) + return; + StringRef Filename = getFullFilepath(Scope); + + // Skip this instruction if it has the same file:line as the previous one. + assert(CurFn); + if (!CurFn->Instrs.empty()) { + const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()]; + if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine()) + return; + } + FileNameRegistry.add(Filename); + + MCSymbol *MCL = Asm->MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(MCL); + CurFn->Instrs.push_back(MCL); + InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine()); +} + +WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) + : Asm(nullptr), CurFn(nullptr) { + MachineModuleInfo *MMI = AP->MMI; + + // If module doesn't have named metadata anchors or COFF debug section + // is not available, skip any debug info related stuff. + if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || + !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) + return; + + // Tell MMI that we have debug info. + MMI->setDebugInfoAvailability(true); + Asm = AP; +} + +static void EmitLabelDiff(MCStreamer &Streamer, + const MCSymbol *From, const MCSymbol *To) { + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + MCContext &Context = Streamer.getContext(); + const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context), + *ToRef = MCSymbolRefExpr::Create(To, Variant, Context); + const MCExpr *AddrDelta = + MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context); + Streamer.EmitValue(AddrDelta, 4); +} + +void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { + // For each function there is a separate subsection + // which holds the PC to file:line table. + const MCSymbol *Fn = Asm->getSymbol(GV); + assert(Fn); + + const FunctionInfo &FI = FnDebugInfo[GV]; + if (FI.Instrs.empty()) + return; + assert(FI.End && "Don't know where the function ends?"); + + // PCs/Instructions are grouped into segments sharing the same filename. + // Pre-calculate the lengths (in instructions) of these segments and store + // them in a map for convenience. Each index in the map is the sequential + // number of the respective instruction that starts a new segment. + DenseMap<size_t, size_t> FilenameSegmentLengths; + size_t LastSegmentEnd = 0; + StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename; + for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) { + if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename) + continue; + FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd; + LastSegmentEnd = J; + PrevFilename = InstrInfo[FI.Instrs[J]].Filename; + } + FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd; + + // Emit the control code of the subsection followed by the payload size. 
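The canonicalization in getFullFilepath above is purely textual; a rough standalone rendition of the same steps, under the same assumptions (backslash-separated paths, best-effort ".." handling), using only std::string:

#include <algorithm>
#include <string>

// Sketch of the textual canonicalization performed by getFullFilepath.
static std::string canonicalizeSketch(std::string P) {
  std::replace(P.begin(), P.end(), '/', '\\');   // slashes -> backslashes
  size_t C = 0;
  while ((C = P.find("\\.\\", C)) != std::string::npos)
    P.erase(C, 2);                               // "\.\" -> "\"
  C = 0;
  while ((C = P.find("\\..\\", C)) != std::string::npos) {
    if (C == 0)
      break;                                     // leading "\..\": give up
    size_t Prev = P.rfind('\\', C - 1);
    if (Prev == std::string::npos)
      break;                                     // malformed: give up
    P.erase(Prev, C + 3 - Prev);                 // "\XXX\..\" -> "\"
    C = Prev;                                    // ".." may now precede Prev
  }
  C = 0;
  while ((C = P.find("\\\\", C)) != std::string::npos)
    P.erase(C, 1);                               // collapse doubled "\"
  return P;                                      // "C:\a\.\b\..\c" -> "C:\a\c"
}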
+ Asm->OutStreamer.AddComment( "Linetable subsection for " + Twine(Fn->getName())); + Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); + MCSymbol *SubsectionBegin = Asm->MMI->getContext().CreateTempSymbol(), + *SubsectionEnd = Asm->MMI->getContext().CreateTempSymbol(); + EmitLabelDiff(Asm->OutStreamer, SubsectionBegin, SubsectionEnd); + Asm->OutStreamer.EmitLabel(SubsectionBegin); + + // Identify the function this subsection is for. + Asm->OutStreamer.EmitCOFFSecRel32(Fn); + Asm->OutStreamer.EmitCOFFSectionIndex(Fn); + + // Length of the function's code, in bytes. + EmitLabelDiff(Asm->OutStreamer, Fn, FI.End); + + // PC-to-linenumber lookup table: + MCSymbol *FileSegmentEnd = nullptr; + for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) { + MCSymbol *Instr = FI.Instrs[J]; + assert(InstrInfo.count(Instr)); + + if (FilenameSegmentLengths.count(J)) { + // We came to a beginning of a new filename segment. + if (FileSegmentEnd) + Asm->OutStreamer.EmitLabel(FileSegmentEnd); + StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename; + assert(FileNameRegistry.Infos.count(CurFilename)); + size_t IndexInStringTable = + FileNameRegistry.Infos[CurFilename].FilenameID; + // Each segment starts with the offset of the filename + // in the string table. + Asm->OutStreamer.AddComment( + "Segment for file '" + Twine(CurFilename) + "' begins"); + MCSymbol *FileSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(FileSegmentBegin); + Asm->EmitInt32(8 * IndexInStringTable); + + // Number of PC records in the lookup table. + size_t SegmentLength = FilenameSegmentLengths[J]; + Asm->EmitInt32(SegmentLength); + + // Full size of the segment for this filename, including the previous two + // records. + FileSegmentEnd = Asm->MMI->getContext().CreateTempSymbol(); + EmitLabelDiff(Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); + } + + // The first PC with the given linenumber and the linenumber itself. + EmitLabelDiff(Asm->OutStreamer, Fn, Instr); + Asm->EmitInt32(InstrInfo[Instr].LineNumber); + } + + if (FileSegmentEnd) + Asm->OutStreamer.EmitLabel(FileSegmentEnd); + Asm->OutStreamer.EmitLabel(SubsectionEnd); +} + +void WinCodeViewLineTables::endModule() { + if (FnDebugInfo.empty()) + return; + + assert(Asm != nullptr); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); + Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); + + // The COFF .debug$S section consists of several subsections, each starting + // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length + // of the payload followed by the payload itself. The subsections are 4-byte + // aligned. + + for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I) + emitDebugInfoForFunction(VisitedFunctions[I]); + + // This subsection holds a file-index-to-string-table-offset table. + Asm->OutStreamer.AddComment("File index to string table offset subsection"); + Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION); + size_t NumFilenames = FileNameRegistry.Infos.size(); + Asm->EmitInt32(8 * NumFilenames); + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + StringRef Filename = FileNameRegistry.Filenames[I]; + // For each unique filename, just write its offset in the string table. + Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); + // The function name offset is not followed by any additional data. + Asm->EmitInt32(0); + } + + // This subsection holds the string table.
+ Asm->OutStreamer.AddComment("String table"); + Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION); + Asm->EmitInt32(FileNameRegistry.LastOffset); + // The payload starts with a null character. + Asm->EmitInt8(0); + + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + // Just emit unique filenames one by one, separated by a null character. + Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]); + Asm->EmitInt8(0); + } + + // No more subsections. Fill with zeros to align the end of the section by 4. + Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0); + + clear(); +} + +void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { + assert(!CurFn && "Can't process two functions at once!"); + + if (!Asm || !Asm->MMI->hasDebugInfo()) + return; + + const Function *GV = MF->getFunction(); + assert(FnDebugInfo.count(GV) == false); + VisitedFunctions.push_back(GV); + CurFn = &FnDebugInfo[GV]; + + // Find the end of the function prolog. + // FIXME: is there a simpler way to do this? Can we just search + // for the first instruction of the function, not the last of the prolog? + DebugLoc PrologEndLoc; + bool EmptyPrologue = true; + for (const auto &MBB : *MF) { + if (!PrologEndLoc.isUnknown()) + break; + for (const auto &MI : MBB) { + if (MI.isDebugValue()) + continue; + + // First known non-DBG_VALUE and non-frame setup location marks + // the beginning of the function body. + // FIXME: do we need the first subcondition? + if (!MI.getFlag(MachineInstr::FrameSetup) && + (!MI.getDebugLoc().isUnknown())) { + PrologEndLoc = MI.getDebugLoc(); + break; + } + EmptyPrologue = false; + } + } + // Record beginning of function if we have a non-empty prologue. + if (!PrologEndLoc.isUnknown() && !EmptyPrologue) { + DebugLoc FnStartDL = + PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); + maybeRecordLocation(FnStartDL, MF); + } +} + +void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { + if (!Asm || !CurFn) // We haven't created any debug info for this function. + return; + + const Function *GV = MF->getFunction(); + assert(FnDebugInfo.count(GV)); + assert(CurFn == &FnDebugInfo[GV]); + + if (CurFn->Instrs.empty()) { + FnDebugInfo.erase(GV); + VisitedFunctions.pop_back(); + } else { + // Define end label for subprogram. + MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(FunctionEndSym); + CurFn->End = FunctionEndSym; + } + CurFn = nullptr; +} + +void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) { + // Ignore DBG_VALUE locations and function prologue. + if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) + return; + DebugLoc DL = MI->getDebugLoc(); + if (DL == PrevInstLoc || DL.isUnknown()) + return; + maybeRecordLocation(DL, Asm->MF); +} +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h new file mode 100644 index 0000000..0734d97 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -0,0 +1,144 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing line tables info into COFF files.
+// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__ +#define CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__ + +#include "AsmPrinterHandler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { +/// \brief Collects and handles line tables information in a CodeView format. +class WinCodeViewLineTables : public AsmPrinterHandler { + AsmPrinter *Asm; + DebugLoc PrevInstLoc; + + // For each function, store a vector of labels to its instructions, as well as + // to the end of the function. + struct FunctionInfo { + SmallVector<MCSymbol *, 10> Instrs; + MCSymbol *End; + FunctionInfo() : End(nullptr) {} + } *CurFn; + + typedef DenseMap<const Function *, FunctionInfo> FnDebugInfoTy; + FnDebugInfoTy FnDebugInfo; + // Store the functions we've visited in a vector so we can maintain a stable + // order while emitting subsections. + SmallVector<const Function *, 10> VisitedFunctions; + + // InstrInfoTy - Holds the Filename:LineNumber information for every + // instruction with a unique debug location. + struct InstrInfoTy { + StringRef Filename; + unsigned LineNumber; + + InstrInfoTy() : LineNumber(0) {} + + InstrInfoTy(StringRef Filename, unsigned LineNumber) + : Filename(Filename), LineNumber(LineNumber) {} + }; + DenseMap<MCSymbol *, InstrInfoTy> InstrInfo; + + // FileNameRegistry - Manages filenames observed while generating debug info + // by filtering out duplicates and bookkeeping the offsets in the string + // table to be generated. + struct FileNameRegistryTy { + SmallVector<StringRef, 10> Filenames; + struct PerFileInfo { + size_t FilenameID, StartOffset; + }; + StringMap<PerFileInfo> Infos; + + // The offset in the string table where we'll write the next unique + // filename. + size_t LastOffset; + + FileNameRegistryTy() { + clear(); + } + + // Add Filename to the registry, if it was not observed before. + void add(StringRef Filename) { + if (Infos.count(Filename)) + return; + size_t OldSize = Infos.size(); + Infos[Filename].FilenameID = OldSize; + Infos[Filename].StartOffset = LastOffset; + LastOffset += Filename.size() + 1; + Filenames.push_back(Filename); + } + + void clear() { + LastOffset = 1; + Infos.clear(); + Filenames.clear(); + } + } FileNameRegistry; + + typedef std::map<std::pair<StringRef, StringRef>, char *> + DirAndFilenameToFilepathMapTy; + DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap; + StringRef getFullFilepath(const MDNode *S); + + void maybeRecordLocation(DebugLoc DL, const MachineFunction *MF); + + void clear() { + assert(CurFn == nullptr); + FileNameRegistry.clear(); + InstrInfo.clear(); + } + + void emitDebugInfoForFunction(const Function *GV); + +public: + WinCodeViewLineTables(AsmPrinter *Asm); + + ~WinCodeViewLineTables() { + for (DirAndFilenameToFilepathMapTy::iterator + I = DirAndFilenameToFilepathMap.begin(), + E = DirAndFilenameToFilepathMap.end(); + I != E; ++I) + free(I->second); + } + + void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} + + /// \brief Emit the COFF section that holds the line table information. 
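A quick worked example of FileNameRegistryTy::add() above (offsets follow from LastOffset starting at 1, because string-table offset 0 holds the leading NUL; the filenames are hypothetical):

//   add("a.c")   -> FilenameID 0, StartOffset 1, LastOffset becomes 5  ("a.c\0")
//   add("b.cpp") -> FilenameID 1, StartOffset 5, LastOffset becomes 11 ("b.cpp\0")
//   add("a.c")   -> already registered, no change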
+ void endModule() override; + + /// \brief Gather pre-function debug information. + void beginFunction(const MachineFunction *MF) override; + + /// \brief Gather post-function debug information. + void endFunction(const MachineFunction *) override; + + /// \brief Process beginning of an instruction. + void beginInstruction(const MachineInstr *MI) override; + + /// \brief Process end of an instruction. + void endInstruction() override {} +}; +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp new file mode 100644 index 0000000..421946d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp @@ -0,0 +1,380 @@ +//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass (at IR level) to replace atomic instructions with +// appropriate (intrinsic-based) ldrex/strex loops. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "arm-atomic-expand" + +namespace { + class AtomicExpandLoadLinked : public FunctionPass { + const TargetMachine *TM; + public: + static char ID; // Pass identification, replacement for typeid + explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) { + initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + bool expandAtomicInsts(Function &F); + + bool expandAtomicLoad(LoadInst *LI); + bool expandAtomicStore(StoreInst *LI); + bool expandAtomicRMW(AtomicRMWInst *AI); + bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); + + AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord); + void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord); + }; +} + +char AtomicExpandLoadLinked::ID = 0; +char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID; +INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc", + "Expand Atomic calls in terms of load-linked & store-conditional", + false, false) + +FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { + return new AtomicExpandLoadLinked(TM); +} + +bool AtomicExpandLoadLinked::runOnFunction(Function &F) { + if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) + return false; + + SmallVector<Instruction *, 1> AtomicInsts; + + // Changing control-flow while iterating through it is a bad idea, so gather a + // list of all atomic instructions before we start. 
+ for (BasicBlock &BB : F) + for (Instruction &Inst : BB) { + if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) || + (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) || + (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic())) + AtomicInsts.push_back(&Inst); + } + + bool MadeChange = false; + for (Instruction *Inst : AtomicInsts) { + if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst)) + continue; + + if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) + MadeChange |= expandAtomicRMW(AI); + else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst)) + MadeChange |= expandAtomicCmpXchg(CI); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + MadeChange |= expandAtomicLoad(LI); + else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + MadeChange |= expandAtomicStore(SI); + else + llvm_unreachable("Unknown atomic instruction"); + } + + return MadeChange; +} + +bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { + // Load instructions don't actually need a leading fence, even in the + // SequentiallyConsistent case. + AtomicOrdering MemOpOrder = + TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic + : LI->getOrdering(); + + // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is + // an ldrexd (A3.5.3). + IRBuilder<> Builder(LI); + Value *Val = TM->getTargetLowering()->emitLoadLinked( + Builder, LI->getPointerOperand(), MemOpOrder); + + insertTrailingFence(Builder, LI->getOrdering()); + + LI->replaceAllUsesWith(Val); + LI->eraseFromParent(); + + return true; +} + +bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) { + // The only atomic 64-bit store on ARM is an strexd that succeeds, which means + // we need a loop and the entire instruction is essentially an "atomicrmw + // xchg" that ignores the value loaded. + IRBuilder<> Builder(SI); + AtomicRMWInst *AI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), + SI->getValueOperand(), SI->getOrdering()); + SI->eraseFromParent(); + + // Now we have an appropriate swap instruction, lower it as usual. + return expandAtomicRMW(AI); +} + +bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { + AtomicOrdering Order = AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // fence? + // atomicrmw.start: + // %loaded = @load.linked(%addr) + // %new = some_op iN %loaded, %incr + // %stored = @store_conditional(%new, %addr) + // %try_again = icmp i32 ne %stored, 0 + // br i1 %try_again, label %loop, label %atomicrmw.end + // atomicrmw.end: + // fence? + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. 
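To make the generic expansion above concrete: on a target that inserts separate fences and lowers emitLoadLinked/emitStoreConditional to ARM's exclusive-access intrinsics, an atomicrmw add i32* %p, i32 1 seq_cst would come out roughly as follows (a hedged illustration in the same comment style as above, not captured pass output):

//   fence release
//   br label %atomicrmw.start
// atomicrmw.start:
//   %loaded = call i32 @llvm.arm.ldrex.p0i32(i32* %p)
//   %new = add i32 %loaded, 1
//   %stored = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %p)
//   %try_again = icmp ne i32 %stored, 0
//   br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
// atomicrmw.end:
//   fence seq_cst
// All uses of the original atomicrmw result are then replaced by %loaded.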
+ Builder.SetInsertPoint(LoopBB); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); + + Value *NewVal; + switch (AI->getOperation()) { + case AtomicRMWInst::Xchg: + NewVal = AI->getValOperand(); + break; + case AtomicRMWInst::Add: + NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Sub: + NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::And: + NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Nand: + NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()), + "new"); + break; + case AtomicRMWInst::Or: + NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Xor: + NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + default: + llvm_unreachable("Unknown atomic op"); + } + + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( + Builder, NewVal, Addr, MemOpOrder); + Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); + Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + insertTrailingFence(Builder, Order); + + AI->replaceAllUsesWith(Loaded); + AI->eraseFromParent(); + + return true; +} + +bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { + AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); + AtomicOrdering FailureOrder = CI->getFailureOrdering(); + Value *Addr = CI->getPointerOperand(); + BasicBlock *BB = CI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord + // + // The full expansion we produce is: + // [...] + // fence? + // cmpxchg.start: + // %loaded = @load.linked(%addr) + // %should_store = icmp eq %loaded, %desired + // br i1 %should_store, label %cmpxchg.trystore, + // label %cmpxchg.failure + // cmpxchg.trystore: + // %stored = @store_conditional(%new, %addr) + // %success = icmp eq i32 %stored, 0 + // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure + // cmpxchg.success: + // fence? + // br label %cmpxchg.end + // cmpxchg.failure: + // fence? + // br label %cmpxchg.end + // cmpxchg.end: + // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 + // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 + // [...] 
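One consequence of this CFG-based expansion, shown as a hedged sketch (hypothetical IR, same comment style as the expansion above): a success-flag user such as

//   %res = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
//   %ok  = extractvalue { i32, i1 } %res, 1
// has its uses of %ok redirected to the control-flow-derived PHI instead:
//   %success = phi i1 [ true, %cmpxchg.success ], [ false, %cmpxchg.failure ]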
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); + auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); + auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); + + // This grabs the DebugLoc from CI + IRBuilder<> Builder(CI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *ShouldStore = + Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); + + // If the cmpxchg doesn't actually need any ordering when it fails, we can + // jump straight past that fence instruction (if it exists). + Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); + + Builder.SetInsertPoint(TryStoreBB); + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( + Builder, CI->getNewValOperand(), Addr, MemOpOrder); + StoreSuccess = Builder.CreateICmpEQ( + StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); + Builder.CreateCondBr(StoreSuccess, SuccessBB, + CI->isWeak() ? FailureBB : LoopBB); + + // Make sure later instructions don't get reordered with a fence if necessary. + Builder.SetInsertPoint(SuccessBB); + insertTrailingFence(Builder, SuccessOrder); + Builder.CreateBr(ExitBB); + + Builder.SetInsertPoint(FailureBB); + insertTrailingFence(Builder, FailureOrder); + Builder.CreateBr(ExitBB); + + // Finally, we have control-flow based knowledge of whether the cmpxchg + // succeeded or not. We expose this to later passes by converting any + // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. + + // Set up the builder so we can create any PHIs we need. + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); + Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); + + // Look for any users of the cmpxchg that are just comparing the loaded value + // against the desired one, and replace them with the CFG-derived version. + SmallVector<ExtractValueInst *, 2> PrunedInsts; + for (auto User : CI->users()) { + ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); + if (!EV) + continue; + + assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && + "weird extraction from { iN, i1 }"); + + if (EV->getIndices()[0] == 0) + EV->replaceAllUsesWith(Loaded); + else + EV->replaceAllUsesWith(Success); + + PrunedInsts.push_back(EV); + } + + // We can remove the instructions now we're no longer iterating through them. + for (auto EV : PrunedInsts) + EV->eraseFromParent(); + + if (!CI->use_empty()) { + // Some use of the full struct return that we don't understand has happened, + // so we've got to reconstruct it properly.
+ Value *Res; + Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + } + + CI->eraseFromParent(); + return true; +} + +AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, + AtomicOrdering Ord) { + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) + return Ord; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + Builder.CreateFence(Release); + + // The exclusive operations don't need any barrier if we're adding separate + // fences. + return Monotonic; +} + +void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord) { + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) + return; + + if (Ord == Acquire || Ord == AcquireRelease) + Builder.CreateFence(Acquire); + else if (Ord == SequentiallyConsistent) + Builder.CreateFence(SequentiallyConsistent); +} diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 24aa1ab..b2737bf 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,27 +15,37 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "basictti" #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <utility> - using namespace llvm; +static cl::opt<unsigned> +PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), + cl::desc("Threshold for partial unrolling"), cl::Hidden); + +#define DEBUG_TYPE "basictti" + namespace { -class BasicTTI : public ImmutablePass, public TargetTransformInfo { +class BasicTTI final : public ImmutablePass, public TargetTransformInfo { const TargetMachine *TM; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + /// Estimate the cost overhead of SK_Alternate shuffle. + unsigned getAltShuffleOverhead(Type *Ty) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } public: - BasicTTI() : ImmutablePass(ID), TM(0) { + BasicTTI() : ImmutablePass(ID), TM(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } @@ -43,15 +53,11 @@ public: initializeBasicTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() { + void initializePass() override { pushTTIStack(this); } - virtual void finalizePass() { - popTTIStack(); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -59,61 +65,61 @@ public: static char ID; /// Provide necessary pointer adjustments for the two base classes. 
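Taken together, the insertLeadingFence/insertTrailingFence pair above amounts to the following ordering-to-fence mapping when the target requests separate fences (a summary of the code, not additional code in the pass):

// ordering   | leading fence  | trailing fence
// monotonic  | (none)         | (none)
// acquire    | (none)         | fence acquire
// release    | fence release  | (none)
// acq_rel    | fence release  | fence acquire
// seq_cst    | fence release  | fence seq_cst
// The LL/SC pair itself is then emitted with monotonic (Monotonic) ordering.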
- virtual void *getAdjustedAnalysisPointer(const void *ID) { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo*)this; return this; } - virtual bool hasBranchDivergence() const; + bool hasBranchDivergence() const override; /// \name Scalar TTI Implementations /// @{ - virtual bool isLegalAddImmediate(int64_t imm) const; - virtual bool isLegalICmpImmediate(int64_t imm) const; - virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const; - virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const; - virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; - virtual bool isTypeLegal(Type *Ty) const; - virtual unsigned getJumpBufAlignment() const; - virtual unsigned getJumpBufSize() const; - virtual bool shouldBuildLookupTables() const; - virtual bool haveFastSqrt(Type *Ty) const; - virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; + bool isLegalAddImmediate(int64_t imm) const override; + bool isLegalICmpImmediate(int64_t imm) const override; + bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const override; + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const override; + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTypeLegal(Type *Ty) const override; + unsigned getJumpBufAlignment() const override; + unsigned getJumpBufSize() const override; + bool shouldBuildLookupTables() const override; + bool haveFastSqrt(Type *Ty) const override; + void getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const override; /// @} /// \name Vector TTI Implementations /// @{ - virtual unsigned getNumberOfRegisters(bool Vector) const; - virtual unsigned getMaximumUnrollFactor() const; - virtual unsigned getRegisterBitWidth(bool Vector) const; - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind, - OperandValueKind) const; - virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const; - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const; - virtual unsigned getCFInstrCost(unsigned Opcode) const; - virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const; - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const; - virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const; - virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, - ArrayRef<Type*> Tys) const; - virtual unsigned getNumberOfParts(Type *Tp) const; - virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const; - virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) const; + unsigned getNumberOfRegisters(bool Vector) const override; + unsigned getMaximumUnrollFactor() const override; + unsigned getRegisterBitWidth(bool Vector) const override; + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, + OperandValueKind) const override; + unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const override; + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const override; + unsigned getCFInstrCost(unsigned Opcode) const override; + 
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const override; + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const override; + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const override; + unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, + ArrayRef<Type*> Tys) const override; + unsigned getNumberOfParts(Type *Tp) const override; + unsigned getAddressComputationCost( Type *Ty, bool IsComplex) const override; + unsigned getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) const override; /// @} }; @@ -191,7 +197,61 @@ bool BasicTTI::haveFastSqrt(Type *Ty) const { return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); } -void BasicTTI::getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } +void BasicTTI::getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const { + // This unrolling functionality is target independent, but to provide some + // motivation for its intended use, for x86: + + // According to the Intel 64 and IA-32 Architectures Optimization Reference + // Manual, Intel Core models and later have a loop stream detector + // (and associated uop queue) that can benefit from partial unrolling. + // The relevant requirements are: + // - The loop must have no more than 4 (8 for Nehalem and later) branches + // taken, and none of them may be calls. + // - The loop can have no more than 18 (28 for Nehalem and later) uops. + + // According to the Software Optimization Guide for AMD Family 15h Processors, + // models 30h-4fh (Steamroller and later) have a loop predictor and loop + // buffer which can benefit from partial unrolling. + // The relevant requirements are: + // - The loop must have fewer than 16 branches + // - The loop must have less than 40 uops in all executed loop branches + + // The number of taken branches in a loop is hard to estimate here, and + // benchmarking has revealed that it is better not to be conservative when + // estimating the branch count. As a result, we'll ignore the branch limits + // until someone finds a case where it matters in practice. + + unsigned MaxOps; + const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(); + if (PartialUnrollingThreshold.getNumOccurrences() > 0) + MaxOps = PartialUnrollingThreshold; + else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0) + MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize; + else + return; + + // Scan the loop: don't unroll loops with calls. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + + for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) + if (isa<CallInst>(J) || isa<InvokeInst>(J)) { + ImmutableCallSite CS(J); + if (const Function *F = CS.getCalledFunction()) { + if (!TopTTI->isLoweredToCall(F)) + continue; + } + + return; + } + } + + // Enable runtime and partial unrolling up to the specified size. 
+ UP.Partial = UP.Runtime = true; + UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps; +} //===----------------------------------------------------------------------===// // @@ -270,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, return OpCost; } +unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { + assert(Ty->isVectorTy() && "Can only shuffle vectors"); + unsigned Cost = 0; + // Shuffle cost is equal to the cost of extracting each element from its + // argument plus the cost of inserting them into the result vector. + + // e.g. <4 x float> has a mask of <0,5,2,7> i.e. we need to extract from index + // 0 of first vector, index 1 of second vector, index 2 of first vector and + // finally index 3 of second vector and insert them at index <0,1,2,3> of + // result vector. + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + return Cost; +} + unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { + if (Kind == SK_Alternate) { + return getAltShuffleOverhead(Tp); + } return 1; } @@ -302,7 +382,8 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return 0; // If the cast is marked as legal (or promote) then assume low cost. - if (TLI->isOperationLegalOrPromote(ISD, DstLT.second)) + if (SrcLT.first == DstLT.first && + TLI->isOperationLegalOrPromote(ISD, DstLT.second)) return 1; // Handle scalar conversions. @@ -409,7 +490,9 @@ unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { - return 1; + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType()); + + return LT.first; } unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, @@ -418,8 +501,32 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, assert(!Src->isVoidTy() && "Invalid type"); std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); - // Assume that all loads of legal types cost 1. - return LT.first; + // Assuming that all loads of legal types cost 1. + unsigned Cost = LT.first; + + if (Src->isVectorTy() && + Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { + // This is a vector load that legalizes to a larger type than the vector + // itself. Unless the corresponding extending load or truncating store is + // legal, this will scalarize. + TargetLowering::LegalizeAction LA = TargetLowering::Expand; + EVT MemVT = getTLI()->getValueType(Src, true); + if (MemVT.isSimple() && MemVT != MVT::Other) { + if (Opcode == Instruction::Store) + LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); + else + LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT()); + } + + if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { + // This is a vector load/store for some illegal type that is scalarized. + // We must account for the cost of building or decomposing the vector.
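A hedged worked example for the scalarization branch here: a load of <4 x i8> on a hypothetical target where that type legalizes in one step to <4 x i32> (LT.first == 1), but the i8-to-i32 extending vector load is neither Legal nor Custom, costs 1 for the legalized load plus four insertelement costs from getScalarizationOverhead; the matching store would instead pay four extractelement costs.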
+ Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, + Opcode == Instruction::Store); + } + } + + return Cost; } unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, @@ -464,7 +571,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::round: ISD = ISD::FROUND; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; - case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? + case Intrinsic::fmuladd: ISD = ISD::FMA; break; case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: return 0; @@ -489,6 +596,12 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, return LT.first * 2; } + // If we can't lower fmuladd into an FMA, estimate the cost as a floating- + // point mul followed by an add. + if (IID == Intrinsic::fmuladd) + return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + + TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); + // Else, assume that we need to scalarize this intrinsic. For math builtins // this will emit a costly libcall, adding call overhead and spills. Make it // very expensive. diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 9cd4208..7503e57 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "branchfolding" #include "BranchFolding.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -38,6 +37,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "branchfolding" + STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); @@ -66,9 +67,9 @@ namespace { static char ID; explicit BranchFolderPass(): MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -82,8 +83,15 @@ INITIALIZE_PASS(BranchFolderPass, "branch-folder", "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); - BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true); + // TailMerge can create jump-into-if branches that make the CFG irreducible + // for HW that requires a structured CFG. + bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && + PassConfig->getEnableTailMerge(); + BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true); return Folder.OptimizeFunction(MF, MF.getTarget().getInstrInfo(), MF.getTarget().getRegisterInfo(), @@ -182,7 +190,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TII = tii; TRI = tri; MMI = mmi; - RS = NULL; + RS = nullptr; // Use a RegScavenger to help update liveness when required. MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -194,7 +202,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { - MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0; + MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true)) MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); @@ -213,7 +221,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // See if any jump tables have become dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); - if (JTI == 0) { + if (!JTI) { delete RS; return MadeChange; } @@ -379,7 +387,7 @@ void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, if (RS) { RS->enterBasicBlock(CurMBB); if (!CurMBB->empty()) - RS->forward(prior(CurMBB->end())); + RS->forward(std::prev(CurMBB->end())); BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) @@ -409,7 +417,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1, const BasicBlock *BB) { if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) - return 0; + return nullptr; MachineFunction &MF = *CurMBB.getParent(); @@ -458,8 +466,8 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII) { MachineFunction *MF = CurMBB->getParent(); - MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB)); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && @@ -468,12 +476,12 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); - TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); + TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); return; } } } - TII->InsertBranch(*CurMBB, SuccBB, NULL, + TII->InsertBranch(*CurMBB, SuccBB, nullptr, SmallVector<MachineOperand, 0>(), dl); } @@ -596,12 +604,11 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, unsigned maxCommonTailLength = 0U; SameTails.clear(); MachineBasicBlock::iterator TrialBBI1, TrialBBI2; - MPIterator HighestMPIter = prior(MergePotentials.end()); - for (MPIterator CurMPIter = prior(MergePotentials.end()), + MPIterator HighestMPIter = std::prev(MergePotentials.end()); + for (MPIterator CurMPIter = std::prev(MergePotentials.end()), B = MergePotentials.begin(); - CurMPIter != B && CurMPIter->getHash() == CurHash; - --CurMPIter) { - for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) { + CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) { + for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) { unsigned CommonTailLen; if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), minCommonTailLength, @@ -630,9 +637,9 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { MPIterator CurMPIter, B; - for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); - CurMPIter->getHash() == CurHash; - --CurMPIter) { + for (CurMPIter = std::prev(MergePotentials.end()), + B = MergePotentials.begin(); + 
CurMPIter->getHash() == CurHash; --CurMPIter) { // Put the unconditional branch back, if we need one. MachineBasicBlock *CurMBB = CurMPIter->getBlock(); if (SuccBB && CurMBB != PredBB) @@ -843,7 +850,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // See if we can do any tail merging on those. if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(NULL, NULL); + MadeChange |= TryTailMergeBlocks(nullptr, nullptr); // Look at blocks (IBB) with multiple predecessors (PBB). // We change each predecessor to a canonical form, by @@ -864,12 +871,12 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a compile-time infinite loop repeatedly doing and undoing the same // transformations.) - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ++I) { if (I->pred_size() < 2) continue; SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; MachineBasicBlock *IBB = I; - MachineBasicBlock *PredBB = prior(I); + MachineBasicBlock *PredBB = std::prev(I); MergePotentials.clear(); for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end(); @@ -890,7 +897,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (PBB->getLandingPadSuccessor()) continue; - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { // Failing case: IBB is the target of a cbr, and we cannot reverse the @@ -901,7 +908,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // This is the QBB case described above if (!FBB) - FBB = llvm::next(MachineFunction::iterator(PBB)); + FBB = std::next(MachineFunction::iterator(PBB)); } // Failing case: the only way IBB can be reached from PBB is via @@ -909,10 +916,10 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a bit in the edge so we didn't have to do all this. if (IBB->isLandingPad()) { MachineFunction::iterator IP = PBB; IP++; - MachineBasicBlock *PredNextBB = NULL; + MachineBasicBlock *PredNextBB = nullptr; if (IP != MF.end()) PredNextBB = IP; - if (TBB == NULL) { + if (!TBB) { if (IBB != PredNextBB) // fallthrough continue; } else if (FBB) { @@ -933,7 +940,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { TII->RemoveBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, + NewCond, dl); } MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); @@ -951,7 +959,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Reinsert an unconditional branch if needed. The 1 below can occur as a // result of removing blocks in TryTailMergeBlocks. 
- PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + PredBB = std::prev(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -970,7 +978,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Make sure blocks are numbered in order MF.RenumberBlocks(); - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; MadeChange |= OptimizeBlock(MBB); @@ -1091,9 +1099,9 @@ ReoptimizeBlock: // Check to see if we can simplify the terminator of the block before this // one. - MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB)); + MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB)); - MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; bool PriorUnAnalyzable = TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); @@ -1110,7 +1118,7 @@ ReoptimizeBlock: TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1154,7 +1162,7 @@ ReoptimizeBlock: // If the previous branch *only* branches to *this* block (conditional or // not) remove the branch. - if (PriorTBB == MBB && PriorFBB == 0) { + if (PriorTBB == MBB && !PriorFBB) { TII->RemoveBranch(PrevBB); MadeChange = true; ++NumBranchOpts; @@ -1166,7 +1174,7 @@ ReoptimizeBlock: if (PriorFBB == MBB) { DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1180,7 +1188,7 @@ ReoptimizeBlock: if (!TII->ReverseBranchCondition(NewPriorCond)) { DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); + TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1195,7 +1203,7 @@ ReoptimizeBlock: // We consider it more likely that execution will stay in the function (e.g. // due to loops) than it is to exit it. This asserts in loops etc, moving // the assert condition out of the loop body. - if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 && + if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB && MachineFunction::iterator(PriorTBB) == FallThrough && !MBB->canFallThrough()) { bool DoTransform = true; @@ -1218,7 +1226,7 @@ ReoptimizeBlock: DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); + TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); // Move this block to the end of the function. MBB->moveAfter(--MF.end()); @@ -1231,7 +1239,7 @@ ReoptimizeBlock: } // Analyze the branch in the current block. 
- MachineBasicBlock *CurTBB = 0, *CurFBB = 0; + MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; SmallVector<MachineOperand, 4> CurCond; bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); if (!CurUnAnalyzable) { @@ -1257,7 +1265,7 @@ ReoptimizeBlock: // If this branch is the only thing in its block, see if we can forward // other blocks across it. - if (CurTBB && CurCond.empty() && CurFBB == 0 && + if (CurTBB && CurCond.empty() && !CurFBB && IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { DebugLoc dl = getBranchDebugLoc(*MBB); @@ -1295,12 +1303,12 @@ ReoptimizeBlock: // explicit branch to us to make updates simpler. if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && PriorTBB != MBB && PriorFBB != MBB) { - if (PriorTBB == 0) { - assert(PriorCond.empty() && PriorFBB == 0 && + if (!PriorTBB) { + assert(PriorCond.empty() && !PriorFBB && "Bad branch analysis"); PriorTBB = MBB; } else { - assert(PriorFBB == 0 && "Machine CFG out of date!"); + assert(!PriorFBB && "Machine CFG out of date!"); PriorFBB = MBB; } DebugLoc pdl = getBranchDebugLoc(PrevBB); @@ -1324,7 +1332,7 @@ ReoptimizeBlock: // If this change resulted in PMBB ending in a conditional // branch where both conditions go to the same destination, // change this to an unconditional branch (and fix the CFG). - MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0; + MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr; SmallVector<MachineOperand, 4> NewCurCond; bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); @@ -1332,10 +1340,10 @@ ReoptimizeBlock: DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); + TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; - PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); + PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false); } } } @@ -1352,7 +1360,7 @@ ReoptimizeBlock: } // Add the branch back if the block is more than just an uncond branch. - TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl); + TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl); } } @@ -1373,7 +1381,7 @@ ReoptimizeBlock: // Analyze the branch at the end of the pred. MachineBasicBlock *PredBB = *PI; MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; - MachineBasicBlock *PredTBB = 0, *PredFBB = 0; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; if (PredBB != MBB && !PredBB->canFallThrough() && !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) @@ -1390,9 +1398,10 @@ ReoptimizeBlock: // B elsewhere // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = + std::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); + TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1425,7 +1434,7 @@ ReoptimizeBlock: // Okay, there is no really great place to put this block. If, however, // the block before this one would be a fall-through if this block were // removed, move this block to the end of the function. 
- MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0; + MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector<MachineOperand, 4> PrevCond; if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && @@ -1466,7 +1475,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, if (SuccBB != TrueBB) return SuccBB; } - return NULL; + return nullptr; } /// findHoistingInsertPosAndDeps - Find the location to move common instructions @@ -1496,10 +1505,17 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (MO.isUse()) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Uses.insert(*AI); - } else if (!MO.isDead()) - // Don't try to hoist code in the rare case the terminator defines a - // register that is later used. - return MBB->end(); + } else { + if (!MO.isDead()) + // Don't try to hoist code in the rare case the terminator defines a + // register that is later used. + return MBB->end(); + + // If the terminator defines a register, make sure we don't hoist + // the instruction whose def might be clobbered by the terminator. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Defs.insert(*AI); + } } if (Uses.empty()) @@ -1511,7 +1527,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // branch from condition setting instruction. MachineBasicBlock::iterator PI = Loc; --PI; - while (PI != MBB->begin() && Loc->isDebugValue()) + while (PI != MBB->begin() && PI->isDebugValue()) --PI; bool IsDef = false; @@ -1540,7 +1556,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // Also avoid moving code above predicated instruction since it's hard to // reason about register liveness with predicated instruction. bool DontMoveAcrossStore = true; - if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) || + if (!PI->isSafeToMove(TII, nullptr, DontMoveAcrossStore) || TII->isPredicated(PI)) return MBB->end(); @@ -1574,7 +1590,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, /// sequence at the start of the function, move the instructions before MBB /// terminator if it's legal. bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty()) return false; @@ -1679,7 +1695,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; bool DontMoveAcrossStore = true; - if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore)) + if (!TIB->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) break; // Remove kills from LocalDefsSet, these registers had short live ranges. 
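The BranchFolder hunks above all revolve around the TargetInstrInfo branch hooks. The fragment below is a reader's sketch of the contract these call sites rely on, not code from the patch; MBB, Dest, and dl stand in for whatever block, target, and debug location the caller has at hand:

    // AnalyzeBranch returns false when it could analyze the terminators, and
    // fills in its out-parameters:
    //   TBB set, Cond empty            -> unconditional branch to TBB
    //   TBB set, Cond non-empty, !FBB  -> conditional branch, else fall through
    //   TBB, FBB set, Cond non-empty   -> conditional branch to TBB, else FBB
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 4> Cond;
    if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/true) &&
        !Cond.empty() && !TII->ReverseBranchCondition(Cond)) {
      // ReverseBranchCondition returns false on success, mirroring the
      // FixTail and OptimizeBlock hunks above: rewrite the branch so that
      // Dest becomes the taken side.
      TII->RemoveBranch(*MBB);
      TII->InsertBranch(*MBB, Dest, nullptr, Cond, dl);
    }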
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index 4925c4d..bc033f9 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "calcspillweights" - #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -22,6 +20,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "calcspillweights" + void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, const MachineLoopInfo &MLI, @@ -96,8 +96,8 @@ void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); - MachineBasicBlock *mbb = 0; - MachineLoop *loop = 0; + MachineBasicBlock *mbb = nullptr; + MachineLoop *loop = nullptr; bool isExiting = false; float totalWeight = 0; SmallPtrSet<MachineInstr*, 8> visited; @@ -112,8 +112,10 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // Don't recompute spill weight for an unspillable register. bool Spillable = li.isSpillable(); - for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg); - MachineInstr *mi = I.skipInstruction();) { + for (MachineRegisterInfo::reg_instr_iterator + I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); + I != E; ) { + MachineInstr *mi = &*(I++); if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) continue; if (!visited.insert(mi)) @@ -130,9 +132,9 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // Calculate instr weight. bool reads, writes; - tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); + std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); weight = LiveIntervals::getSpillWeight( - writes, reads, MBFI.getBlockFreq(mi->getParent())); + writes, reads, &MBFI, mi); // Give extra weight to what looks like a loop induction variable update. if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) @@ -147,7 +149,11 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; - float hweight = Hint[hint] += weight; + // Force hweight onto the stack so that x86 doesn't add hidden precision, + // making the comparison incorrectly pass (i.e., 1 > 1 == true??). + // + // FIXME: we probably shouldn't use floats at all. 
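// (A sketch of the excess-precision problem the volatile below works around;
// this example is ours, not part of the patch. On 32-bit x86 the x87 unit
// evaluates float expressions in 80-bit registers, so
//   float hweight = Hint[hint] += weight;
//   if (hweight > bestPhys) ...
// can compare an 80-bit temporary against an already-rounded 32-bit value and
// conclude that two equal weights differ. Forcing the sum through a volatile
// float spills it to memory, rounding both sides to 32 bits before the
// compare.)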
+ volatile float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { if (hweight > bestPhys && mri.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index fcfc9dc..add861a 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -76,7 +76,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, dbgs() << "Formal argument #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -108,7 +108,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, dbgs() << "Return operand #" << i << " has unhandled type " << EVT(VT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -126,7 +126,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, dbgs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -145,7 +145,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, dbgs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -162,7 +162,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, dbgs() << "Call result #" << i << " has unhandled type " << EVT(VT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -175,6 +175,6 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { dbgs() << "Call result has unhandled type " << EVT(VT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 7430c53..b3beac3 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -13,15 +13,17 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" -#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" +#include "llvm/PassRegistry.h" using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeAtomicExpandLoadLinkedPass(Registry); initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); + initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); @@ -51,6 +53,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeOptimizePHIsPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); + initializePostMachineSchedulerPass(Registry); initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); @@ -69,6 +72,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeVirtRegRewriterPass(Registry); initializeLowerIntrinsicsPass(Registry); initializeMachineFunctionPrinterPassPass(Registry); + initializeStackMapLivenessPass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp new file mode 100644 index 0000000..d5039b2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -0,0 +1,3326 @@ +//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass munges the code in the input function to better prepare it for +// SelectionDAG-based code generation. This works around limitations in its +// basic-block-at-a-time approach. It should eventually be removed.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "codegenprepare" + +STATISTIC(NumBlocksElim, "Number of blocks eliminated"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); +STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " + "sunken Cmps"); +STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " + "of sunken Casts"); +STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " + "computations were sunk"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); +STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); +STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); +STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); + +static cl::opt<bool> DisableBranchOpts( + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); + +static cl::opt<bool> DisableSelectToBranch( + "disable-cgp-select2branch", cl::Hidden, cl::init(false), + cl::desc("Disable select to branch conversion.")); + +static cl::opt<bool> AddrSinkUsingGEPs( + "addr-sink-using-gep", cl::Hidden, cl::init(false), + cl::desc("Address sinking in CGP using GEPs.")); + +static cl::opt<bool> EnableAndCmpSinking( + "enable-andcmp-sinking", cl::Hidden, cl::init(true), + cl::desc("Enable sinking and/cmp into branches.")); + +namespace { +typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; +typedef DenseMap<Instruction *, Type *> InstrToOrigTy; + + class CodeGenPrepare : public FunctionPass { + /// TLI - Keep a pointer to a TargetLowering to consult for determining + /// transformation profitability. + const TargetMachine *TM; + const TargetLowering *TLI; + const TargetLibraryInfo *TLInfo; + DominatorTree *DT; + + /// CurInstIterator - As we scan instructions optimizing them, this is the + /// next instruction to optimize. Xforms that can invalidate this should + /// update it.
+ BasicBlock::iterator CurInstIterator; + + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. + ValueMap<Value*, Value*> SunkAddrs; + + /// Keeps track of all truncates inserted for the current function. + SetOfInstrs InsertedTruncsSet; + /// Keeps track of the type of the related instructions before their + /// promotion for the current function. + InstrToOrigTy PromotedInsts; + + /// ModifiedDT - If CFG is modified in any way, dominator tree may need to + /// be updated. + bool ModifiedDT; + + /// OptSize - True if optimizing for size. + bool OptSize; + + public: + static char ID; // Pass identification, replacement for typeid + explicit CodeGenPrepare(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM), TLI(nullptr) { + initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + const char *getPassName() const override { return "CodeGen Prepare"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfo>(); + } + + private: + bool EliminateFallThrough(Function &F); + bool EliminateMostlyEmptyBlocks(Function &F); + bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; + void EliminateMostlyEmptyBlock(BasicBlock *BB); + bool OptimizeBlock(BasicBlock &BB); + bool OptimizeInst(Instruction *I); + bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); + bool OptimizeInlineAsmInst(CallInst *CS); + bool OptimizeCallInst(CallInst *CI); + bool MoveExtToFormExtLoad(Instruction *I); + bool OptimizeExtUses(Instruction *I); + bool OptimizeSelectInst(SelectInst *SI); + bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); + bool DupRetToEnableTailCallOpts(BasicBlock *BB); + bool PlaceDbgValues(Function &F); + bool sinkAndCmp(Function &F); + }; +} + +char CodeGenPrepare::ID = 0; +INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) + +FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { + return new CodeGenPrepare(TM); +} + +bool CodeGenPrepare::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + bool EverMadeChange = false; + // Clear per function information. + InsertedTruncsSet.clear(); + PromotedInsts.clear(); + + ModifiedDT = false; + if (TM) TLI = TM->getTargetLowering(); + TLInfo = &getAnalysis<TargetLibraryInfo>(); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); + + /// This optimization identifies DIV instructions that can be + /// profitably bypassed and carried out with a shorter, faster divide. + if (!OptSize && TLI && TLI->isSlowDivBypassed()) { + const DenseMap<unsigned int, unsigned int> &BypassWidths = + TLI->getBypassSlowDivWidths(); + for (Function::iterator I = F.begin(); I != F.end(); I++) + EverMadeChange |= bypassSlowDivision(F, I, BypassWidths); + } + + // Eliminate blocks that contain only PHI nodes and an + // unconditional branch. + EverMadeChange |= EliminateMostlyEmptyBlocks(F); + + // If llvm.dbg.value is far away from the value, then iSel may not be able to + // handle it properly. iSel will drop llvm.dbg.value if it cannot + // find a node corresponding to the value.
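// (Illustration, ours: PlaceDbgValues rewrites e.g.
//   %a = add i32 %x, %y
//   ...                                   ; far away, possibly other blocks
//   call void @llvm.dbg.value(metadata %a, ...)
// so that the dbg.value sits immediately after the add, keeping the node for
// %a visible when the intrinsic is lowered.)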
+ EverMadeChange |= PlaceDbgValues(F); + + // If there is a mask, compare against zero, and branch that can be combined + // into a single target instruction, push the mask and compare into branch + // users. Do this before OptimizeBlock -> OptimizeInst -> + // OptimizeCmpExpression, which perturbs the pattern being searched for. + if (!DisableBranchOpts) + EverMadeChange |= sinkAndCmp(F); + + bool MadeChange = true; + while (MadeChange) { + MadeChange = false; + for (Function::iterator I = F.begin(); I != F.end(); ) { + BasicBlock *BB = I++; + MadeChange |= OptimizeBlock(*BB); + } + EverMadeChange |= MadeChange; + } + + SunkAddrs.clear(); + + if (!DisableBranchOpts) { + MadeChange = false; + SmallPtrSet<BasicBlock*, 8> WorkList; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); + MadeChange |= ConstantFoldTerminator(BB, true); + if (!MadeChange) continue; + + for (SmallVectorImpl<BasicBlock*>::iterator + II = Successors.begin(), IE = Successors.end(); II != IE; ++II) + if (pred_begin(*II) == pred_end(*II)) + WorkList.insert(*II); + } + + // Delete the dead blocks and any of their dead successors. + MadeChange |= !WorkList.empty(); + while (!WorkList.empty()) { + BasicBlock *BB = *WorkList.begin(); + WorkList.erase(BB); + SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); + + DeleteDeadBlock(BB); + + for (SmallVectorImpl<BasicBlock*>::iterator + II = Successors.begin(), IE = Successors.end(); II != IE; ++II) + if (pred_begin(*II) == pred_end(*II)) + WorkList.insert(*II); + } + + // Merge pairs of basic blocks with unconditional branches, connected by + // a single edge. + if (EverMadeChange || MadeChange) + MadeChange |= EliminateFallThrough(F); + + if (MadeChange) + ModifiedDT = true; + EverMadeChange |= MadeChange; + } + + if (ModifiedDT && DT) + DT->recalculate(F); + + return EverMadeChange; +} + +/// EliminateFallThrough - Merge basic blocks which are connected +/// by a single edge, where one of the basic blocks has a single successor +/// pointing to the other basic block, which has a single predecessor. +bool CodeGenPrepare::EliminateFallThrough(Function &F) { + bool Changed = false; + // Scan all of the blocks in the function, except for the entry block. + for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { + BasicBlock *BB = I++; + // If the destination block has a single pred, then this is a trivial + // edge, just collapse it. + BasicBlock *SinglePred = BB->getSinglePredecessor(); + + // Don't merge if BB's address is taken. + if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; + + BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); + if (Term && !Term->isConditional()) { + Changed = true; + DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); + // Remember if SinglePred was the entry block of the function. + // If so, we will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(BB, this); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + // We have erased a block. Update the iterator. + I = BB; + } + } + return Changed; +} + +/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, +/// debug info directives, and an unconditional branch. Passes before isel +/// (e.g. 
LSR/loopsimplify) often split edges in ways that are non-optimal for +/// isel. Start by eliminating these blocks so we can split them the way we +/// want them. +bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { + bool MadeChange = false; + // Note that this intentionally skips the entry block. + for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { + BasicBlock *BB = I++; + + // If this block doesn't end with an uncond branch, ignore it. + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isUnconditional()) + continue; + + // If the instruction before the branch (skipping debug info) isn't a phi + // node, then other stuff is happening here. + BasicBlock::iterator BBI = BI; + if (BBI != BB->begin()) { + --BBI; + while (isa<DbgInfoIntrinsic>(BBI)) { + if (BBI == BB->begin()) + break; + --BBI; + } + if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) + continue; + } + + // Do not break infinite loops. + BasicBlock *DestBB = BI->getSuccessor(0); + if (DestBB == BB) + continue; + + if (!CanMergeBlocks(BB, DestBB)) + continue; + + EliminateMostlyEmptyBlock(BB); + MadeChange = true; + } + return MadeChange; +} + +/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a +/// single uncond branch between them, and BB contains no other non-phi +/// instructions. +bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, + const BasicBlock *DestBB) const { + // We only want to eliminate blocks whose phi nodes are used by phi nodes in + // the successor. If there are more complex conditions (e.g. preheaders), + // don't mess around with them. + BasicBlock::const_iterator BBI = BB->begin(); + while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { + for (const User *U : PN->users()) { + const Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != DestBB || !isa<PHINode>(UI)) + return false; + // If User is inside DestBB block and it is a PHINode then check + // incoming value. If incoming value is not from BB then this is + // a complex condition (e.g. preheaders) we want to avoid here. + if (UI->getParent() == DestBB) { + if (const PHINode *UPN = dyn_cast<PHINode>(UI)) + for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { + Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); + if (Insn && Insn->getParent() == BB && + Insn->getParent() != UPN->getIncomingBlock(I)) + return false; + } + } + } + } + + // If BB and DestBB contain any common predecessors, then the phi nodes in BB + // and DestBB may have conflicting incoming values for the block. If so, we + // can't merge the block. + const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); + if (!DestBBPN) return true; // no conflict. + + // Collect the preds of BB. + SmallPtrSet<const BasicBlock*, 16> BBPreds; + if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { + // It is faster to get preds from a PHI than with pred_iterator. + for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) + BBPreds.insert(BBPN->getIncomingBlock(i)); + } else { + BBPreds.insert(pred_begin(BB), pred_end(BB)); + } + + // Walk the preds of DestBB. + for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = DestBBPN->getIncomingBlock(i); + if (BBPreds.count(Pred)) { // Common predecessor?
+ BBI = DestBB->begin(); + while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { + const Value *V1 = PN->getIncomingValueForBlock(Pred); + const Value *V2 = PN->getIncomingValueForBlock(BB); + + // If V2 is a phi node in BB, look up what the mapped value will be. + if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) + if (V2PN->getParent() == BB) + V2 = V2PN->getIncomingValueForBlock(Pred); + + // If there is a conflict, bail out. + if (V1 != V2) return false; + } + } + } + + return true; +} + + +/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and +/// an unconditional branch in it. +void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { + BranchInst *BI = cast<BranchInst>(BB->getTerminator()); + BasicBlock *DestBB = BI->getSuccessor(0); + + DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); + + // If the destination block has a single pred, then this is a trivial edge, + // just collapse it. + if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { + if (SinglePred != DestBB) { + // Remember if SinglePred was the entry block of the function. If so, we + // will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(DestBB, this); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); + return; + } + } + + // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB + // to handle the new incoming edges it is about to have. + PHINode *PN; + for (BasicBlock::iterator BBI = DestBB->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + // Remove the incoming value for BB, and remember it. + Value *InVal = PN->removeIncomingValue(BB, false); + + // Two options: either the InVal is a phi node defined in BB or it is some + // value that dominates BB. + PHINode *InValPhi = dyn_cast<PHINode>(InVal); + if (InValPhi && InValPhi->getParent() == BB) { + // Add all of the input values of the input PHI as inputs of this phi. + for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) + PN->addIncoming(InValPhi->getIncomingValue(i), + InValPhi->getIncomingBlock(i)); + } else { + // Otherwise, add one instance of the dominating value for each edge that + // we will be adding. + if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { + for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) + PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + PN->addIncoming(InVal, *PI); + } + } + } + + // The PHIs are now updated, change everything that refers to BB to use + // DestBB and remove BB. + BB->replaceAllUsesWith(DestBB); + if (DT && !ModifiedDT) { + BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); + BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); + BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); + DT->changeImmediateDominator(DestBB, NewIDom); + DT->eraseNode(BB); + } + BB->eraseFromParent(); + ++NumBlocksElim; + + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); +} + +/// SinkCast - Sink the specified cast instruction into its user blocks +static bool SinkCast(CastInst *CI) { + BasicBlock *DefBB = CI->getParent(); + + /// InsertedCasts - Only insert a cast in each block once.
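// (Illustration, ours rather than patch content: SinkCast turns
//   bb0: %c = zext i32 %x to i64          ; defined here, used elsewhere
//   bb1: ... use %c ...
//   bb2: ... use %c ...
// into a fresh zext at the head of bb1 and another at the head of bb2, so
// each user block gets a local copy that can fold into its users instead of
// forcing a cross-block virtual register to be coalesced.)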
+ DenseMap<BasicBlock*, CastInst*> InsertedCasts; + + bool MadeChange = false; + for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); + UI != E; ) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI); + + // Figure out which BB this cast is used in. For PHI's this is the + // appropriate predecessor block. + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) { + UserBB = PN->getIncomingBlock(TheUse); + } + + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // If this user is in the same block as the cast, don't change the cast. + if (UserBB == DefBB) continue; + + // If we have already inserted a cast into this block, use it. + CastInst *&InsertedCast = InsertedCasts[UserBB]; + + if (!InsertedCast) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + InsertedCast = + CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", + InsertPt); + MadeChange = true; + } + + // Replace a use of the cast with a use of the new cast. + TheUse = InsertedCast; + ++NumCastUses; + } + + // If we removed all uses, nuke the cast. + if (CI->use_empty()) { + CI->eraseFromParent(); + MadeChange = true; + } + + return MadeChange; +} + +/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop +/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), +/// sink it into user blocks to reduce the number of virtual +/// registers that must be created and coalesced. +/// +/// Return true if any changes are made. +/// +static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ + // If this is a noop copy, + EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(CI->getType()); + + // This is an fp<->int conversion? + if (SrcVT.isInteger() != DstVT.isInteger()) + return false; + + // If this is an extension, it will be a zero or sign extension, which + // isn't a noop. + if (SrcVT.bitsLT(DstVT)) return false; + + // If these values will be promoted, find out what they will be promoted + // to. This helps us consider truncates on PPC as noop copies when they + // are. + if (TLI.getTypeAction(CI->getContext(), SrcVT) == + TargetLowering::TypePromoteInteger) + SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); + if (TLI.getTypeAction(CI->getContext(), DstVT) == + TargetLowering::TypePromoteInteger) + DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); + + // If, after promotion, these are the same types, this is a noop copy. + if (SrcVT != DstVT) + return false; + + return SinkCast(CI); +} + +/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce +/// the number of virtual registers that must be created and coalesced. This is +/// a clear win except on targets with multiple condition code registers +/// (PowerPC), where it might lose; some adjustment may be wanted there. +/// +/// Return true if any changes are made. +static bool OptimizeCmpExpression(CmpInst *CI) { + BasicBlock *DefBB = CI->getParent(); + + /// InsertedCmp - Only insert a cmp in each block once. + DenseMap<BasicBlock*, CmpInst*> InsertedCmps; + + bool MadeChange = false; + for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); + UI != E; ) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI); + + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // Don't bother for PHI nodes. 
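// (A PHI's use is logically at the end of the incoming block, not in the
// PHI's own block, so there is no single user block into which the cmp could
// be sunk.)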
+ if (isa<PHINode>(User)) + continue; + + // Figure out which BB this cmp is used in. + BasicBlock *UserBB = User->getParent(); + + // If this user is in the same block as the cmp, don't change the cmp. + if (UserBB == DefBB) continue; + + // If we have already inserted a cmp into this block, use it. + CmpInst *&InsertedCmp = InsertedCmps[UserBB]; + + if (!InsertedCmp) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + InsertedCmp = + CmpInst::Create(CI->getOpcode(), + CI->getPredicate(), CI->getOperand(0), + CI->getOperand(1), "", InsertPt); + MadeChange = true; + } + + // Replace a use of the cmp with a use of the new cmp. + TheUse = InsertedCmp; + ++NumCmpUses; + } + + // If we removed all uses, nuke the cmp. + if (CI->use_empty()) + CI->eraseFromParent(); + + return MadeChange; +} + +/// isExtractBitsCandidateUse - Check if the candidates could +/// be combined with shift instruction, which includes: +/// 1. Truncate instruction +/// 2. And instruction and the imm is a mask of the low bits: +/// imm & (imm+1) == 0 +static bool isExtractBitsCandidateUse(Instruction *User) { + if (!isa<TruncInst>(User)) { + if (User->getOpcode() != Instruction::And || + !isa<ConstantInt>(User->getOperand(1))) + return false; + + const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); + + if ((Cimm & (Cimm + 1)).getBoolValue()) + return false; + } + return true; +} + +/// SinkShiftAndTruncate - sink both shift and truncate instruction +/// to the use of truncate's BB. +static bool +SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, + DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, + const TargetLowering &TLI) { + BasicBlock *UserBB = User->getParent(); + DenseMap<BasicBlock *, CastInst *> InsertedTruncs; + TruncInst *TruncI = dyn_cast<TruncInst>(User); + bool MadeChange = false; + + for (Value::user_iterator TruncUI = TruncI->user_begin(), + TruncE = TruncI->user_end(); + TruncUI != TruncE;) { + + Use &TruncTheUse = TruncUI.getUse(); + Instruction *TruncUser = cast<Instruction>(*TruncUI); + // Preincrement use iterator so we don't invalidate it. + + ++TruncUI; + + int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); + if (!ISDOpcode) + continue; + + // If the use is actually a legal node, there will not be an implicit + // truncate. + if (TLI.isOperationLegalOrCustom(ISDOpcode, + EVT::getEVT(TruncUser->getType()))) + continue; + + // Don't bother for PHI nodes. 
+ if (isa<PHINode>(TruncUser)) + continue; + + BasicBlock *TruncUserBB = TruncUser->getParent(); + + if (UserBB == TruncUserBB) + continue; + + BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; + CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; + + if (!InsertedShift && !InsertedTrunc) { + BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); + // Sink the shift + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = + BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + else + InsertedShift = + BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + + // Sink the trunc + BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); + TruncInsertPt++; + + InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, + TruncI->getType(), "", TruncInsertPt); + + MadeChange = true; + + TruncTheUse = InsertedTrunc; + } + } + return MadeChange; +} + +/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if +/// the uses could potentially be combined with this shift instruction and +/// generate BitExtract instruction. It will only be applied if the architecture +/// supports BitExtract instruction. Here is an example: +/// BB1: +/// %x.extract.shift = lshr i64 %arg1, 32 +/// BB2: +/// %x.extract.trunc = trunc i64 %x.extract.shift to i16 +/// ==> +/// +/// BB2: +/// %x.extract.shift.1 = lshr i64 %arg1, 32 +/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 +/// +/// CodeGen will recognize the pattern in BB2 and generate BitExtract +/// instruction. +/// Return true if any changes are made. +static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, + const TargetLowering &TLI) { + BasicBlock *DefBB = ShiftI->getParent(); + + /// Only insert instructions in each block once. + DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; + + bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); + + bool MadeChange = false; + for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); + UI != E;) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI); + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // Don't bother for PHI nodes. + if (isa<PHINode>(User)) + continue; + + if (!isExtractBitsCandidateUse(User)) + continue; + + BasicBlock *UserBB = User->getParent(); + + if (UserBB == DefBB) { + // If the shift and truncate instruction are in the same BB, the use of + // the truncate (TruncUse) may still introduce another truncate if not + // legal. In this case, we would like to sink both shift and truncate + // instruction to the BB of TruncUse. + // for example: + // BB1: + // i64 shift.result = lshr i64 opnd, imm + // trunc.result = trunc shift.result to i16 + // + // BB2: + // ----> We will have an implicit truncate here if the architecture does + // not have i16 compare. + // cmp i16 trunc.result, opnd2 + // + if (isa<TruncInst>(User) && shiftIsLegal + // If the type of the truncate is legal, no truncate will be + // introduced in other basic blocks. + && (!TLI.isTypeLegal(TLI.getValueType(User->getType())))) + MadeChange = + SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI); + + continue; + } + // If we have already inserted a shift into this block, use it.
+ BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; + + if (!InsertedShift) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = + BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + else + InsertedShift = + BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + + MadeChange = true; + } + + // Replace a use of the shift with a use of the new shift. + TheUse = InsertedShift; + } + + // If we removed all uses, nuke the shift. + if (ShiftI->use_empty()) + ShiftI->eraseFromParent(); + + return MadeChange; +} + +namespace { +class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls { +protected: + void replaceCall(Value *With) override { + CI->replaceAllUsesWith(With); + CI->eraseFromParent(); + } + bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override { + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) + return SizeCI->isAllOnesValue(); + return false; + } +}; +} // end anonymous namespace + +bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { + BasicBlock *BB = CI->getParent(); + + // Lower inline assembly if we can. + // If we found an inline asm expression, and if the target knows how to + // lower it to normal LLVM code, do so now. + if (TLI && isa<InlineAsm>(CI->getCalledValue())) { + if (TLI->ExpandInlineAsm(CI)) { + // Avoid invalidating the iterator. + CurInstIterator = BB->begin(); + // Avoid processing instructions out of order, which could cause + // reuse before a value is defined. + SunkAddrs.clear(); + return true; + } + // Sink address computing for memory operands into the block. + if (OptimizeInlineAsmInst(CI)) + return true; + } + + // Lower all uses of llvm.objectsize.* + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); + if (II && II->getIntrinsicID() == Intrinsic::objectsize) { + bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); + Type *ReturnTy = CI->getType(); + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + + // Substituting this can cause recursive simplifications, which can + // invalidate our iterator. Use a WeakVH to hold onto it in case this + // happens. + WeakVH IterHandle(CurInstIterator); + + replaceAndRecursivelySimplify(CI, RetVal, + TLI ? TLI->getDataLayout() : nullptr, + TLInfo, ModifiedDT ? nullptr : DT); + + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + if (IterHandle != CurInstIterator) { + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } + return true; + } + + if (II && TLI) { + SmallVector<Value*, 2> PtrOps; + Type *AccessTy; + if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) + while (!PtrOps.empty()) + if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) + return true; + } + + // From here on out we're working with named functions. + if (!CI->getCalledFunction()) return false; + + // We'll need DataLayout from here on out. + const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; + if (!TD) return false; + + // Lower all default uses of _chk calls. This is very similar + // to what InstCombineCalls does, but here we are only lowering calls + // that have the default "don't know" as the objectsize. Anything else + // should be left alone.
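// (Illustration, ours: isFoldable above only accepts an all-ones object
// size, the "don't know" value, so e.g.
//   call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 %n, i64 -1)
// is lowered to the unfortified
//   call i8* @memcpy(i8* %dst, i8* %src, i64 %n)
// because with an unknown object size the runtime check cannot catch
// anything.)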
+ CodeGenPrepareFortifiedLibCalls Simplifier; + return Simplifier.fold(CI, TD, TLInfo); +} + +/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return +/// instructions to the predecessor to enable tail call optimizations. The +/// case it is currently looking for is: +/// @code +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// br label %return +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// br label %return +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// br label %return +/// return: +/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +/// ret i32 %retval +/// @endcode +/// +/// => +/// +/// @code +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// ret i32 %tmp0 +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// ret i32 %tmp1 +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// ret i32 %tmp2 +/// @endcode +bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { + if (!TLI) + return false; + + ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RI) + return false; + + PHINode *PN = nullptr; + BitCastInst *BCI = nullptr; + Value *V = RI->getReturnValue(); + if (V) { + BCI = dyn_cast<BitCastInst>(V); + if (BCI) + V = BCI->getOperand(0); + + PN = dyn_cast<PHINode>(V); + if (!PN) + return false; + } + + if (PN && PN->getParent() != BB) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + // See llvm::isInTailCallPosition(). + const Function *F = BB->getParent(); + AttributeSet CallerAttrs = F->getAttributes(); + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + return false; + + // Make sure there are no instructions between the PHI and return, or that the + // return is the first instruction in the block. + if (PN) { + BasicBlock::iterator BI = BB->begin(); + do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI == BCI) + // Also skip over the bitcast. + ++BI; + if (&*BI != RI) + return false; + } else { + BasicBlock::iterator BI = BB->begin(); + while (isa<DbgInfoIntrinsic>(BI)) ++BI; + if (&*BI != RI) + return false; + } + + /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail + /// call. + SmallVector<CallInst*, 4> TailCalls; + if (PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); + // Make sure the phi value is indeed produced by the tail call. + if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } else { + SmallPtrSet<BasicBlock*, 4> VisitedBBs; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (!VisitedBBs.insert(*PI)) + continue; + + BasicBlock::InstListType &InstList = (*PI)->getInstList(); + BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); + BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); + do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); + if (RI == RE) + continue; + + CallInst *CI = dyn_cast<CallInst>(&*RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } + + bool Changed = false; + for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { + CallInst *CI = TailCalls[i]; + CallSite CS(CI); + + // Conservatively require the attributes of the call to match those of the + // return. Ignore noalias because it doesn't affect the call sequence. 
+ AttributeSet CalleeAttrs = CS.getAttributes(); + if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias) != + AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias)) + continue; + + // Make sure the call instruction is followed by an unconditional branch to + // the return block. + BasicBlock *CallBB = CI->getParent(); + BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); + if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) + continue; + + // Duplicate the return into CallBB. + (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + ModifiedDT = Changed = true; + ++NumRetsDup; + } + + // If we eliminated all predecessors of the block, delete the block now. + if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) + BB->eraseFromParent(); + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Memory Optimization +//===----------------------------------------------------------------------===// + +namespace { + +/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode +/// which holds actual Value*'s for register values. +struct ExtAddrMode : public TargetLowering::AddrMode { + Value *BaseReg; + Value *ScaledReg; + ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} + void print(raw_ostream &OS) const; + void dump() const; + + bool operator==(const ExtAddrMode& O) const { + return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && + (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && + (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); + } +}; + +#ifndef NDEBUG +static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { + AM.print(OS); + return OS; +} +#endif + +void ExtAddrMode::print(raw_ostream &OS) const { + bool NeedPlus = false; + OS << "["; + if (BaseGV) { + OS << (NeedPlus ? " + " : "") + << "GV:"; + BaseGV->printAsOperand(OS, /*PrintType=*/false); + NeedPlus = true; + } + + if (BaseOffs) { + OS << (NeedPlus ? " + " : "") + << BaseOffs; + NeedPlus = true; + } + + if (BaseReg) { + OS << (NeedPlus ? " + " : "") + << "Base:"; + BaseReg->printAsOperand(OS, /*PrintType=*/false); + NeedPlus = true; + } + if (Scale) { + OS << (NeedPlus ? " + " : "") + << Scale << "*"; + ScaledReg->printAsOperand(OS, /*PrintType=*/false); + } + + OS << ']'; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ExtAddrMode::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + +/// \brief This class provides transaction based operation on the IR. +/// Every change made through this class is recorded in the internal state and +/// can be undone (rollback) until commit is called. +class TypePromotionTransaction { + + /// \brief This represents the common interface of the individual transaction. + /// Each class implements the logic for doing one specific modification on + /// the IR via the TypePromotionTransaction. + class TypePromotionAction { + protected: + /// The Instruction modified. + Instruction *Inst; + + public: + /// \brief Constructor of the action. + /// The constructor performs the related action on the IR. + TypePromotionAction(Instruction *Inst) : Inst(Inst) {} + + virtual ~TypePromotionAction() {} + + /// \brief Undo the modification done by this action. + /// When this method is called, the IR must be in the same state as it was + /// before this action was applied. 
+ /// \pre Undoing the action works if and only if the IR is in the exact same + /// state as it was directly after this action was applied. + virtual void undo() = 0; + + /// \brief Advocate every change made by this action. + /// When the results on the IR of the action are to be kept, it is important + /// to call this function, otherwise hidden information may be kept forever. + virtual void commit() { + // Nothing to be done, this action is not doing anything. + } + }; + + /// \brief Utility to remember the position of an instruction. + class InsertionHandler { + /// Position of an instruction. + /// Either an instruction: + /// - Is the first in a basic block: BB is used. + /// - Has a previous instruction: PrevInst is used. + union { + Instruction *PrevInst; + BasicBlock *BB; + } Point; + /// Remember whether or not the instruction had a previous instruction. + bool HasPrevInstruction; + + public: + /// \brief Record the position of \p Inst. + InsertionHandler(Instruction *Inst) { + BasicBlock::iterator It = Inst; + HasPrevInstruction = (It != (Inst->getParent()->begin())); + if (HasPrevInstruction) + Point.PrevInst = --It; + else + Point.BB = Inst->getParent(); + } + + /// \brief Insert \p Inst at the recorded position. + void insert(Instruction *Inst) { + if (HasPrevInstruction) { + if (Inst->getParent()) + Inst->removeFromParent(); + Inst->insertAfter(Point.PrevInst); + } else { + Instruction *Position = Point.BB->getFirstInsertionPt(); + if (Inst->getParent()) + Inst->moveBefore(Position); + else + Inst->insertBefore(Position); + } + } + }; + + /// \brief Move an instruction before another. + class InstructionMoveBefore : public TypePromotionAction { + /// Original position of the instruction. + InsertionHandler Position; + + public: + /// \brief Move \p Inst before \p Before. + InstructionMoveBefore(Instruction *Inst, Instruction *Before) + : TypePromotionAction(Inst), Position(Inst) { + DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); + Inst->moveBefore(Before); + } + + /// \brief Move the instruction back to its original position. + void undo() override { + DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); + Position.insert(Inst); + } + }; + + /// \brief Set the operand of an instruction with a new value. + class OperandSetter : public TypePromotionAction { + /// Original operand of the instruction. + Value *Origin; + /// Index of the modified operand. + unsigned Idx; + + public: + /// \brief Set \p Idx operand of \p Inst with \p NewVal. + OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) + : TypePromotionAction(Inst), Idx(Idx) { + DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" + << "for:" << *Inst << "\n" + << "with:" << *NewVal << "\n"); + Origin = Inst->getOperand(Idx); + Inst->setOperand(Idx, NewVal); + } + + /// \brief Restore the original value of the instruction. + void undo() override { + DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" + << "for: " << *Inst << "\n" + << "with: " << *Origin << "\n"); + Inst->setOperand(Idx, Origin); + } + }; + + /// \brief Hide the operands of an instruction. + /// Do as if this instruction was not using any of its operands. + class OperandsHider : public TypePromotionAction { + /// The list of original operands. + SmallVector<Value *, 4> OriginalValues; + + public: + /// \brief Remove \p Inst from the uses of the operands of \p Inst.
+ OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { + DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); + unsigned NumOpnds = Inst->getNumOperands(); + OriginalValues.reserve(NumOpnds); + for (unsigned It = 0; It < NumOpnds; ++It) { + // Save the current operand. + Value *Val = Inst->getOperand(It); + OriginalValues.push_back(Val); + // Set a dummy one. + // We could use OperandSetter here, but that would imply an overhead + // that we are not willing to pay. + Inst->setOperand(It, UndefValue::get(Val->getType())); + } + } + + /// \brief Restore the original list of uses. + void undo() override { + DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); + for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) + Inst->setOperand(It, OriginalValues[It]); + } + }; + + /// \brief Build a truncate instruction. + class TruncBuilder : public TypePromotionAction { + public: + /// \brief Build a truncate instruction of \p Opnd producing a \p Ty + /// result. + /// trunc Opnd to Ty. + TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { + IRBuilder<> Builder(Opnd); + Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted")); + DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n"); + } + + /// \brief Get the built instruction. + Instruction *getBuiltInstruction() { return Inst; } + + /// \brief Remove the built instruction. + void undo() override { + DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n"); + Inst->eraseFromParent(); + } + }; + + /// \brief Build a sign extension instruction. + class SExtBuilder : public TypePromotionAction { + public: + /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty + /// result. + /// sext Opnd to Ty. + SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) + : TypePromotionAction(Inst) { + IRBuilder<> Builder(InsertPt); + Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted")); + DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n"); + } + + /// \brief Get the built instruction. + Instruction *getBuiltInstruction() { return Inst; } + + /// \brief Remove the built instruction. + void undo() override { + DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n"); + Inst->eraseFromParent(); + } + }; + + /// \brief Mutate an instruction to another type. + class TypeMutator : public TypePromotionAction { + /// Record the original type. + Type *OrigTy; + + public: + /// \brief Mutate the type of \p Inst into \p NewTy. + TypeMutator(Instruction *Inst, Type *NewTy) + : TypePromotionAction(Inst), OrigTy(Inst->getType()) { + DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy + << "\n"); + Inst->mutateType(NewTy); + } + + /// \brief Mutate the instruction back to its original type. + void undo() override { + DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy + << "\n"); + Inst->mutateType(OrigTy); + } + }; + + /// \brief Replace the uses of an instruction by another instruction. + class UsesReplacer : public TypePromotionAction { + /// Helper structure to keep track of the replaced uses. + struct InstructionAndIdx { + /// The instruction using the instruction. + Instruction *Inst; + /// The index where this instruction is used for Inst. + unsigned Idx; + InstructionAndIdx(Instruction *Inst, unsigned Idx) + : Inst(Inst), Idx(Idx) {} + }; + + /// Keep track of the original uses (pair Instruction, Index).
+ SmallVector<InstructionAndIdx, 4> OriginalUses;
+ typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
+
+ public:
+ /// \brief Replace all the uses of \p Inst with \p New.
+ UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
+ DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
+ << "\n");
+ // Record the original uses.
+ for (Use &U : Inst->uses()) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
+ }
+ // Now, we can replace the uses.
+ Inst->replaceAllUsesWith(New);
+ }
+
+ /// \brief Reassign the original uses of Inst back to Inst.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
+ for (use_iterator UseIt = OriginalUses.begin(),
+ EndIt = OriginalUses.end();
+ UseIt != EndIt; ++UseIt) {
+ UseIt->Inst->setOperand(UseIt->Idx, Inst);
+ }
+ }
+ };
+
+ /// \brief Remove an instruction from the IR.
+ class InstructionRemover : public TypePromotionAction {
+ /// Original position of the instruction.
+ InsertionHandler Inserter;
+ /// Helper structure to hide all the links to the instruction. In other
+ /// words, this helps pretend that the instruction was removed.
+ OperandsHider Hider;
+ /// Keep track of the uses replaced, if any.
+ UsesReplacer *Replacer;
+
+ public:
+ /// \brief Remove all references to \p Inst and optionally replace all its
+ /// uses with \p New.
+ /// \pre If !Inst->use_empty(), then New != nullptr
+ InstructionRemover(Instruction *Inst, Value *New = nullptr)
+ : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
+ Replacer(nullptr) {
+ if (New)
+ Replacer = new UsesReplacer(Inst, New);
+ DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+ Inst->removeFromParent();
+ }
+
+ ~InstructionRemover() { delete Replacer; }
+
+ /// \brief Really remove the instruction.
+ void commit() override { delete Inst; }
+
+ /// \brief Resurrect the instruction and reassign it to the proper uses if
+ /// a new value was provided when building this action.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
+ Inserter.insert(Inst);
+ if (Replacer)
+ Replacer->undo();
+ Hider.undo();
+ }
+ };
+
+public:
+ /// Restoration point.
+ /// The restoration point is a pointer to an action instead of an iterator
+ /// because the iterator may be invalidated but not the pointer.
+ typedef const TypePromotionAction *ConstRestorationPt;
+ /// Commit every change made in this transaction.
+ void commit();
+ /// Undo all the changes made after the given point.
+ void rollback(ConstRestorationPt Point);
+ /// Get the current restoration point.
+ ConstRestorationPt getRestorationPoint() const;
+
+ /// \name API for IR modification with state keeping to support rollback.
+ /// @{
+ /// Same as Instruction::setOperand.
+ void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
+ /// Same as Instruction::eraseFromParent.
+ void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
+ /// Same as Value::replaceAllUsesWith.
+ void replaceAllUsesWith(Instruction *Inst, Value *New);
+ /// Same as Value::mutateType.
+ void mutateType(Instruction *Inst, Type *NewTy);
+ /// Same as IRBuilder::CreateTrunc.
+ Instruction *createTrunc(Instruction *Opnd, Type *Ty);
+ /// Same as IRBuilder::CreateSExt.
+ Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+ /// Same as Instruction::moveBefore.
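+ ///
+ /// A typical use of this API looks like the following sketch (illustrative
+ /// only; Inst, NewVal and StillProfitable are placeholders):
+ /// \code
+ ///   TypePromotionTransaction TPT;
+ ///   TypePromotionTransaction::ConstRestorationPt Pt =
+ ///       TPT.getRestorationPoint();
+ ///   TPT.setOperand(Inst, 0, NewVal); // recorded, can be undone
+ ///   if (StillProfitable)
+ ///     TPT.commit();                  // keep the change
+ ///   else
+ ///     TPT.rollback(Pt);              // operand 0 of Inst is restored
+ /// \endcode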
+ void moveBefore(Instruction *Inst, Instruction *Before); + /// @} + +private: + /// The ordered list of actions made so far. + SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; + typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; +}; + +void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, + Value *NewVal) { + Actions.push_back( + make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); +} + +void TypePromotionTransaction::eraseInstruction(Instruction *Inst, + Value *NewVal) { + Actions.push_back( + make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal)); +} + +void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, + Value *New) { + Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); +} + +void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { + Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); +} + +Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd, + Type *Ty) { + std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); + Instruction *I = Ptr->getBuiltInstruction(); + Actions.push_back(std::move(Ptr)); + return I; +} + +Instruction *TypePromotionTransaction::createSExt(Instruction *Inst, + Value *Opnd, Type *Ty) { + std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); + Instruction *I = Ptr->getBuiltInstruction(); + Actions.push_back(std::move(Ptr)); + return I; +} + +void TypePromotionTransaction::moveBefore(Instruction *Inst, + Instruction *Before) { + Actions.push_back( + make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); +} + +TypePromotionTransaction::ConstRestorationPt +TypePromotionTransaction::getRestorationPoint() const { + return !Actions.empty() ? Actions.back().get() : nullptr; +} + +void TypePromotionTransaction::commit() { + for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; + ++It) + (*It)->commit(); + Actions.clear(); +} + +void TypePromotionTransaction::rollback( + TypePromotionTransaction::ConstRestorationPt Point) { + while (!Actions.empty() && Point != Actions.back().get()) { + std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); + Curr->undo(); + } +} + +/// \brief A helper class for matching addressing modes. +/// +/// This encapsulates the logic for matching the target-legal addressing modes. +class AddressingModeMatcher { + SmallVectorImpl<Instruction*> &AddrModeInsts; + const TargetLowering &TLI; + + /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and + /// the memory instruction that we're computing this address for. + Type *AccessTy; + Instruction *MemoryInst; + + /// AddrMode - This is the addressing mode that we're building up. This is + /// part of the return value of this addressing mode matching stuff. + ExtAddrMode &AddrMode; + + /// The truncate instruction inserted by other CodeGenPrepare optimizations. + const SetOfInstrs &InsertedTruncs; + /// A map from the instructions to their type before promotion. + InstrToOrigTy &PromotedInsts; + /// The ongoing transaction where every action should be registered. + TypePromotionTransaction &TPT; + + /// IgnoreProfitability - This is set to true when we should not do + /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode + /// always returns true. 
+ bool IgnoreProfitability;
+
+ AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
+ const TargetLowering &T, Type *AT,
+ Instruction *MI, ExtAddrMode &AM,
+ const SetOfInstrs &InsertedTruncs,
+ InstrToOrigTy &PromotedInsts,
+ TypePromotionTransaction &TPT)
+ : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
+ InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
+ IgnoreProfitability = false;
+ }
+public:
+
+ /// Match - Find the maximal addressing mode that a load/store of V can fold,
+ /// given an access type of AccessTy. This returns a list of involved
+ /// instructions in AddrModeInsts.
+ /// \p InsertedTruncs The truncate instructions inserted by other
+ /// CodeGenPrepare optimizations.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ /// \p TPT The ongoing transaction where every action should be registered.
+ static ExtAddrMode Match(Value *V, Type *AccessTy,
+ Instruction *MemoryInst,
+ SmallVectorImpl<Instruction*> &AddrModeInsts,
+ const TargetLowering &TLI,
+ const SetOfInstrs &InsertedTruncs,
+ InstrToOrigTy &PromotedInsts,
+ TypePromotionTransaction &TPT) {
+ ExtAddrMode Result;
+
+ bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
+ MemoryInst, Result, InsertedTruncs,
+ PromotedInsts, TPT).MatchAddr(V, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+ return Result;
+ }
+private:
+ bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+ bool MatchAddr(Value *V, unsigned Depth);
+ bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
+ bool *MovedAway = nullptr);
+ bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
+ ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter);
+ bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+ bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
+ Value *PromotedOperand) const;
+};
+
+/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing
+/// mode. Return true and update AddrMode if this addr mode is legal for the
+/// target, false if not.
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+ unsigned Depth) {
+ // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+ // mode. Just process that directly.
+ if (Scale == 1)
+ return MatchAddr(ScaleReg, Depth);
+
+ // If the scale is 0, it takes nothing to add this.
+ if (Scale == 0)
+ return true;
+
+ // If we already have a scale of this value, we can add to it, otherwise, we
+ // need an available scale field.
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+ return false;
+
+ ExtAddrMode TestAddrMode = AddrMode;
+
+ // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+ // [A+B + A*7] -> [B+A*8].
+ TestAddrMode.Scale += Scale;
+ TestAddrMode.ScaledReg = ScaleReg;
+
+ // If the new address isn't legal, bail out.
+ if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ return false;
+
+ // It was legal, so commit it.
+ AddrMode = TestAddrMode;
+
+ // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+ // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+ // X*Scale + C*Scale to addr mode.
+ ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.ScaledReg = AddLHS;
+ TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+ // If this addressing mode is legal, commit it and remember that we folded
+ // this instruction.
+ if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ }
+
+ // Otherwise, not (x+c)*scale, just return what we have.
+ return true;
+}
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ // Don't touch identity bitcasts.
+ if (I->getType() == I->getOperand(0)->getType())
+ return false;
+ return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return true;
+ case Instruction::IntToPtr:
+ // We know the input is intptr_t, so this is foldable.
+ return true;
+ case Instruction::Add:
+ return true;
+ case Instruction::Mul:
+ case Instruction::Shl:
+ // Can only handle X*C and X << C.
+ return isa<ConstantInt>(I->getOperand(1));
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Helper class to perform type promotion.
+class TypePromotionHelper {
+ /// \brief Utility function to check whether or not a sign extension of
+ /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either
+ /// using the operands of \p Inst or promoting \p Inst.
+ /// In other words, check if:
+ /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType.
+ /// #1 Promotion applies:
+ /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...).
+ /// #2 Operand reuses:
+ /// sext opnd1 to ConsideredSExtType.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType,
+ const InstrToOrigTy &PromotedInsts);
+
+ /// \brief Utility function to determine if \p OpIdx should be promoted when
+ /// promoting \p Inst.
+ static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
+ if (isa<SelectInst>(Inst) && OpIdx == 0)
+ return false;
+ return true;
+ }
+
+ /// \brief Utility function to promote the operand of \p SExt when this
+ /// operand is a promotable trunc or sext.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// created to promote the operand of SExt.
+ /// Should never be called directly.
+ /// \return The promoted value which is used instead of SExt.
+ static Value *promoteOperandForTruncAndSExt(Instruction *SExt,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts);
+
+ /// \brief Utility function to promote the operand of \p SExt when this
+ /// operand is promotable and is not a supported trunc or sext.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// created to promote the operand of SExt.
+ /// Should never be called directly.
+ /// \return The promoted value which is used instead of SExt.
+ static Value *promoteOperandForOther(Instruction *SExt,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts);
+
+public:
+ /// Type for the utility function that promotes the operand of SExt.
+ typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts);
+ /// \brief Given a sign extend instruction \p SExt, return the appropriate
+ /// action to promote the operand of \p SExt instead of using SExt.
+ /// \return nullptr if no promotable action is possible with the current
+ /// sign extension.
+ /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
+ /// the other CodeGenPrepare optimizations. This information is important
+ /// because we do not want to promote these instructions as CodeGenPrepare
+ /// will reinsert them later. Thus creating an infinite loop: create/remove.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+ const TargetLowering &TLI,
+ const InstrToOrigTy &PromotedInsts);
+};
+
+bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
+ Type *ConsideredSExtType,
+ const InstrToOrigTy &PromotedInsts) {
+ // We can always get through sext.
+ if (isa<SExtInst>(Inst))
+ return true;
+
+ // We can get through a binary operator if it is legal. In other words, the
+ // binary operator must have a nuw or nsw flag.
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
+ if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
+ (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
+ return true;
+
+ // Check if we can do the following simplification.
+ // sext(trunc(sext)) --> sext
+ if (!isa<TruncInst>(Inst))
+ return false;
+
+ Value *OpndVal = Inst->getOperand(0);
+ // Check if we can use this operand in the sext.
+ // If the type is larger than the result type of the sign extension,
+ // we cannot.
+ if (OpndVal->getType()->getIntegerBitWidth() >
+ ConsideredSExtType->getIntegerBitWidth())
+ return false;
+
+ // If the operand of the truncate is not an instruction, we will not have
+ // any information on the dropped bits.
+ // (Actually we could for constants, but it is not worth the extra logic.)
+ Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
+ if (!Opnd)
+ return false;
+
+ // Check if the source type is narrow enough.
+ // I.e., check that the trunc just drops sign extended bits.
+ // #1 get the type of the operand.
+ const Type *OpndType;
+ InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
+ if (It != PromotedInsts.end())
+ OpndType = It->second;
+ else if (isa<SExtInst>(Opnd))
+ OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType();
+ else
+ return false;
+
+ // #2 check that the truncate just drops sign extended bits.
+ if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
+ return true;
+
+ return false;
+}
+
+TypePromotionHelper::Action TypePromotionHelper::getAction(
+ Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+ const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
+ Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0));
+ Type *SExtTy = SExt->getType();
+ // If the operand of the sign extension is not an instruction, we cannot
+ // get through.
+ // If it is, check that we can get through.
+ if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts))
+ return nullptr;
+
+ // Do not promote if the operand has been added by codegenprepare.
+ // Otherwise, it means we are undoing an optimization that is likely to be
+ // redone, thus causing a potential infinite loop.
+ if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd))
+ return nullptr;
+
+ // SExt or Trunc instructions.
+ // Return the related handler.
+ if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd))
+ return promoteOperandForTruncAndSExt;
+
+ // Regular instruction.
+ // Abort early if we will have to insert non-free instructions.
+ if (!SExtOpnd->hasOneUse() &&
+ !TLI.isTruncateFree(SExtTy, SExtOpnd->getType()))
+ return nullptr;
+ return promoteOperandForOther;
+}
+
+Value *TypePromotionHelper::promoteOperandForTruncAndSExt(
+ llvm::Instruction *SExt, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
+ // By construction, the operand of SExt is an instruction. Otherwise we cannot
+ // get through it and this method should not be called.
+ Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+ // Replace sext(trunc(opnd)) or sext(sext(opnd))
+ // => sext(opnd).
+ TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+ CreatedInsts = 0;
+
+ // Remove dead code.
+ if (SExtOpnd->use_empty())
+ TPT.eraseInstruction(SExtOpnd);
+
+ // Check if the sext is still needed.
+ if (SExt->getType() != SExt->getOperand(0)->getType())
+ return SExt;
+
+ // At this point we have: sext ty opnd to ty.
+ // Reassign the uses of SExt to the opnd and remove SExt.
+ Value *NextVal = SExt->getOperand(0);
+ TPT.eraseInstruction(SExt, NextVal);
+ return NextVal;
+}
+
+Value *
+TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts) {
+ // By construction, the operand of SExt is an instruction. Otherwise we cannot
+ // get through it and this method should not be called.
+ Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+ CreatedInsts = 0;
+ if (!SExtOpnd->hasOneUse()) {
+ // SExtOpnd will be promoted.
+ // All its uses except SExt will need to use a truncated value of the
+ // promoted version.
+ // Create the truncate now.
+ Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType());
+ Trunc->removeFromParent();
+ // Insert it just after the definition.
+ Trunc->insertAfter(SExtOpnd);
+
+ TPT.replaceAllUsesWith(SExtOpnd, Trunc);
+ // Restore the operand of SExt (which has been replaced by the previous call
+ // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
+ TPT.setOperand(SExt, 0, SExtOpnd);
+ }
+
+ // Get through the Instruction:
+ // 1. Update its type.
+ // 2. Replace the uses of SExt with SExtOpnd.
+ // 3. Sign extend each operand that needs to be sign extended.
+
+ // Remember the original type of the instruction before promotion.
+ // This is useful to know that the high bits are sign extended bits.
+ PromotedInsts.insert(
+ std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType()));
+ // Step #1.
+ TPT.mutateType(SExtOpnd, SExt->getType());
+ // Step #2.
+ TPT.replaceAllUsesWith(SExt, SExtOpnd);
+ // Step #3.
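+ // For instance (an illustrative sketch), promoting the operand of
+ //   %s = sext i32 %add to i64, with %add = add nsw i32 %opnd, 1
+ // rewrites the code as
+ //   %promoted = sext i32 %opnd to i64
+ //   %add = add nsw i64 %promoted, 1
+ // where the former uses of %s now refer to %add directly.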
+ Instruction *SExtForOpnd = SExt;
+
+ DEBUG(dbgs() << "Propagate SExt to operands\n");
+ for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
+ ++OpIdx) {
+ DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n');
+ if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() ||
+ !shouldSExtOperand(SExtOpnd, OpIdx)) {
+ DEBUG(dbgs() << "No need to propagate\n");
+ continue;
+ }
+ // Check if we can statically sign extend the operand.
+ Value *Opnd = SExtOpnd->getOperand(OpIdx);
+ if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
+ DEBUG(dbgs() << "Statically sign extend\n");
+ TPT.setOperand(
+ SExtOpnd, OpIdx,
+ ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue()));
+ continue;
+ }
+ // UndefValues are typed, so we have to statically sign extend them.
+ if (isa<UndefValue>(Opnd)) {
+ DEBUG(dbgs() << "Statically sign extend\n");
+ TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType()));
+ continue;
+ }
+
+ // Otherwise we have to explicitly sign extend the operand.
+ // Check if SExt was reused to sign extend an operand.
+ if (!SExtForOpnd) {
+ // If yes, create a new one.
+ DEBUG(dbgs() << "More operands to sext\n");
+ SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType());
+ ++CreatedInsts;
+ }
+
+ TPT.setOperand(SExtForOpnd, 0, Opnd);
+
+ // Move the sign extension before the insertion point.
+ TPT.moveBefore(SExtForOpnd, SExtOpnd);
+ TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd);
+ // If more sexts are required, new instructions will have to be created.
+ SExtForOpnd = nullptr;
+ }
+ if (SExtForOpnd == SExt) {
+ DEBUG(dbgs() << "Sign extension is useless now\n");
+ TPT.eraseInstruction(SExt);
+ }
+ return SExtOpnd;
+}
+
+/// IsPromotionProfitable - Check whether or not promoting an instruction
+/// to a wider type was profitable.
+/// \p MatchedSize gives the number of instructions that have been matched
+/// in the addressing mode after the promotion was applied.
+/// \p SizeWithPromotion gives the number of created instructions for
+/// the promotion plus the number of instructions that have been
+/// matched in the addressing mode before the promotion.
+/// \p PromotedOperand is the value that has been promoted.
+/// \return True if the promotion is profitable, false otherwise.
+bool
+AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
+ unsigned SizeWithPromotion,
+ Value *PromotedOperand) const {
+ // We folded fewer instructions than we created to promote the operand.
+ // This is not profitable.
+ if (MatchedSize < SizeWithPromotion)
+ return false;
+ if (MatchedSize > SizeWithPromotion)
+ return true;
+ // The promotion is neutral but it may help folding the sign extension in
+ // loads for instance.
+ // Check that we did not create an illegal instruction.
+ Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand);
+ if (!PromotedInst)
+ return false;
+ int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
+ // If the ISDOpcode is undefined, it was undefined before the promotion.
+ if (!ISDOpcode)
+ return true;
+ // Otherwise, check if the promoted instruction is legal or not.
+ return TLI.isOperationLegalOrCustom(ISDOpcode,
+ EVT::getEVT(PromotedInst->getType()));
+}
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode. If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+/// If \p MovedAway is not NULL, it contains the information of whether or +/// not AddrInst has to be folded into the addressing mode on success. +/// If \p MovedAway == true, \p AddrInst will not be part of the addressing +/// because it has been moved away. +/// Thus AddrInst must not be added in the matched instructions. +/// This state can happen when AddrInst is a sext, since it may be moved away. +/// Therefore, AddrInst may not be valid when MovedAway is true and it must +/// not be referenced anymore. +bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, + unsigned Depth, + bool *MovedAway) { + // Avoid exponential behavior on extremely deep expression trees. + if (Depth >= 5) return false; + + // By default, all matched instructions stay in place. + if (MovedAway) + *MovedAway = false; + + switch (Opcode) { + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return MatchAddr(AddrInst->getOperand(0), Depth); + case Instruction::IntToPtr: + // This inttoptr is a no-op if the integer type is pointer sized. + if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == + TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + // BitCast is always a noop, and we can handle it as long as it is + // int->int or pointer->pointer (we don't want int<->fp or something). + if ((AddrInst->getOperand(0)->getType()->isPointerTy() || + AddrInst->getOperand(0)->getType()->isIntegerTy()) && + // Don't touch identity bitcasts. These were probably put here by LSR, + // and we don't want to mess around with them. Assume it knows what it + // is doing. + AddrInst->getOperand(0)->getType() != AddrInst->getType()) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::Add: { + // Check to see if we can merge in the RHS then the LHS. If so, we win. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + // Start a transaction at this point. + // The LHS may match but not the RHS. + // Therefore, we need a higher level restoration point to undo partially + // matched operation. + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + + if (MatchAddr(AddrInst->getOperand(1), Depth+1) && + MatchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + + // Restore the old addr mode info. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + TPT.rollback(LastKnownGood); + + // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. + if (MatchAddr(AddrInst->getOperand(0), Depth+1) && + MatchAddr(AddrInst->getOperand(1), Depth+1)) + return true; + + // Otherwise we definitely can't merge the ADD in. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + TPT.rollback(LastKnownGood); + break; + } + //case Instruction::Or: + // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. + //break; + case Instruction::Mul: + case Instruction::Shl: { + // Can only handle X*C and X << C. + ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); + if (!RHS) + return false; + int64_t Scale = RHS->getSExtValue(); + if (Opcode == Instruction::Shl) + Scale = 1LL << Scale; + + return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); + } + case Instruction::GetElementPtr: { + // Scan the GEP. 
We check it if it contains constant offsets and at most + // one variable offset. + int VariableOperand = -1; + unsigned VariableScale = 0; + + int64_t ConstantOffset = 0; + const DataLayout *TD = TLI.getDataLayout(); + gep_type_iterator GTI = gep_type_begin(AddrInst); + for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + const StructLayout *SL = TD->getStructLayout(STy); + unsigned Idx = + cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); + ConstantOffset += SL->getElementOffset(Idx); + } else { + uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); + if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { + ConstantOffset += CI->getSExtValue()*TypeSize; + } else if (TypeSize) { // Scales of zero don't do anything. + // We only allow one variable index at the moment. + if (VariableOperand != -1) + return false; + + // Remember the variable index. + VariableOperand = i; + VariableScale = TypeSize; + } + } + } + + // A common case is for the GEP to only do a constant offset. In this case, + // just add it to the disp field and check validity. + if (VariableOperand == -1) { + AddrMode.BaseOffs += ConstantOffset; + if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ + // Check to see if we can fold the base pointer in too. + if (MatchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + } + AddrMode.BaseOffs -= ConstantOffset; + return false; + } + + // Save the valid addressing mode in case we can't match. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + // See if the scale and offset amount is valid for this target. + AddrMode.BaseOffs += ConstantOffset; + + // Match the base operand of the GEP. + if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { + // If it couldn't be matched, just stuff the value in a register. + if (AddrMode.HasBaseReg) { + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + } + + // Match the remaining variable portion of the GEP. + if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, + Depth)) { + // If it couldn't be matched, try stuffing the base into a register + // instead of matching it, and retrying the match of the scale. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + if (AddrMode.HasBaseReg) + return false; + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + AddrMode.BaseOffs += ConstantOffset; + if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), + VariableScale, Depth)) { + // If even that didn't work, bail. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + } + + return true; + } + case Instruction::SExt: { + Instruction *SExt = dyn_cast<Instruction>(AddrInst); + if (!SExt) + return false; + + // Try to move this sext out of the way of the addressing mode. + // Ask for a method for doing so. + TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( + SExt, InsertedTruncs, TLI, PromotedInsts); + if (!TPH) + return false; + + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + unsigned CreatedInsts = 0; + Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts); + // SExt has been moved away. + // Thus either it will be rematched later in the recursive calls or it is + // gone. 
Anyway, we must not fold it into the addressing mode at this point.
+ // E.g.,
+ // op = add opnd, 1
+ // idx = sext op
+ // addr = gep base, idx
+ // is now:
+ // promotedOpnd = sext opnd <- no match here
+ // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
+ // addr = gep base, op <- match
+ if (MovedAway)
+ *MovedAway = true;
+
+ assert(PromotedOperand &&
+ "TypePromotionHelper should have filtered out those cases");
+
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ if (!MatchAddr(PromotedOperand, Depth) ||
+ !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
+ PromotedOperand)) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
+ TPT.rollback(LastKnownGood);
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode. If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+ // Start a transaction at this point that we will rollback if the matching
+ // fails.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (!AddrMode.BaseGV) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseGV = nullptr;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ bool MovedAway = false;
+ if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
+ // This instruction may have been moved away. If so, there is nothing
+ // to check here.
+ if (MovedAway)
+ return true;
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ //cerr << "NOT FOLDING: " << *I;
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ TPT.rollback(LastKnownGood);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ TPT.rollback(LastKnownGood);
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+ // Worst case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = nullptr;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = nullptr;
+ }
+ // Couldn't match.
+ TPT.rollback(LastKnownGood);
+ return false;
+}
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands. If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+ const TargetLowering &TLI) {
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // If this asm operand is our Value*, and if it isn't an indirect memory
+ // operand, we can't fold it!
+ if (OpInfo.CallOperandVal == OpVal &&
+ (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+ !OpInfo.isIndirect))
+ return false;
+ }
+
+ return true;
+}
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+ SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+ SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+ const TargetLowering &TLI) {
+ // If we already considered this instruction, we're done.
+ if (!ConsideredInsts.insert(I))
+ return false;
+
+ // If this is an obviously unfoldable instruction, bail out.
+ if (!MightBeFoldableInst(I))
+ return true;
+
+ // Loop over all the uses, recursively processing them.
+ for (Use &U : I->uses()) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
+ MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo == 0) return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(SI, opNo));
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+ if (!IA) return true;
+
+ // If this is a memory operand, we're cool, otherwise bail out.
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ return true;
+ continue;
+ }
+
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
+/// the use site that we're folding it into. If so, there is no cost to
+/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1,
+ Value *KnownLive2) {
+ // If Val is either of the known-live values, we know it is live!
+ if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
+ return true;
+
+ // All values other than instructions and arguments (e.g. constants) are live.
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+ // If Val is a constant sized alloca in the entry block, it is live, this is
+ // true because it is just a reference to the stack/frame pointer, which is
+ // live for the whole function.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+ if (AI->isStaticAlloca())
+ return true;
+
+ // Check to see if this value is already used in the memory instruction's
+ // block. If so, it's already live into the block at the very least, so we
+ // can reasonably fold it.
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
+}
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it. However, the specified instruction has multiple
+/// uses. Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability) return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaleReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = nullptr;
+ if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = nullptr;
+
+ // If folding this instruction (and its subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (!BaseReg && !ScaledReg)
+ return true;
+
+ // If all uses of this instruction are ultimately load/store/inlineasm's,
+ // check to see if their addressing modes will include this instruction. If
+ // so, we can fold it into all uses, so it doesn't matter if it has multiple
+ // uses.
+ SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallPtrSet<Instruction*, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ return false; // Has a non-memory, non-foldable use!
+ + // Now that we know that all uses of this instruction are part of a chain of + // computation involving only operations that could theoretically be folded + // into a memory use, loop over each of these uses and see if they could + // *actually* fold the instruction. + SmallVector<Instruction*, 32> MatchedAddrModeInsts; + for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { + Instruction *User = MemoryUses[i].first; + unsigned OpNo = MemoryUses[i].second; + + // Get the access type of this use. If the use isn't a pointer, we don't + // know what it accesses. + Value *Address = User->getOperand(OpNo); + if (!Address->getType()->isPointerTy()) + return false; + Type *AddressAccessTy = Address->getType()->getPointerElementType(); + + // Do a match against the root of this address, ignoring profitability. This + // will tell us if the addressing mode for the memory operation will + // *actually* cover the shared instruction. + ExtAddrMode Result; + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + MemoryInst, Result, InsertedTruncs, + PromotedInsts, TPT); + Matcher.IgnoreProfitability = true; + bool Success = Matcher.MatchAddr(Address, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + + // The match was to check the profitability, the changes made are not + // part of the original matcher. Therefore, they should be dropped + // otherwise the original matcher will not present the right state. + TPT.rollback(LastKnownGood); + + // If the match didn't cover I, then it won't be shared by it. + if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), + I) == MatchedAddrModeInsts.end()) + return false; + + MatchedAddrModeInsts.clear(); + } + + return true; +} + +} // end anonymous namespace + +/// IsNonLocalValue - Return true if the specified values are defined in a +/// different basic block than BB. +static bool IsNonLocalValue(Value *V, BasicBlock *BB) { + if (Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() != BB; + return false; +} + +/// OptimizeMemoryInst - Load and Store Instructions often have +/// addressing modes that can do significant amounts of computation. As such, +/// instruction selection will try to get the load or store to do as much +/// computation as possible for the program. The problem is that isel can only +/// see within a single block. As such, we sink as much legal addressing mode +/// stuff into the block as possible. +/// +/// This method is used to optimize both load/store and inline asms with memory +/// operands. +bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, + Type *AccessTy) { + Value *Repl = Addr; + + // Try to collapse single-value PHI nodes. This is necessary to undo + // unprofitable PRE transformations. + SmallVector<Value*, 8> worklist; + SmallPtrSet<Value*, 16> Visited; + worklist.push_back(Addr); + + // Use a worklist to iteratively look through PHI nodes, and ensure that + // the addressing mode obtained from the non-PHI roots of the graph + // are equivalent. 
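+ // For example (illustrative), if Addr is 'phi [%gep1, %bb1], [%gep2, %bb2]'
+ // and both GEPs compute base + 4*idx, a single addressing mode can serve
+ // every root; if the roots disagree, Consensus is reset below and we bail.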
+ Value *Consensus = nullptr;
+ unsigned NumUsesConsensus = 0;
+ bool IsNumUsesConsensusValid = false;
+ SmallVector<Instruction*, 16> AddrModeInsts;
+ ExtAddrMode AddrMode;
+ TypePromotionTransaction TPT;
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ while (!worklist.empty()) {
+ Value *V = worklist.back();
+ worklist.pop_back();
+
+ // Break use-def graph loops.
+ if (!Visited.insert(V)) {
+ Consensus = nullptr;
+ break;
+ }
+
+ // For a PHI node, push all of its incoming values.
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+ worklist.push_back(P->getIncomingValue(i));
+ continue;
+ }
+
+ // For non-PHIs, determine the addressing mode being computed.
+ SmallVector<Instruction*, 16> NewAddrModeInsts;
+ ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
+ V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
+ PromotedInsts, TPT);
+
+ // This check is broken into two cases with very similar code to avoid using
+ // getNumUses() as much as possible. Some values have a lot of uses, so
+ // calling getNumUses() unconditionally caused a significant compile-time
+ // regression.
+ if (!Consensus) {
+ Consensus = V;
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ continue;
+ } else if (NewAddrMode == AddrMode) {
+ if (!IsNumUsesConsensusValid) {
+ NumUsesConsensus = Consensus->getNumUses();
+ IsNumUsesConsensusValid = true;
+ }
+
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher
+ // applicable.
+ unsigned NumUses = V->getNumUses();
+ if (NumUses > NumUsesConsensus) {
+ Consensus = V;
+ NumUsesConsensus = NumUses;
+ AddrModeInsts = NewAddrModeInsts;
+ }
+ continue;
+ }
+
+ Consensus = nullptr;
+ break;
+ }
+
+ // If the addressing mode couldn't be determined, or if multiple different
+ // ones were determined, bail out now.
+ if (!Consensus) {
+ TPT.rollback(LastKnownGood);
+ return false;
+ }
+ TPT.commit();
+
+ // Check to see if any of the instructions subsumed by this addr mode are
+ // non-local to I's BB.
+ bool AnyNonLocal = false;
+ for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
+ if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
+ AnyNonLocal = true;
+ break;
+ }
+ }
+
+ // If all the instructions matched are already in this BB, don't do anything.
+ if (!AnyNonLocal) {
+ DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
+ return false;
+ }
+
+ // Insert this computation right after this user. Since our caller is
+ // scanning from the top of the BB to the bottom, reuse of the expr is
+ // guaranteed to happen later.
+ IRBuilder<> Builder(MemoryInst);
+
+ // Now that we've determined the addressing expression we want to use and
+ // know that we have to sink it into this block, check to see if we have
+ // already done this for some other load/store instr in this block. If so,
+ // reuse the computation.
+ Value *&SunkAddr = SunkAddrs[Addr]; + if (SunkAddr) { + DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst << "\n"); + if (SunkAddr->getType() != Addr->getType()) + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && + TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { + // By default, we use the GEP-based method when AA is used later. This + // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst << "\n"); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Value *ResultPtr = nullptr, *ResultIndex = nullptr; + + // First, find the pointer. + if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { + ResultPtr = AddrMode.BaseReg; + AddrMode.BaseReg = nullptr; + } + + if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { + // We can't add more than one pointer together, nor can we scale a + // pointer (both of which seem meaningless). + if (ResultPtr || AddrMode.Scale != 1) + return false; + + ResultPtr = AddrMode.ScaledReg; + AddrMode.Scale = 0; + } + + if (AddrMode.BaseGV) { + if (ResultPtr) + return false; + + ResultPtr = AddrMode.BaseGV; + } + + // If the real base value actually came from an inttoptr, then the matcher + // will look through it and provide only the integer value. In that case, + // use it here. + if (!ResultPtr && AddrMode.BaseReg) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); + AddrMode.BaseReg = nullptr; + } else if (!ResultPtr && AddrMode.Scale == 1) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); + AddrMode.Scale = 0; + } + + if (!ResultPtr && + !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { + SunkAddr = Constant::getNullValue(Addr->getType()); + } else if (!ResultPtr) { + return false; + } else { + Type *I8PtrTy = + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + + // Start with the base register. Do this first so that subsequent address + // matching finds it last, which will prevent it from trying to match it + // as the scaled value in case it happens to be a mul. That would be + // problematic if we've sunk a different mul for the scale, because then + // we'd end up sinking both muls. + if (AddrMode.BaseReg) { + Value *V = AddrMode.BaseReg; + if (V->getType() != IntPtrTy) + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); + + ResultIndex = V; + } + + // Add the scale value. + if (AddrMode.Scale) { + Value *V = AddrMode.ScaledReg; + if (V->getType() == IntPtrTy) { + // done. + } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < + cast<IntegerType>(V->getType())->getBitWidth()) { + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); + } else { + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. 
+ Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); + if (I && (ResultIndex != AddrMode.BaseReg)) + I->eraseFromParent(); + return false; + } + + if (AddrMode.Scale != 1) + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); + if (ResultIndex) + ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); + else + ResultIndex = V; + } + + // Add in the Base Offset if present. + if (AddrMode.BaseOffs) { + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + if (ResultIndex) { + // We need to add this separately from the scale above to help with + // SDAG consecutive load/store merging. + if (ResultPtr->getType() != I8PtrTy) + ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + } + + ResultIndex = V; + } + + if (!ResultIndex) { + SunkAddr = ResultPtr; + } else { + if (ResultPtr->getType() != I8PtrTy) + ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + } + + if (SunkAddr->getType() != Addr->getType()) + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } + } else { + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst << "\n"); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Value *Result = nullptr; + + // Start with the base register. Do this first so that subsequent address + // matching finds it last, which will prevent it from trying to match it + // as the scaled value in case it happens to be a mul. That would be + // problematic if we've sunk a different mul for the scale, because then + // we'd end up sinking both muls. + if (AddrMode.BaseReg) { + Value *V = AddrMode.BaseReg; + if (V->getType()->isPointerTy()) + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); + if (V->getType() != IntPtrTy) + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); + Result = V; + } + + // Add the scale value. + if (AddrMode.Scale) { + Value *V = AddrMode.ScaledReg; + if (V->getType() == IntPtrTy) { + // done. + } else if (V->getType()->isPointerTy()) { + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); + } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < + cast<IntegerType>(V->getType())->getBitWidth()) { + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); + } else { + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + Instruction *I = dyn_cast_or_null<Instruction>(Result); + if (I && (Result != AddrMode.BaseReg)) + I->eraseFromParent(); + return false; + } + if (AddrMode.Scale != 1) + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); + if (Result) + Result = Builder.CreateAdd(Result, V, "sunkaddr"); + else + Result = V; + } + + // Add in the BaseGV if present. + if (AddrMode.BaseGV) { + Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); + if (Result) + Result = Builder.CreateAdd(Result, V, "sunkaddr"); + else + Result = V; + } + + // Add in the Base Offset if present. 
+ if (AddrMode.BaseOffs) { + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + if (Result) + Result = Builder.CreateAdd(Result, V, "sunkaddr"); + else + Result = V; + } + + if (!Result) + SunkAddr = Constant::getNullValue(Addr->getType()); + else + SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); + } + + MemoryInst->replaceUsesOfWith(Repl, SunkAddr); + + // If we have no uses, recursively delete the value and all dead instructions + // using it. + if (Repl->use_empty()) { + // This can cause recursive deletion, which can invalidate our iterator. + // Use a WeakVH to hold onto it in case this happens. + WeakVH IterHandle(CurInstIterator); + BasicBlock *BB = CurInstIterator->getParent(); + + RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); + + if (IterHandle != CurInstIterator) { + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } + } + ++NumMemoryInsts; + return true; +} + +/// OptimizeInlineAsmInst - If there are any memory operands, use +/// OptimizeMemoryInst to sink their address computing into the block when +/// possible / profitable. +bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { + bool MadeChange = false; + + TargetLowering::AsmOperandInfoVector + TargetConstraints = TLI->ParseConstraints(CS); + unsigned ArgNo = 0; + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI->ComputeConstraintToUse(OpInfo, SDValue()); + + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.isIndirect) { + Value *OpVal = CS->getArgOperand(ArgNo++); + MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); + } else if (OpInfo.Type == InlineAsm::isInput) + ArgNo++; + } + + return MadeChange; +} + +/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same +/// basic block as the load, unless conditions are unfavorable. This allows +/// SelectionDAG to fold the extend into the load. +/// +bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { + // Look for a load being extended. + LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0)); + if (!LI) return false; + + // If they're already in the same block, there's nothing to do. + if (LI->getParent() == I->getParent()) + return false; + + // If the load has other users and the truncate is not free, this probably + // isn't worthwhile. + if (!LI->hasOneUse() && + TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) || + !TLI->isTypeLegal(TLI->getValueType(I->getType()))) && + !TLI->isTruncateFree(I->getType(), LI->getType())) + return false; + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa<ZExtInst>(I)) + LType = ISD::ZEXTLOAD; + else { + assert(isa<SExtInst>(I) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType()))) + return false; + + // Move the extend into the same block as the load, so that SelectionDAG + // can fold it. + I->removeFromParent(); + I->insertAfter(LI); + ++NumExtsMoved; + return true; +} + +bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { + BasicBlock *DefBB = I->getParent(); + + // If the result of a {s|z}ext and its source are both live out, rewrite all + // other uses of the source with result of extension. 
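+ // For example (an illustrative sketch):
+ //   %src = trunc i32 %x to i16   ; defined in DefBB, live out
+ //   %ext = sext i16 %src to i32  ; also live out
+ // Uses of %src in other blocks are rewritten to 'trunc i32 %ext to i16',
+ // so only %ext needs to stay live across the block boundary.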
+  Value *Src = I->getOperand(0);
+  if (Src->hasOneUse())
+    return false;
+
+  // Only do this xform if truncating is free.
+  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
+    return false;
+
+  // Only safe to perform the optimization if the source is also defined in
+  // this block.
+  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+    return false;
+
+  bool DefIsLiveOut = false;
+  for (User *U : I->users()) {
+    Instruction *UI = cast<Instruction>(U);
+
+    // Figure out which BB this ext is used in.
+    BasicBlock *UserBB = UI->getParent();
+    if (UserBB == DefBB) continue;
+    DefIsLiveOut = true;
+    break;
+  }
+  if (!DefIsLiveOut)
+    return false;
+
+  // Make sure none of the uses are PHI nodes.
+  for (User *U : Src->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    BasicBlock *UserBB = UI->getParent();
+    if (UserBB == DefBB) continue;
+    // Be conservative. We don't want this xform to end up introducing
+    // reloads just before load / store instructions.
+    if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
+      return false;
+  }
+
+  // InsertedTruncs - Only insert one trunc per block.
+  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+
+  bool MadeChange = false;
+  for (Use &U : Src->uses()) {
+    Instruction *User = cast<Instruction>(U.getUser());
+
+    // Figure out which BB this ext is used in.
+    BasicBlock *UserBB = User->getParent();
+    if (UserBB == DefBB) continue;
+
+    // Both src and def are live in this block. Rewrite the use.
+    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+    if (!InsertedTrunc) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+      InsertedTruncsSet.insert(InsertedTrunc);
+    }
+
+    // Replace a use of the {s|z}ext source with a use of the result.
+    U = InsertedTrunc;
+    ++NumExtUses;
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
+/// turned into an explicit branch.
+static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
+  // FIXME: This should use the same heuristics as IfConversion to determine
+  // whether a select is better represented as a branch. This requires that
+  // branch probability metadata is preserved for the select, which is not the
+  // case currently.
+
+  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+
+  // If the branch is predicted right, an out-of-order CPU can avoid blocking
+  // on the compare. Emit cmovs on compares with a memory operand as branches
+  // to avoid stalls on the load from memory. If the compare has more than one
+  // use, there's probably another cmov or setcc around, so it's not worth
+  // emitting a branch.
+  if (!Cmp)
+    return false;
+
+  Value *CmpOp0 = Cmp->getOperand(0);
+  Value *CmpOp1 = Cmp->getOperand(1);
+
+  // We check that the memory operand has one use to avoid uses of the loaded
+  // value directly after the compare, making branches unprofitable.
+  return Cmp->hasOneUse() &&
+         ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+          (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
+}
+
+
+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
+bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
+  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
+
+  // Can we convert the 'select' to CF?
+  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
+    return false;
+
+  TargetLowering::SelectSupportKind SelectKind;
+  if (VectorCond)
+    SelectKind = TargetLowering::VectorMaskSelect;
+  else if (SI->getType()->isVectorTy())
+    SelectKind = TargetLowering::ScalarCondVectorVal;
+  else
+    SelectKind = TargetLowering::ScalarValSelect;
+
+  // Do we have efficient codegen support for this kind of 'select'?
+  if (TLI->isSelectSupported(SelectKind)) {
+    // We have efficient codegen support for the select instruction.
+    // Check if it is profitable to keep this 'select'.
+    if (!TLI->isPredictableSelectExpensive() ||
+        !isFormingBranchFromSelectProfitable(SI))
+      return false;
+  }
+
+  ModifiedDT = true;
+
+  // First, we split the block containing the select into two blocks.
+  BasicBlock *StartBlock = SI->getParent();
+  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
+  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+
+  // Create a new block serving as the landing pad for the branch.
+  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
+                                              NextBlock->getParent(),
+                                              NextBlock);
+
+  // Move the unconditional branch from the block containing the select into
+  // our landing pad block.
+  StartBlock->getTerminator()->eraseFromParent();
+  BranchInst::Create(NextBlock, SmallBlock);
+
+  // Insert the real conditional branch based on the original condition.
+  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
+
+  // The select itself is replaced with a PHI node.
+  PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
+  PN->takeName(SI);
+  PN->addIncoming(SI->getTrueValue(), StartBlock);
+  PN->addIncoming(SI->getFalseValue(), SmallBlock);
+  SI->replaceAllUsesWith(PN);
+  SI->eraseFromParent();
+
+  // Instruct OptimizeBlock to skip to the next block.
+  CurInstIterator = StartBlock->end();
+  ++NumSelectsExpanded;
+  return true;
+}
+
+static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
+  SmallVector<int, 16> Mask(SVI->getShuffleMask());
+  int SplatElem = -1;
+  for (unsigned i = 0; i < Mask.size(); ++i) {
+    if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
+      return false;
+    SplatElem = Mask[i];
+  }
+
+  return true;
+}
+
+/// Some targets have expensive vector shifts if the lanes aren't all the same
+/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
+/// it's often worth sinking a shufflevector splat down to its use so that
+/// codegen can spot all lanes are identical.
+bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+  BasicBlock *DefBB = SVI->getParent();
+
+  // Only do this xform if variable vector shifts are particularly expensive.
+  if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
+    return false;
+
+  // We only expect better codegen by sinking a shuffle if we can recognise a
+  // constant splat.
+  if (!isBroadcastShuffle(SVI))
+    return false;
+
+  // InsertedShuffles - Only insert one shuffle per block.
+  DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
+
+  bool MadeChange = false;
+  for (User *U : SVI->users()) {
+    Instruction *UI = cast<Instruction>(U);
+
+    // Figure out which BB this shuffle is used in.
+    BasicBlock *UserBB = UI->getParent();
+    if (UserBB == DefBB) continue;
+
+    // For now only apply this when the splat is used by a shift instruction.
+    if (!UI->isShift()) continue;
+
+    // Everything checks out, sink the shuffle if the user's block doesn't
+    // already have a copy.
+    Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
+
+    if (!InsertedShuffle) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+      InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
+                                              SVI->getOperand(1),
+                                              SVI->getOperand(2), "", InsertPt);
+    }
+
+    UI->replaceUsesOfWith(SVI, InsertedShuffle);
+    MadeChange = true;
+  }
+
+  // If we removed all uses, nuke the shuffle.
+  if (SVI->use_empty()) {
+    SVI->eraseFromParent();
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+  if (PHINode *P = dyn_cast<PHINode>(I)) {
+    // It is possible for very late stage optimizations (such as SimplifyCFG)
+    // to introduce PHI nodes too late to be cleaned up. If we detect such a
+    // trivial PHI, go ahead and zap it here.
+    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
+                                       TLInfo, DT)) {
+      P->replaceAllUsesWith(V);
+      P->eraseFromParent();
+      ++NumPHIsElim;
+      return true;
+    }
+    return false;
+  }
+
+  if (CastInst *CI = dyn_cast<CastInst>(I)) {
+    // If the source of the cast is a constant, then this should have
+    // already been constant folded. The only reason NOT to constant fold
+    // it is if something (e.g. LSR) was careful to place the constant
+    // evaluation in a block other than the one that uses it (e.g. to hoist
+    // the address of globals out of a loop). If this is the case, we don't
+    // want to forward-subst the cast.
+    if (isa<Constant>(CI->getOperand(0)))
+      return false;
+
+    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+      return true;
+
+    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+      /// Sink a zext or sext into its user blocks if the target type doesn't
+      /// fit in one register.
+      if (TLI && TLI->getTypeAction(CI->getContext(),
+                                    TLI->getValueType(CI->getType())) ==
+                     TargetLowering::TypeExpandInteger) {
+        return SinkCast(CI);
+      } else {
+        bool MadeChange = MoveExtToFormExtLoad(I);
+        return MadeChange | OptimizeExtUses(I);
+      }
+    }
+    return false;
+  }
+
+  if (CmpInst *CI = dyn_cast<CmpInst>(I))
+    if (!TLI || !TLI->hasMultipleConditionRegisters())
+      return OptimizeCmpExpression(CI);
+
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (TLI)
+      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+    return false;
+  }
+
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (TLI)
+      return OptimizeMemoryInst(I, SI->getOperand(1),
+                                SI->getOperand(0)->getType());
+    return false;
+  }
+
+  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+
+  if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
+                BinOp->getOpcode() == Instruction::LShr)) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+    if (TLI && CI && TLI->hasExtractBitsInsn())
+      return OptimizeExtractBits(BinOp, CI, *TLI);
+
+    return false;
+  }
+
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+    if (GEPI->hasAllZeroIndices()) {
+      /// The GEP operand must be a pointer, so must its result -> BitCast
+      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+                                        GEPI->getName(), GEPI);
+      GEPI->replaceAllUsesWith(NC);
+      GEPI->eraseFromParent();
+      ++NumGEPsElim;
+      OptimizeInst(NC);
+      return true;
+    }
+    return false;
+  }
+
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    return OptimizeCallInst(CI);
+
+  if (SelectInst *SI = dyn_cast<SelectInst>(I))
+    return OptimizeSelectInst(SI);
+
+  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
+    return OptimizeShuffleVectorInst(SVI);
+
+  return false;
+}
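[Editor's note — illustration, not part of the patch.] The shufflevector-splat sinking handled by OptimizeShuffleVectorInst above is easiest to see from source level. A minimal sketch, assuming GNU vector extensions as supported by GCC/Clang (the type and function names are hypothetical):

typedef int v4si __attribute__((vector_size(16)));

// The brace-initialized {n, n, n, n} becomes a shufflevector splat in IR;
// sinking that splat next to the shift lets instruction selection see that
// every lane shifts by the same amount and pick the cheap encoding.
v4si shl_by_splat(v4si v, int n) {
  v4si amt = {n, n, n, n};
  return v << amt;  // variable vector shift; cheap when all lanes match
}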
+
+// In this pass we look for GEP and cast instructions that are used across
+// basic blocks and rewrite them to improve basic-block-at-a-time selection.
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+  SunkAddrs.clear();
+  bool MadeChange = false;
+
+  CurInstIterator = BB.begin();
+  while (CurInstIterator != BB.end())
+    MadeChange |= OptimizeInst(CurInstIterator++);
+
+  MadeChange |= DupRetToEnableTailCallOpts(&BB);
+
+  return MadeChange;
+}
+
+// If llvm.dbg.value is far away from the value, then ISel may not be able to
+// handle it properly. ISel will drop llvm.dbg.value if it cannot find a node
+// corresponding to the value.
+bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+  bool MadeChange = false;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    Instruction *PrevNonDbgInst = nullptr;
+    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+      Instruction *Insn = BI; ++BI;
+      DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+      // Leave dbg.values that refer to an alloca alone. These
+      // intrinsics describe the address of a variable (= the alloca)
+      // being taken. They should not be moved next to the alloca
+      // (and to the beginning of the scope), but rather stay close to
+      // where said address is used.
+      if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
+        PrevNonDbgInst = Insn;
+        continue;
+      }
+
+      Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+      if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+        DEBUG(dbgs() << "Moving Debug Value before:\n" << *DVI << ' ' << *VI);
+        DVI->removeFromParent();
+        if (isa<PHINode>(VI))
+          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+        else
+          DVI->insertAfter(VI);
+        MadeChange = true;
+        ++NumDbgValueMoved;
+      }
+    }
+  }
+  return MadeChange;
+}
+
+// If there is a sequence that branches based on comparing a single bit
+// against zero, and the target supports folding the mask and compare into a
+// single instruction, sink the mask and compare into the branch uses. Do
+// this before OptimizeBlock -> OptimizeInst -> OptimizeCmpExpression, which
+// perturbs the pattern being searched for.
+bool CodeGenPrepare::sinkAndCmp(Function &F) {
+  if (!EnableAndCmpSinking)
+    return false;
+  if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
+    return false;
+  bool MadeChange = false;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+    BasicBlock *BB = I++;
+
+    // Does this BB end with the following?
+    //   %andVal = and %val, #single-bit-set
+    //   %icmpVal = icmp %andVal, 0
+    //   br i1 %icmpVal, label %dest1, label %dest2
+    BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
+    if (!Brcc || !Brcc->isConditional())
+      continue;
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
+    if (!Cmp || Cmp->getParent() != BB)
+      continue;
+    ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
+    if (!Zero || !Zero->isZero())
+      continue;
+    Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
+    if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
+      continue;
+    ConstantInt *Mask = dyn_cast<ConstantInt>(And->getOperand(1));
+    if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
+      continue;
+    DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
+
+    // Push the "and; icmp" for any users that are conditional branches.
+    // Since there can only be one branch use per BB, we don't need to keep
+    // track of which BBs we insert into.
+ for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); + UI != E; ) { + Use &TheUse = *UI; + // Find brcc use. + BranchInst *BrccUser = dyn_cast<BranchInst>(*UI); + ++UI; + if (!BrccUser || !BrccUser->isConditional()) + continue; + BasicBlock *UserBB = BrccUser->getParent(); + if (UserBB == BB) continue; + DEBUG(dbgs() << "found Brcc use\n"); + + // Sink the "and; icmp" to use. + MadeChange = true; + BinaryOperator *NewAnd = + BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", + BrccUser); + CmpInst *NewCmp = + CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, + "", BrccUser); + TheUse = NewCmp; + ++NumAndCmpsMoved; + DEBUG(BrccUser->getParent()->dump()); + } + } + return MadeChange; +} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 18c8e0a..d3ffcc7 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "post-RA-sched" #include "CriticalAntiDepBreaker.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "post-RA-sched" + CriticalAntiDepBreaker:: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : AntiDepBreaker(), MF(MFi), @@ -33,7 +34,7 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), RegClassInfo(RCI), - Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), + Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0), DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {} @@ -45,7 +46,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { const unsigned BBSize = BB->size(); for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { // Clear out the register class data. - Classes[i] = static_cast<const TargetRegisterClass *>(0); + Classes[i] = nullptr; // Initialize the indices to indicate that no registers are live. KillIndices[i] = ~0u; @@ -75,7 +76,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { if (!IsReturnBlock && !Pristine.test(*I)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; @@ -124,7 +125,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. static const SDep *CriticalPathStep(const SUnit *SU) { - const SDep *Next = 0; + const SDep *Next = nullptr; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); @@ -145,8 +146,8 @@ static const SDep *CriticalPathStep(const SUnit *SU) { void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // It's not safe to change register allocation for source operands of - // that have special allocation requirements. 
Also assume all registers - // used in a call must not be changed (ABI). + // instructions that have special allocation requirements. Also assume all + // registers used in a call must not be changed (ABI). // FIXME: The issue with predicated instruction is more complex. We are being // conservative here because the kill markers cannot be trusted after // if-conversion: @@ -171,7 +172,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - const TargetRegisterClass *NewRC = 0; + const TargetRegisterClass *NewRC = nullptr; if (i < MI->getDesc().getNumOperands()) NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); @@ -199,6 +200,28 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) RegRefs.insert(std::make_pair(Reg, &MO)); + // If this reg is tied and live (Classes[Reg] is set to -1), we can't change + // it or any of its sub or super regs. We need to use KeepRegs to mark the + // reg because not all uses of the same reg within an instruction are + // necessarily tagged as tied. + // Example: an x86 "xor %eax, %eax" will have one source operand tied to the + // def register but not the second (see PR20020 for details). + // FIXME: can this check be relaxed to account for undef uses + // of a register? In the above 'xor' example, the uses of %eax are undef, so + // earlier instructions could still replace %eax even though the 'xor' + // itself can't be changed. + if (MI->isRegTiedToUseOperand(i) && + Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) { + KeepRegs.set(*SubRegs); + } + for (MCSuperRegIterator SuperRegs(Reg, TRI); + SuperRegs.isValid(); ++SuperRegs) { + KeepRegs.set(*SuperRegs); + } + } + if (MO.isUse() && Special) { if (!KeepRegs.test(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); @@ -227,7 +250,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, DefIndices[i] = Count; KillIndices[i] = ~0u; KeepRegs.reset(i); - Classes[i] = 0; + Classes[i] = nullptr; RegRefs.erase(i); } @@ -235,16 +258,22 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isDef()) continue; + + // If we've already marked this reg as unchangeable, carry on. + if (KeepRegs.test(Reg)) continue; + // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; + // FIXME: we should use a SubRegIterator that includes self (as above), so + // we don't have to repeat all this code for the reg itself. DefIndices[Reg] = Count; KillIndices[Reg] = ~0u; assert(((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u)) && "Kill and Def maps aren't consistent for Reg!"); KeepRegs.reset(Reg); - Classes[Reg] = 0; + Classes[Reg] = nullptr; RegRefs.erase(Reg); // Repeat, for all subregs. for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { @@ -252,7 +281,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; KeepRegs.reset(SubregReg); - Classes[SubregReg] = 0; + Classes[SubregReg] = nullptr; RegRefs.erase(SubregReg); } // Conservatively mark super-registers as unusable. 
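[Editor's note — illustration, not part of the patch.] The hunk above marks a tied, live register and all of its aliases as unchangeable. A minimal self-contained sketch of that alias-marking idiom, using the same LLVM MC iterators the file uses (keepRegAndAliases is a hypothetical helper name):

#include "llvm/ADT/BitVector.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Record Reg plus every sub- and super-register in KeepRegs so that
// anti-dependency breaking will not rename any overlapping register.
static void keepRegAndAliases(unsigned Reg, BitVector &KeepRegs,
                              const TargetRegisterInfo *TRI) {
  for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
       SubRegs.isValid(); ++SubRegs)
    KeepRegs.set(*SubRegs);
  for (MCSuperRegIterator SuperRegs(Reg, TRI); SuperRegs.isValid();
       ++SuperRegs)
    KeepRegs.set(*SuperRegs);
}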
@@ -267,7 +296,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, if (Reg == 0) continue; if (!MO.isUse()) continue; - const TargetRegisterClass *NewRC = 0; + const TargetRegisterClass *NewRC = nullptr; if (i < MI->getDesc().getNumOperands()) NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); @@ -280,6 +309,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, RegRefs.insert(std::make_pair(Reg, &MO)); + // FIXME: we should use an MCRegAliasIterator that includes self so we don't + // have to repeat all this code for the reg itself. + // It wasn't previously live but now it is, this is a kill. if (KillIndices[Reg] == ~0u) { KillIndices[Reg] = Count; @@ -308,7 +340,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // the two-address instruction also defines NewReg, as may happen with // pre/postincrement loads. In this case, both the use and def operands are in // RegRefs because the def is inserted by PrescanInstruction and not erased -// during ScanInstruction. So checking for an instructions with definitions of +// during ScanInstruction. So checking for an instruction with definitions of // both NewReg and AntiDepReg covers it. bool CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, @@ -324,7 +356,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, if (RefOper->isDef() && RefOper->isEarlyClobber()) return true; - // Handle cases in which this instructions defines NewReg. + // Handle cases in which this instruction defines NewReg. MachineInstr *MI = RefOper->getParent(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &CheckOper = MI->getOperand(i); @@ -342,11 +374,11 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, return true; // Don't allow an instruction using AntiDepReg to be earlyclobbered by - // NewReg + // NewReg. if (CheckOper.isEarlyClobber()) return true; - // Don't allow inline asm to define NewReg at all. Who know what it's + // Don't allow inline asm to define NewReg at all. Who knows what it's // doing with it. if (MI->isInlineAsm()) return true; @@ -419,7 +451,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, DenseMap<MachineInstr*,const SUnit*> MISUnitMap; // Find the node at the bottom of the critical path. - const SUnit *Max = 0; + const SUnit *Max = nullptr; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { const SUnit *SU = &SUnits[i]; MISUnitMap[SU->getInstr()] = SU; @@ -493,8 +525,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // as we go to help determine which registers are available. unsigned Broken = 0; unsigned Count = InsertPosIndex - 1; - for (MachineBasicBlock::iterator I = End, E = Begin; - I != E; --Count) { + for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; if (MI->isDebugValue()) continue; @@ -525,7 +556,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.test(AntiDepReg)) - // Don't break anti-dependencies if an use down below requires + // Don't break anti-dependencies if a use down below requires // this exact register. AntiDepReg = 0; else { @@ -551,8 +582,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, CriticalPathMI = CriticalPathSU->getInstr(); } else { // We've reached the end of the critical path. 
-        CriticalPathSU = nullptr;
-        CriticalPathMI = nullptr;
+        CriticalPathSU = nullptr;
+        CriticalPathMI = nullptr;
      }
    }
 
@@ -563,8 +594,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
     // If MI's defs have a special allocation requirement, don't allow
     // any def registers to be changed. Also assume all registers
     // defined in a call must not be changed (ABI).
-    if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
-        TII->isPredicated(MI))
+    if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI))
       // If this instruction's defs have special allocation requirement, don't
       // break this anti-dependency.
       AntiDepReg = 0;
@@ -589,13 +619,14 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
 
     // Determine AntiDepReg's register class, if it is live and is
     // consistently used within a single class.
-    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
-    assert((AntiDepReg == 0 || RC != NULL) &&
+    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg]
+                                                    : nullptr;
+    assert((AntiDepReg == 0 || RC != nullptr) &&
            "Register should be live if it's causing an anti-dependence!");
     if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
       AntiDepReg = 0;
 
-    // Look for a suitable register to use to break the anti-depenence.
+    // Look for a suitable register to use to break the anti-dependence.
     //
     // TODO: Instead of picking the first free register, consider which might
     // be the best.
@@ -638,7 +669,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
                 (DefIndices[NewReg] == ~0u)) &&
              "Kill and Def maps aren't consistent for NewReg!");
 
-        Classes[AntiDepReg] = 0;
+        Classes[AntiDepReg] = nullptr;
         DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
         KillIndices[AntiDepReg] = ~0u;
         assert(((KillIndices[AntiDepReg] == ~0u) !=
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
index 565d20b..45e4ff5 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -55,12 +55,12 @@ class TargetRegisterInfo;
     typedef std::multimap<unsigned, MachineOperand *>::const_iterator
       RegRefIter;
 
-    /// KillIndices - The index of the most recent kill (proceding bottom-up),
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
     /// or ~0u if the register is not live.
     std::vector<unsigned> KillIndices;
 
-    /// DefIndices - The index of the most recent complete def (proceding bottom
-    /// up), or ~0u if the register is live.
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom up), or ~0u if the register is live.
     std::vector<unsigned> DefIndices;
 
     /// KeepRegs - A set of registers which are live and cannot be changed to
@@ -72,7 +72,7 @@ class TargetRegisterInfo;
     ~CriticalAntiDepBreaker();
 
     /// Start - Initialize anti-dep breaking for a new basic block.
-    void StartBlock(MachineBasicBlock *BB);
+    void StartBlock(MachineBasicBlock *BB) override;
 
     /// BreakAntiDependencies - Identify anti-dependencies along the critical
    /// path.
@@ -82,15 +82,16 @@ class TargetRegisterInfo;
                                MachineBasicBlock::iterator Begin,
                                MachineBasicBlock::iterator End,
                                unsigned InsertPosIndex,
-                               DbgValueVector &DbgValues);
+                               DbgValueVector &DbgValues) override;
 
     /// Observe - Update liveness information to account for the current
    /// instruction, which will not be scheduled.
/// - void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex); + void Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) override; /// Finish - Finish anti-dep breaking for a basic block. - void FinishBlock(); + void FinishBlock() override; private: void PrescanInstruction(MachineInstr *MI); diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 6619bcf..bc6e9dc 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -108,7 +108,7 @@ public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, bool IsPostRA); // Schedule - Actual scheduling work. - void schedule(); + void schedule() override; }; } @@ -121,7 +121,7 @@ DefaultVLIWScheduler::DefaultVLIWScheduler( void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. - buildSchedGraph(0); + buildSchedGraph(nullptr); } // VLIWPacketizerList Ctor @@ -129,7 +129,7 @@ VLIWPacketizerList::VLIWPacketizerList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, bool IsPostRA) : TM(MF.getTarget()), MF(MF) { TII = TM.getInstrInfo(); - ResourceTracker = TII->CreateTargetScheduleState(&TM, 0); + ResourceTracker = TII->CreateTargetScheduleState(&TM, nullptr); VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); } diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 5efe1ff..2b144d8 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "codegen-dce" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -23,11 +22,13 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "codegen-dce" + STATISTIC(NumDeletes, "Number of dead instructions deleted"); namespace { class DeadMachineInstructionElim : public MachineFunctionPass { - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; @@ -59,7 +60,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI()) + if (!MI->isSafeToMove(TII, nullptr, SawStore) && !MI->isPHI()) return false; // Examine each operand. 
@@ -84,6 +85,9 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { } bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + bool AnyChanges = false; MRI = &MF.getRegInfo(); TRI = MF.getTarget().getRegisterInfo(); @@ -127,17 +131,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - MachineRegisterInfo::use_iterator nextI; - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), - E = MRI->use_end(); I!=E; I=nextI) { - nextI = llvm::next(I); // I is invalidated by the setReg - MachineOperand& Use = I.getOperand(); - MachineInstr *UseMI = Use.getParent(); - if (UseMI==MI) - continue; - assert(Use.isDebug()); - UseMI->getOperand(0).setReg(0U); - } + MRI->markUsesInDebugValueAsUndef(Reg); } AnyChanges = true; MI->eraseFromParent(); diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index c7c1752..a195586 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -12,22 +12,23 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dwarfehprepare" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" -#include "llvm/Support/CallSite.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; +#define DEBUG_TYPE "dwarfehprepare" + STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { @@ -42,16 +43,16 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetMachine *TM) : - FunctionPass(ID), TM(TM), RewindFunction(0) { - initializeDominatorTreePass(*PassRegistry::getPassRegistry()); - } + DwarfEHPrepare(const TargetMachine *TM) + : FunctionPass(ID), TM(TM), RewindFunction(nullptr) { + initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); + } - virtual bool runOnFunction(Function &Fn); + bool runOnFunction(Function &Fn) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + void getAnalysisUsage(AnalysisUsage &AU) const override { } - const char *getPassName() const { + const char *getPassName() const override { return "Exception handling preparation"; } }; @@ -68,10 +69,10 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { /// instructions, including the 'resume' instruction. 
Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { Value *V = RI->getOperand(0); - Value *ExnObj = 0; + Value *ExnObj = nullptr; InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V); - LoadInst *SelLoad = 0; - InsertValueInst *ExcIVI = 0; + LoadInst *SelLoad = nullptr; + InsertValueInst *ExcIVI = nullptr; bool EraseIVIs = false; if (SelIVI) { diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index 5447df0..c470632 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "early-ifcvt" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -40,6 +39,8 @@ using namespace llvm; +#define DEBUG_TYPE "early-ifcvt" + // Absolute maximum number of instructions allowed per speculated block. // This bypasses all other heuristics, so it should be set fairly high. static cl::opt<unsigned> @@ -219,7 +220,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // We never speculate stores, so an AA pointer isn't necessary. bool DontMoveAcrossStore = true; - if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) { + if (!I->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) { DEBUG(dbgs() << "Can't speculate: " << *I); return false; } @@ -338,7 +339,7 @@ bool SSAIfConv::findInsertionPoint() { /// bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { Head = MBB; - TBB = FBB = Tail = 0; + TBB = FBB = Tail = nullptr; if (Head->succ_size() != 2) return false; @@ -461,9 +462,9 @@ void SSAIfConv::replacePHIInstrs() { DEBUG(dbgs() << "If-converting " << *PI.PHI); unsigned DstReg = PI.PHI->getOperand(0).getReg(); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); - DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); - PI.PHI = 0; + PI.PHI = nullptr; } } @@ -482,7 +483,7 @@ void SSAIfConv::rewritePHIOperands() { unsigned PHIDst = PI.PHI->getOperand(0).getReg(); unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); - DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { @@ -564,7 +565,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { // We need a branch to Tail, let code placement work it out later. 
 DEBUG(dbgs() << "Converting to unconditional branch.\n");
     SmallVector<MachineOperand, 0> EmptyCond;
-    TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL);
+    TII->InsertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
     Head->addSuccessor(Tail);
   }
   DEBUG(dbgs() << *Head);
@@ -590,9 +591,9 @@ class EarlyIfConverter : public MachineFunctionPass {
 public:
   static char ID;
   EarlyIfConverter() : MachineFunctionPass(ID) {}
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-  bool runOnMachineFunction(MachineFunction &MF);
-  const char *getPassName() const { return "Early If-Conversion"; }
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  const char *getPassName() const override { return "Early If-Conversion"; }
 
 private:
   bool tryConvertIf(MachineBasicBlock*);
@@ -775,6 +776,12 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
 bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
                << "********** Function: " << MF.getName() << '\n');
+  // Only run if-conversion if the target wants it.
+  if (!MF.getTarget()
+           .getSubtarget<TargetSubtargetInfo>()
+           .enableEarlyIfConversion())
+    return false;
+
   TII = MF.getTarget().getInstrInfo();
   TRI = MF.getTarget().getRegisterInfo();
   SchedModel =
@@ -783,7 +790,7 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
   DomTree = &getAnalysis<MachineDominatorTree>();
   Loops = getAnalysisIfAvailable<MachineLoopInfo>();
   Traces = &getAnalysis<MachineTraceMetrics>();
-  MinInstr = 0;
+  MinInstr = nullptr;
 
   bool Changed = false;
   IfConv.runOnMachineFunction(MF);
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
index 3bb0465..aea7c31 100644
--- a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -41,9 +41,7 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
   EC.clear();
   EC.grow(2 * MF->getNumBlockIDs());
 
-  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
-       ++I) {
-    const MachineBasicBlock &MBB = *I;
+  for (const auto &MBB : *MF) {
     unsigned OutE = 2 * MBB.getNumber() + 1;
     // Join the outgoing bundle with the incoming bundles of all successors.
     for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
@@ -69,29 +67,31 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
   return false;
 }
 
-/// view - Visualize the annotated bipartite CFG with Graphviz.
-void EdgeBundles::view() const {
-  ViewGraph(*this, "EdgeBundles");
-}
-
 /// Specialize WriteGraph, the standard implementation won't work.
-raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, - bool ShortNames, - const Twine &Title) { +namespace llvm { +template<> +raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, + bool ShortNames, + const Twine &Title) { const MachineFunction *MF = G.getMachineFunction(); O << "digraph {\n"; - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { - unsigned BB = I->getNumber(); + for (const auto &MBB : *MF) { + unsigned BB = MBB.getNumber(); O << "\t\"BB#" << BB << "\" [ shape=box ]\n" << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n" << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n'; - for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(), - SE = I->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber() << "\" [ color=lightgray ]\n"; } O << "}\n"; return O; } +} + +/// view - Visualize the annotated bipartite CFG with Graphviz. +void EdgeBundles::view() const { + ViewGraph(*this, "EdgeBundles"); +} diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp index 8a1e2d9..e976d7f 100644 --- a/contrib/llvm/lib/CodeGen/ErlangGC.cpp +++ b/contrib/llvm/lib/CodeGen/ErlangGC.cpp @@ -32,7 +32,7 @@ namespace { DebugLoc DL) const; public: ErlangGC(); - bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF); + bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) override; }; } diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 031f19c..cf55b68 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -20,10 +20,9 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Allocator.h" @@ -33,6 +32,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "execution-fix" + /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track /// of execution domains. /// @@ -100,7 +101,7 @@ struct DomainValue { // Clear this DomainValue and point to next which has all its data. void clear() { AvailableDomains = 0; - Next = 0; + Next = nullptr; Instrs.clear(); } }; @@ -141,7 +142,7 @@ class ExeDepsFix : public MachineFunctionPass { std::vector<std::pair<MachineInstr*, unsigned> > UndefReads; /// Storage for register unit liveness. - LiveRegUnits LiveUnits; + LivePhysRegs LiveRegSet; /// Current instruction number. /// The first instruction in each basic block is 0. 
@@ -155,14 +156,14 @@ public: ExeDepsFix(const TargetRegisterClass *rc) : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "Execution dependency fix"; } @@ -275,7 +276,7 @@ void ExeDepsFix::kill(int rx) { return; release(LiveRegs[rx].Value); - LiveRegs[rx].Value = 0; + LiveRegs[rx].Value = nullptr; } /// Force register rx into domain. @@ -352,7 +353,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Set up UndefReads to track undefined register reads. UndefReads.clear(); - LiveUnits.clear(); + LiveRegSet.clear(); // Set up LiveRegs to represent registers entering MBB. if (!LiveRegs) @@ -360,7 +361,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Default values are 'nothing happened a long time ago'. for (unsigned rx = 0; rx != NumRegs; ++rx) { - LiveRegs[rx].Value = 0; + LiveRegs[rx].Value = nullptr; LiveRegs[rx].Def = -(1 << 20); } @@ -404,7 +405,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // We have a live DomainValue from more than one predecessor. if (LiveRegs[rx].Value->isCollapsed()) { - // We are already collapsed, but predecessor is not. Force him. + // We are already collapsed, but predecessor is not. Force it. unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) collapse(pdv, Domain); @@ -440,7 +441,7 @@ void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { release(LiveRegs[i].Value); delete[] LiveRegs; } - LiveRegs = 0; + LiveRegs = nullptr; } void ExeDepsFix::visitInstr(MachineInstr *MI) { @@ -547,21 +548,19 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { return; // Collect this block's live out register units. - LiveUnits.init(TRI); - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - LiveUnits.addLiveIns(*SI, *TRI); - } + LiveRegSet.init(TRI); + LiveRegSet.addLiveOuts(MBB); + MachineInstr *UndefMI = UndefReads.back().first; unsigned OpIdx = UndefReads.back().second; for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); I != E; ++I) { - // Update liveness, including the current instrucion's defs. - LiveUnits.stepBackward(*I, *TRI); + // Update liveness, including the current instruction's defs. + LiveRegSet.stepBackward(*I); if (UndefMI == &*I) { - if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI)) + if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); UndefReads.pop_back(); @@ -666,7 +665,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // doms are now sorted in order of appearance. Try to merge them all, giving // priority to the latest ones. 
- DomainValue *dv = 0; + DomainValue *dv = nullptr; while (!Regs.empty()) { if (!dv) { dv = Regs.pop_back_val().Value; @@ -716,7 +715,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); - LiveRegs = 0; + LiveRegs = nullptr; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp index b2b6882..90b62b5 100644 --- a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "expand-isel-pseudos" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -23,6 +22,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "expand-isel-pseudos" + namespace { class ExpandISelPseudos : public MachineFunctionPass { public: @@ -30,9 +31,9 @@ namespace { ExpandISelPseudos() : MachineFunctionPass(ID) {} private: - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } }; diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 6c73fff..8969bcc 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "postrapseudos" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -25,6 +24,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "postrapseudos" + namespace { struct ExpandPostRA : public MachineFunctionPass { private: @@ -35,7 +36,7 @@ public: static char ID; // Pass identification, replacement for typeid ExpandPostRA() : MachineFunctionPass(ID) {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); @@ -43,7 +44,7 @@ public: } /// runOnMachineFunction - pass entry point - bool runOnMachineFunction(MachineFunction&); + bool runOnMachineFunction(MachineFunction&) override; private: bool LowerSubregToReg(MachineInstr *MI); diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp index ef5247c..c3e4f3e 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -32,12 +32,12 @@ namespace { public: explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} - - const char *getPassName() const; - void getAnalysisUsage(AnalysisUsage &AU) const; - - bool runOnFunction(Function &F); - bool doFinalization(Module &M); + + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + bool doFinalization(Module &M) override; }; } @@ -61,10 +61,6 @@ GCModuleInfo::GCModuleInfo() 
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); } -GCModuleInfo::~GCModuleInfo() { - clear(); -} - GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, const std::string &Name) { strategy_map_type::iterator NMI = StrategyMap.find(Name); @@ -74,17 +70,17 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, for (GCRegistry::iterator I = GCRegistry::begin(), E = GCRegistry::end(); I != E; ++I) { if (Name == I->getName()) { - GCStrategy *S = I->instantiate(); + std::unique_ptr<GCStrategy> S = I->instantiate(); S->M = M; S->Name = Name; - StrategyMap.GetOrCreateValue(Name).setValue(S); - StrategyList.push_back(S); - return S; + StrategyMap.GetOrCreateValue(Name).setValue(S.get()); + StrategyList.push_back(std::move(S)); + return StrategyList.back().get(); } } dbgs() << "unsupported GC: " << Name << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { @@ -104,9 +100,6 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { void GCModuleInfo::clear() { FInfoMap.clear(); StrategyMap.clear(); - - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; StrategyList.clear(); } diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp index 1173d11..1fdff6b 100644 --- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp +++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp @@ -16,13 +16,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/Analysis/DominatorInternals.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" @@ -53,11 +52,11 @@ namespace { static char ID; LowerIntrinsics(); - const char *getPassName() const; - void getAnalysisUsage(AnalysisUsage &AU) const; + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; - bool doInitialization(Module &M); - bool runOnFunction(Function &F); + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; }; @@ -83,9 +82,9 @@ namespace { static char ID; GCMachineCodeAnalysis(); - void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; }; } @@ -102,13 +101,6 @@ GCStrategy::GCStrategy() : UsesMetadata(false) {} -GCStrategy::~GCStrategy() { - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; - - Functions.clear(); -} - bool GCStrategy::initializeCustomLowering(Module &M) { return false; } bool GCStrategy::performCustomLowering(Function &F) { @@ -119,14 +111,13 @@ bool GCStrategy::performCustomLowering(Function &F) { bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { - GCFunctionInfo *FI = new GCFunctionInfo(F, *this); - Functions.push_back(FI); - return FI; + Functions.push_back(make_unique<GCFunctionInfo>(F, *this)); + return 
Functions.back().get();
 }
 
 // -----------------------------------------------------------------------------
 
@@ -154,7 +145,7 @@ const char *LowerIntrinsics::getPassName() const {
 void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
   FunctionPass::getAnalysisUsage(AU);
   AU.addRequired<GCModuleInfo>();
-  AU.addPreserved<DominatorTree>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
 }
 
 /// doInitialization - If this module uses the GC intrinsics, find them now.
@@ -271,8 +262,9 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
 
   // Custom lowering may modify the CFG, so dominators must be recomputed.
   if (UseCustomLoweringPass) {
-    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
-      DT->DT->recalculate(F);
+    if (DominatorTreeWrapperPass *DTWP =
+            getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+      DTWP->getDomTree().recalculate(F);
   }
 
   return MadeChange;
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
new file mode 100644
index 0000000..5572a06
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -0,0 +1,361 @@
+//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all of the
+// globals that were merged into the bigger one can be addressed using offsets
+// from the same base pointer (there is no need for a separate base pointer for
+// each of the globals). Such a transformation can significantly reduce the
+// register pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+//   foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of all three arrays must be kept in registers, so this
+// code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+//   int foo[N];
+//   int bar[N];
+//   int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+//   merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved two registers here almost "for free".
+// ===---------------------------------------------------------------------===//
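[Editor's note — illustration, not part of the patch.] A sketch of how a backend of this vintage might schedule the pass, using the createGlobalMergePass factory defined at the bottom of this file (MyTargetPassConfig is a hypothetical TargetPassConfig subclass; ARM wired the pass up from a hook like this):

// TM is the TargetMachine pointer inherited from TargetPassConfig.
bool MyTargetPassConfig::addPreISel() {
  // Merge internal globals before instruction selection so that the merged
  // accesses can share a single base pointer.
  addPass(createGlobalMergePass(TM));
  return false;
}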
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "global-merge"
+
+static cl::opt<bool>
+EnableGlobalMerge("enable-global-merge", cl::Hidden,
+                  cl::desc("Enable global merge pass"),
+                  cl::init(true));
+
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+                         cl::desc("Enable global merge pass on constants"),
+                         cl::init(false));
+
+// FIXME: this could be a transitional option; we should probably remove it
+// once we are sure this optimization always benefits all targets.
+static cl::opt<bool>
+EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
+     cl::desc("Enable global merge pass on external linkage"),
+     cl::init(false));
+
+STATISTIC(NumMerged, "Number of globals merged");
+namespace {
+  class GlobalMerge : public FunctionPass {
+    const TargetMachine *TM;
+
+    bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                 Module &M, bool isConst, unsigned AddrSpace) const;
+
+    /// \brief Check if the given variable has been identified as must-keep.
+    /// \pre setMustKeepGlobalVariables must have been called on the Module
+    /// that contains GV.
+    bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+      return MustKeepGlobalVariables.count(GV);
+    }
+
+    /// Collect every variable marked as "used" or used in a landing pad
+    /// instruction for this Module.
+    void setMustKeepGlobalVariables(Module &M);
+
+    /// Collect every variable marked as "used".
+    void collectUsedGlobalVariables(Module &M);
+
+    /// Keep track of the GlobalVariables that must not be merged away.
+    SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
+  public:
+    static char ID;             // Pass identification, replacement for typeid.
+    explicit GlobalMerge(const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM) {
+      initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+    }
+
+    bool doInitialization(Module &M) override;
+    bool runOnFunction(Function &F) override;
+    bool doFinalization(Module &M) override;
+
+    const char *getPassName() const override {
+      return "Merge internal globals";
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+      FunctionPass::getAnalysisUsage(AU);
+    }
+  };
+} // end anonymous namespace
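[Editor's note — illustration, not part of the patch.] setMustKeepGlobalVariables above excludes anything recorded in @llvm.used (and globals referenced from landingpad clauses) from merging. A small hypothetical source-level example of what survives untouched:

// __attribute__((used)) places the variable in the @llvm.used list, so the
// pass records it as must-keep and will not fold it into _MergedGlobals.
__attribute__((used)) static int version_tag = 42;

static int a[32], b[32];  // still eligible to be merged with each other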
+
+char GlobalMerge::ID = 0;
+INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
+                   false, false)
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                          Module &M, bool isConst, unsigned AddrSpace) const {
+  const TargetLowering *TLI = TM->getTargetLowering();
+  const DataLayout *DL = TLI->getDataLayout();
+
+  // FIXME: Infer the maximum possible offset depending on the actual users
+  // (these max offsets are different for the users inside Thumb or ARM
+  // functions).
+  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+  // FIXME: Find better heuristics.
+  std::stable_sort(Globals.begin(), Globals.end(),
+                   [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+    Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+    Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+    return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2));
+  });
+
+  Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+  assert(Globals.size() > 1);
+
+  // FIXME: This simple solution merges as many globals together as possible.
+  // However, with this solution it would be hard to remove dead global
+  // symbols at link time. An alternative solution could be to check global
+  // symbol references function by function, merge only the symbols referred
+  // to in the same function, and introduce a heuristic to resolve merge
+  // conflicts between different functions.
+  for (size_t i = 0, e = Globals.size(); i != e; ) {
+    size_t j = 0;
+    uint64_t MergedSize = 0;
+    std::vector<Type*> Tys;
+    std::vector<Constant*> Inits;
+
+    bool HasExternal = false;
+    GlobalVariable *TheFirstExternal = nullptr;
+    for (j = i; j != e; ++j) {
+      Type *Ty = Globals[j]->getType()->getElementType();
+      MergedSize += DL->getTypeAllocSize(Ty);
+      if (MergedSize > MaxOffset) {
+        break;
+      }
+      Tys.push_back(Ty);
+      Inits.push_back(Globals[j]->getInitializer());
+
+      if (Globals[j]->hasExternalLinkage() && !HasExternal) {
+        HasExternal = true;
+        TheFirstExternal = Globals[j];
+      }
+    }
+
+    // If the merged variables don't have external linkage, we needn't expose
+    // the symbol after merging.
+    GlobalValue::LinkageTypes Linkage = HasExternal
+                                            ? GlobalValue::ExternalLinkage
+                                            : GlobalValue::InternalLinkage;
+
+    StructType *MergedTy = StructType::get(M.getContext(), Tys);
+    Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+
+    // If the merged variables have external linkage, we use the symbol name
+    // of the first merged variable as the suffix of the global symbol name.
+    // This avoids link-time naming conflicts for global symbols.
+    GlobalVariable *MergedGV = new GlobalVariable(
+        M, MergedTy, isConst, Linkage, MergedInit,
+        HasExternal ? 
"_MergedGlobals_" + TheFirstExternal->getName() + : "_MergedGlobals", + nullptr, GlobalVariable::NotThreadLocal, AddrSpace); + + for (size_t k = i; k < j; ++k) { + GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); + std::string Name = Globals[k]->getName(); + + Constant *Idx[2] = { + ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, k-i) + }; + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); + Globals[k]->replaceAllUsesWith(GEP); + Globals[k]->eraseFromParent(); + + if (Linkage != GlobalValue::InternalLinkage) { + // Generate a new alias... + auto *PTy = cast<PointerType>(GEP->getType()); + GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + Linkage, Name, GEP, &M); + } + + NumMerged++; + } + i = j; + } + + return true; +} + +void GlobalMerge::collectUsedGlobalVariables(Module &M) { + // Extract global variables from llvm.used array + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. + const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer()); + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (const GlobalVariable *G = + dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) + MustKeepGlobalVariables.insert(G); +} + +void GlobalMerge::setMustKeepGlobalVariables(Module &M) { + collectUsedGlobalVariables(M); + + for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; + ++IFn) { + for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); + IBB != IEndBB; ++IBB) { + // Follow the invoke link to find the landing pad instruction + const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); + if (!II) continue; + + const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); + // Look for globals in the clauses of the landing pad instruction + for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); + Idx != NumClauses; ++Idx) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(LPInst->getClause(Idx) + ->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } +} + +bool GlobalMerge::doInitialization(Module &M) { + if (!EnableGlobalMerge) + return false; + + DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, + BSSGlobals; + const TargetLowering *TLI = TM->getTargetLowering(); + const DataLayout *DL = TLI->getDataLayout(); + unsigned MaxOffset = TLI->getMaximalGlobalOffset(); + bool Changed = false; + setMustKeepGlobalVariables(M); + + // Grab all non-const globals. + for (Module::global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + // Merge is safe for "normal" internal or external globals only + if (I->isDeclaration() || I->isThreadLocal() || I->hasSection()) + continue; + + if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) && + !I->hasInternalLinkage()) + continue; + + PointerType *PT = dyn_cast<PointerType>(I->getType()); + assert(PT && "Global variable is not a pointer!"); + + unsigned AddressSpace = PT->getAddressSpace(); + + // Ignore fancy-aligned globals for now. + unsigned Alignment = DL->getPreferredAlignment(I); + Type *Ty = I->getType()->getElementType(); + if (Alignment > DL->getABITypeAlignment(Ty)) + continue; + + // Ignore all 'special' globals. 
+ if (I->getName().startswith("llvm.") || + I->getName().startswith(".llvm.")) + continue; + + // Ignore all "required" globals: + if (isMustKeepGlobalVariable(I)) + continue; + + if (DL->getTypeAllocSize(Ty) < MaxOffset) { + if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal()) + BSSGlobals[AddressSpace].push_back(I); + else if (I->isConstant()) + ConstGlobals[AddressSpace].push_back(I); + else + Globals[AddressSpace].push_back(I); + } + } + + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = Globals.begin(), E = Globals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, false, I->first); + + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, false, I->first); + + if (EnableGlobalMergeOnConst) + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, true, I->first); + + return Changed; +} + +bool GlobalMerge::runOnFunction(Function &F) { + return false; +} + +bool GlobalMerge::doFinalization(Module &M) { + MustKeepGlobalVariables.clear(); + return false; +} + +Pass *llvm::createGlobalMergePass(const TargetMachine *TM) { + return new GlobalMerge(TM); +} diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index e2d0eb4..1502d5f 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -11,19 +11,18 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ifcvt" #include "llvm/CodeGen/Passes.h" #include "BranchFolding.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -37,6 +36,8 @@ using namespace llvm; +#define DEBUG_TYPE "ifcvt" + // Hidden options for help debugging. 
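The options that follow use LLVM's standard hidden-knob pattern; a minimal sketch of that shape (the option name and description here are made up, not part of the patch):

    // A hidden integer knob: omitted from -help output, default -1 ("off").
    static cl::opt<int>
    ExampleKnob("example-knob", cl::init(-1), cl::Hidden,
                cl::desc("Hypothetical knob in the style of the options below"));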
static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); @@ -127,7 +128,8 @@ namespace { IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), HasFallThrough(false), IsUnpredicable(false), CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {} + ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr), + FalseBB(nullptr) {} }; /// IfcvtToken - Record information about pending if-conversions to attempt: @@ -162,8 +164,8 @@ namespace { const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; - LiveRegUnits Redefs; - LiveRegUnits DontKill; + LivePhysRegs Redefs; + LivePhysRegs DontKill; bool PreRegAlloc; bool MadeChange; @@ -174,12 +176,12 @@ namespace { initializeIfConverterPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: bool ReverseBranchCondition(BBInfo &BBI); @@ -205,7 +207,7 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> *LaterRedefs = 0); + SmallSet<unsigned, 4> *LaterRedefs = nullptr); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, bool IgnoreBr = false); @@ -230,7 +232,7 @@ namespace { // blockAlwaysFallThrough - Block ends without a terminator. bool blockAlwaysFallThrough(BBInfo &BBI) const { - return BBI.IsBrAnalyzable && BBI.TrueBB == NULL; + return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr; } // IfcvtTokenCmp - Used to sort if-conversion candidates. @@ -438,7 +440,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, if (SuccBB != TrueBB) return SuccBB; } - return NULL; + return nullptr; } /// ReverseBranchCondition - Reverse the condition of the end of the block @@ -460,7 +462,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { MachineFunction::iterator I = BB; MachineFunction::iterator E = BB->getParent()->end(); if (++I == E) - return NULL; + return nullptr; return I; } @@ -551,7 +553,7 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, FT = getNextBlock(FalseBBI.BB); if (TT != FT) return false; - if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) return false; if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) return false; @@ -641,11 +643,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool AlreadyPredicated = !BBI.Predicate.empty(); // First analyze the end of BB branches. - BBI.TrueBB = BBI.FalseBB = NULL; + BBI.TrueBB = BBI.FalseBB = nullptr; BBI.BrCond.clear(); BBI.IsBrAnalyzable = !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); - BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL; + BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; if (BBI.BrCond.size()) { // No false branch. This BB must end with a conditional branch and a @@ -921,7 +923,7 @@ void IfConverter::AnalyzeBlocks(MachineFunction &MF, /// next block). 
 static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
   MachineFunction::iterator PI = BB;
-  MachineFunction::iterator I = llvm::next(PI);
+  MachineFunction::iterator I = std::next(PI);
   MachineFunction::iterator TI = ToBB;
   MachineFunction::iterator E = BB->getParent()->end();
   while (I != TI) {
@@ -954,13 +956,13 @@ static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
                                const TargetInstrInfo *TII) {
   DebugLoc dl;  // FIXME: this is nowhere
   SmallVector<MachineOperand, 0> NoCond;
-  TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
+  TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl);
 }
 
 /// RemoveExtraEdges - Remove true / false edges if either / both are no longer
 /// successors.
 void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
-  MachineBasicBlock *TBB = NULL, *FBB = NULL;
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
   SmallVector<MachineOperand, 4> Cond;
   if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
     BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
@@ -968,23 +970,22 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
 
 /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
 /// values defined in MI which are not live/used by MI.
-static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs,
-                             const TargetRegisterInfo *TRI) {
+static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) {
   for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) {
     if (!Ops->isReg() || !Ops->isKill())
       continue;
     unsigned Reg = Ops->getReg();
     if (Reg == 0)
       continue;
-    Redefs.removeReg(Reg, *TRI);
+    Redefs.removeReg(Reg);
   }
   for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) {
     if (!Ops->isReg() || !Ops->isDef())
       continue;
     unsigned Reg = Ops->getReg();
-    if (Reg == 0 || Redefs.contains(Reg, *TRI))
+    if (Reg == 0 || Redefs.contains(Reg))
       continue;
-    Redefs.addReg(Reg, *TRI);
+    Redefs.addReg(Reg);
 
     MachineOperand &Op = *Ops;
     MachineInstr *MI = Op.getParent();
@@ -996,12 +997,11 @@ static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs,
 /**
  * Remove kill flags from operands with registers in the @p DontKill set.
  */
-static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill,
-                        const MCRegisterInfo &MCRI) {
+static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
   for (MIBundleOperands O(&MI); O.isValid(); ++O) {
     if (!O->isReg() || !O->isKill())
       continue;
-    if (DontKill.contains(O->getReg(), MCRI))
+    if (DontKill.contains(O->getReg()))
       O->setIsKill(false);
   }
 }
@@ -1012,10 +1012,10 @@ static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill,
  */
 static void RemoveKills(MachineBasicBlock::iterator I,
                         MachineBasicBlock::iterator E,
-                        const LiveRegUnits &DontKill,
+                        const LivePhysRegs &DontKill,
                         const MCRegisterInfo &MCRI) {
   for ( ; I != E; ++I)
-    RemoveKills(*I, DontKill, MCRI);
+    RemoveKills(*I, DontKill);
 }
 
 /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
@@ -1049,13 +1049,13 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
   // Initialize liveins to the first BB. These are potentially redefined by
   // predicated instructions.
   Redefs.init(TRI);
-  Redefs.addLiveIns(CvtBBI->BB, *TRI);
-  Redefs.addLiveIns(NextBBI->BB, *TRI);
+  Redefs.addLiveIns(CvtBBI->BB);
+  Redefs.addLiveIns(NextBBI->BB);
 
   // Compute a set of registers which must not be killed by instructions in
   // BB1: This is everything live-in to BB2.
DontKill.init(TRI); - DontKill.addLiveIns(NextBBI->BB, *TRI); + DontKill.addLiveIns(NextBBI->BB); if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1104,6 +1104,28 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { return true; } +/// Scale down weights to fit into uint32_t. NewTrue is the new weight +/// for successor TrueBB, and NewFalse is the new weight for successor +/// FalseBB. +static void ScaleWeights(uint64_t NewTrue, uint64_t NewFalse, + MachineBasicBlock *MBB, + const MachineBasicBlock *TrueBB, + const MachineBasicBlock *FalseBB, + const MachineBranchProbabilityInfo *MBPI) { + uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; + uint32_t Scale = (NewMax / UINT32_MAX) + 1; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); + SI != SE; ++SI) { + if (*SI == TrueBB) + MBB->setSuccWeight(SI, (uint32_t)(NewTrue / Scale)); + else if (*SI == FalseBB) + MBB->setSuccWeight(SI, (uint32_t)(NewFalse / Scale)); + else + MBB->setSuccWeight(SI, MBPI->getEdgeWeight(MBB, SI) / Scale); + } +} + /// IfConvertTriangle - If convert a triangle sub-CFG. /// bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { @@ -1154,12 +1176,22 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(CvtBBI->BB, *TRI); - Redefs.addLiveIns(NextBBI->BB, *TRI); + Redefs.addLiveIns(CvtBBI->BB); + Redefs.addLiveIns(NextBBI->BB); DontKill.clear(); - bool HasEarlyExit = CvtBBI->FalseBB != NULL; + bool HasEarlyExit = CvtBBI->FalseBB != nullptr; + uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0; + uint32_t WeightScale = 0; + if (HasEarlyExit) { + // Get weights before modifying CvtBBI->BB and BBI.BB. + CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB); + CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB); + BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB); + BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB); + SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale); + } if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to @@ -1185,8 +1217,22 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) llvm_unreachable("Unable to reverse branch condition!"); - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB); + // Update the edge weight for both CvtBBI->FalseBB and NextBBI. + // New_Weight(BBI.BB, NextBBI->BB) = + // Weight(BBI.BB, NextBBI->BB) * getSumForBlock(CvtBBI->BB) + + // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, NextBBI->BB) + // New_Weight(BBI.BB, CvtBBI->FalseBB) = + // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB) + + uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale; + uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale; + // We need to scale down all weights of BBI.BB to fit uint32_t. + // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to + // the next block. 
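To make the weight-update comment above concrete, here is the arithmetic with invented weights (the numbers are illustrative, not from any real CFG):

    // Suppose Weight(BBI.BB->NextBBI) = 48, Weight(BBI.BB->CvtBBI) = 16,
    // Weight(CvtBBI->NextBBI) = 3, Weight(CvtBBI->FalseBB) = 1, so
    // SumWeight = 4 and WeightScale = 1. Then:
    uint64_t NewNext  = 48 * 4 + (16 * 3) / 1; // == 240
    uint64_t NewFalse = (16 * 1) / 1;          // == 16
    // Both fit in uint32_t, so ScaleWeights' divisor (240 / UINT32_MAX) + 1
    // is 1, and the fall-through edge ends up 15x hotter than the early exit.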
+ ScaleWeights(NewNext, NewFalse, BBI.BB, getNextBlock(BBI.BB), + CvtBBI->FalseBB, MBPI); } // Merge in the 'false' block if the 'false' block has no other @@ -1284,7 +1330,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(BBI1->BB, *TRI); + Redefs.addLiveIns(BBI1->BB); // Remove the duplicated instructions at the beginnings of both paths. MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); @@ -1317,12 +1363,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, DontKill.init(TRI); for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { - DontKill.stepBackward(*I, *TRI); + DontKill.stepBackward(*I); } for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; ++I) { - Redefs.stepForward(*I, *TRI); + Redefs.stepForward(*I); } BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1409,8 +1455,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. - MergeBlocks(BBI, *BBI1, TailBB == 0); - MergeBlocks(BBI, *BBI2, TailBB == 0); + MergeBlocks(BBI, *BBI1, TailBB == nullptr); + MergeBlocks(BBI, *BBI2, TailBB == nullptr); // If the if-converted block falls through or unconditionally branches into // the tail block, and the tail block does not have other predecessors, then @@ -1459,7 +1505,7 @@ static bool MaySpeculate(const MachineInstr *MI, SmallSet<unsigned, 4> &LaterRedefs, const TargetInstrInfo *TII) { bool SawStore = true; - if (!MI->isSafeToMove(TII, 0, SawStore)) + if (!MI->isSafeToMove(TII, nullptr, SawStore)) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -1483,7 +1529,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, SmallSet<unsigned, 4> *LaterRedefs) { bool AnyUnpred = false; - bool MaySpec = LaterRedefs != 0; + bool MaySpec = LaterRedefs != nullptr; for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { if (I->isDebugValue() || TII->isPredicated(I)) continue; @@ -1501,12 +1547,12 @@ void IfConverter::PredicateBlock(BBInfo &BBI, #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(I, Redefs, TRI); + UpdatePredRedefs(I, Redefs); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1546,24 +1592,24 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(MI, Redefs, TRI); + UpdatePredRedefs(MI, Redefs); // Some kill flags may not be correct anymore. if (!DontKill.empty()) - RemoveKills(*MI, DontKill, *TRI); + RemoveKills(*MI, DontKill); } if (!IgnoreBr) { std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), FromBBI.BB->succ_end()); MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); - MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? 
NBB : NULL; + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; for (unsigned i = 0, e = Succs.size(); i != e; ++i) { MachineBasicBlock *Succ = Succs[i]; @@ -1599,7 +1645,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), FromBBI.BB->succ_end()); MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); - MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; for (unsigned i = 0, e = Succs.size(); i != e; ++i) { MachineBasicBlock *Succ = Succs[i]; diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index bb0e642..f3c8d3d 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "Spiller.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" @@ -21,8 +20,9 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -38,6 +38,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumSpilledRanges, "Number of spilled live ranges"); STATISTIC(NumSnippets, "Number of spilled snippets"); STATISTIC(NumSpills, "Number of spills inserted"); @@ -120,7 +122,7 @@ public: SibValueInfo(unsigned Reg, VNInfo *VNI) : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false), - SpillReg(Reg), SpillVNI(VNI), SpillMBB(0), DefMI(0) {} + SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {} // Returns true when a def has been found. bool hasDef() const { return DefByOrigPHI || DefMI; } @@ -153,7 +155,7 @@ public: TRI(*mf.getTarget().getRegisterInfo()), MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} - void spill(LiveRangeEdit &); + void spill(LiveRangeEdit &) override; private: bool isSnippet(const LiveInterval &SnipLI); @@ -166,7 +168,7 @@ private: bool isSibling(unsigned Reg); MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); - void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = 0); + void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr); void analyzeSiblingValues(); bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI); @@ -178,7 +180,7 @@ private: bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >, - MachineInstr *LoadMI = 0); + MachineInstr *LoadMI = nullptr); void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); @@ -235,12 +237,13 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI)) return false; - MachineInstr *UseMI = 0; + MachineInstr *UseMI = nullptr; // Check that all uses satisfy our criteria. 
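A recurring mechanical change in this file, first visible in the hunk below, replaces the old skipInstruction()/skipBundle() loops with explicit iterator pairs. The new idiom, sketched with Reg and MRI as in the surrounding code:

    for (MachineRegisterInfo::reg_instr_nodbg_iterator
           RI = MRI.reg_instr_nodbg_begin(Reg), E = MRI.reg_instr_nodbg_end();
         RI != E;) {
      MachineInstr *MI = &*(RI++); // advance first, so erasing MI stays safe
      // ... inspect, rewrite, or erase MI ...
    }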
-  for (MachineRegisterInfo::reg_nodbg_iterator
-         RI = MRI.reg_nodbg_begin(SnipLI.reg);
-       MachineInstr *MI = RI.skipInstruction();) {
+  for (MachineRegisterInfo::reg_instr_nodbg_iterator
+         RI = MRI.reg_instr_nodbg_begin(SnipLI.reg),
+         E = MRI.reg_instr_nodbg_end(); RI != E; ) {
+    MachineInstr *MI = &*(RI++);
 
     // Allow copies to/from Reg.
     if (isFullCopyOf(MI, Reg))
@@ -277,8 +280,9 @@ void InlineSpiller::collectRegsToSpill() {
   if (Original == Reg)
     return;
 
-  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
-       MachineInstr *MI = RI.skipInstruction();) {
+  for (MachineRegisterInfo::reg_instr_iterator
+       RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) {
+    MachineInstr *MI = &*(RI++);
     unsigned SnipReg = isFullCopyOf(MI, Reg);
     if (!isSibling(SnipReg))
       continue;
@@ -364,7 +368,7 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
   do {
     SVI = WorkList.pop_back_val();
     TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
-    VNI = 0;
+    VNI = nullptr;
 
     SibValueInfo &SV = SVI->second;
     if (!SV.SpillMBB)
@@ -438,7 +442,20 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
       // Also hoist spills to blocks with smaller loop depth, but make sure
       // that the new value dominates.  Non-phi dependents are always
       // dominated, phis need checking.
+
+      const BranchProbability MarginProb(4, 5); // 80%
+      // Hoist a spill to an outer loop if there are multiple dependents (it
+      // can be beneficial if more than one dependent is hoisted) or
+      // if DepSV (the hoisting source) is hotter than SV (the hoisting
+      // destination) (we add an 80% margin to bias a little towards
+      // loop depth).
+      bool HoistCondition =
+        (MBFI.getBlockFreq(DepSV.SpillMBB) >=
+         (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) ||
+        Deps->size() > 1;
+
       if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
+          HoistCondition &&
           (!DepSVI->first->isPHIDef() ||
            MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
         Changed = true;
@@ -476,7 +493,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
   // Check if a cached value already exists.
   SibValueMap::iterator SVI;
   bool Inserted;
-  tie(SVI, Inserted) =
+  std::tie(SVI, Inserted) =
     SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
   if (!Inserted) {
     DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
@@ -495,7 +512,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
   do {
     unsigned Reg;
     VNInfo *VNI;
-    tie(Reg, VNI) = WorkList.pop_back_val();
+    std::tie(Reg, VNI) = WorkList.pop_back_val();
     DEBUG(dbgs() << "  " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
                  << ":\t");
@@ -554,7 +571,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
     for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
       VNInfo *NonPHI = NonPHIs[i];
       // Known value? Try an insertion.
-      tie(SVI, Inserted) =
+      std::tie(SVI, Inserted) =
         SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
       // Add all the PHIs as dependents of NonPHI.
       for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi)
@@ -587,8 +604,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
                  << SrcVNI->id << '@' << SrcVNI->def
                  << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
     // Known sibling source value? Try an insertion.
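Concretely, the BranchProbability(4, 5) in the HoistCondition above turns the block-frequency comparison into "the hoisting source is at least 80% as hot as the destination". With invented frequencies:

    uint64_t DepFreq = 85, SpillFreq = 100;        // made-up block frequencies
    bool HotEnough = DepFreq * 5 >= SpillFreq * 4; // 425 >= 400: may hoist
    // At DepFreq = 79 this fails (395 < 400), and hoisting would then need
    // more than one dependent (Deps->size() > 1).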
- tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI, - SibValueInfo(SrcReg, SrcVNI))); + std::tie(SVI, Inserted) = SibValues.insert( + std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI))); // This is the first time we see Src, add it to the worklist. if (Inserted) WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); @@ -643,7 +660,7 @@ void InlineSpiller::analyzeSiblingValues() { VNInfo *VNI = *VI; if (VNI->isUnused()) continue; - MachineInstr *DefMI = 0; + MachineInstr *DefMI = nullptr; if (!VNI->isPHIDef()) { DefMI = LIS.getInstructionFromIndex(VNI->def); assert(DefMI && "No defining instruction"); @@ -745,7 +762,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { do { LiveInterval *LI; - tie(LI, VNI) = WorkList.pop_back_val(); + std::tie(LI, VNI) = WorkList.pop_back_val(); unsigned Reg = LI->reg; DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); @@ -759,8 +776,10 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); // Find all spills and copies of VNI. - for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); - MachineInstr *MI = UI.skipInstruction();) { + for (MachineRegisterInfo::use_instr_nodbg_iterator + UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end(); + UI != E; ) { + MachineInstr *MI = &*(UI++); if (!MI->isCopy() && !MI->mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); @@ -804,7 +823,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; WorkList.push_back(std::make_pair(LI, VNI)); do { - tie(LI, VNI) = WorkList.pop_back_val(); + std::tie(LI, VNI) = WorkList.pop_back_val(); if (!UsedValues.insert(VNI)) continue; @@ -920,10 +939,12 @@ void InlineSpiller::reMaterializeAll() { for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { unsigned Reg = RegsToSpill[i]; LiveInterval &LI = LIS.getInterval(Reg); - for (MachineRegisterInfo::use_nodbg_iterator - RI = MRI.use_nodbg_begin(Reg); - MachineInstr *MI = RI.skipBundle();) + for (MachineRegisterInfo::use_bundle_nodbg_iterator + RI = MRI.use_bundle_nodbg_begin(Reg), E = MRI.use_bundle_nodbg_end(); + RI != E; ) { + MachineInstr *MI = &*(RI++); anyRemat |= reMaterializeFor(LI, MI); + } } if (!anyRemat) return; @@ -1014,7 +1035,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, char NextLine = '\n'; char SlotIndent = '\t'; - if (llvm::next(B) == E) { + if (std::next(B) == E) { NextLine = ' '; SlotIndent = ' '; } @@ -1098,12 +1119,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, MRI.isReserved(Reg)) { continue; } + // Skip non-Defs, including undef uses and internal reads. + if (MO->isUse()) + continue; MIBundleOperands::PhysRegInfo RI = MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); - if (MO->readsReg()) { - assert(RI.Reads && "Cannot fold physreg reader"); - continue; - } if (RI.Defines) continue; // FoldMI does not define this physreg. Remove the LI segment. 
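This file, like the rest of the commit, swaps llvm::tie, llvm::next, and prior for their C++11 standard-library equivalents. The behavior is unchanged; a tiny self-contained sketch:

    #include <tuple>
    #include <utility>

    void tieDemo() {
      std::pair<unsigned, int> P(7, -1);
      unsigned Reg; int Val;
      std::tie(Reg, Val) = P; // Reg == 7, Val == -1, same as the old llvm::tie
    }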
@@ -1172,12 +1192,12 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
   MachineBasicBlock &MBB = *MI->getParent();
 
   MachineInstrSpan MIS(MI);
-  TII.storeRegToStackSlot(MBB, llvm::next(MI), NewVReg, isKill, StackSlot,
+  TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
                           MRI.getRegClass(NewVReg), &TRI);
 
-  LIS.InsertMachineInstrRangeInMaps(llvm::next(MI), MIS.end());
+  LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());
 
-  DEBUG(dumpMachineInstrRangeWithSlotIndex(llvm::next(MI), MIS.end(), LIS,
+  DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
                                            "spill"));
   ++NumSpills;
 }
@@ -1188,8 +1208,10 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
   LiveInterval &OldLI = LIS.getInterval(Reg);
 
   // Iterate over instructions using Reg.
-  for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg);
-       MachineInstr *MI = RegI.skipBundle();) {
+  for (MachineRegisterInfo::reg_bundle_iterator
+       RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
+       RegI != E; ) {
+    MachineInstr *MI = &*(RegI++);
 
     // Debug values are not allowed to affect codegen.
     if (MI->isDebugValue()) {
@@ -1314,8 +1336,10 @@ void InlineSpiller::spillAll() {
 
   // Finally delete the SnippetCopies.
   for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
-    for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]);
-         MachineInstr *MI = RI.skipInstruction();) {
+    for (MachineRegisterInfo::reg_instr_iterator
+         RI = MRI.reg_instr_begin(RegsToSpill[i]), E = MRI.reg_instr_end();
+         RI != E; ) {
+      MachineInstr *MI = &*(RI++);
       assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
       // FIXME: Do this with a LiveRangeEdit callback.
       LIS.RemoveMachineInstrFromMaps(MI);
@@ -1336,7 +1360,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
   // Share a stack slot among all descendants of Original.
   Original = VRM.getOriginal(edit.getReg());
   StackSlot = VRM.getStackSlot(Original);
-  StackInt = 0;
+  StackInt = nullptr;
 
   DEBUG(dbgs() << "Inline spilling "
                << MRI.getRegClass(edit.getReg())->getName()
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
index 427225d..187e015 100644
--- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "regalloc"
 #include "InterferenceCache.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -19,9 +18,27 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "regalloc"
+
 // Static member used for null interference cursors.
 InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
 
+// Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a
+// buffer of size NumPhysRegs to speed up alloc/clear for targets with large
+// reg files). Calloc'ed memory is used for good form, and quiets tools like
+// Valgrind too, but zero-initialized memory is not required by the algorithm:
+// this is because PhysRegEntries works like a SparseSet and its entries are
+// only valid when there is a corresponding CacheEntries assignment. There is
+// also support for when pass managers are reused for targets with different
+// numbers of PhysRegs: in this case PhysRegEntries is freed and reinitialized.
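The validity protocol that the comment above alludes to can be sketched as follows (conceptual pseudocode for what InterferenceCache::get() must check; the accessor name is assumed, not quoted from the patch):

    // A stale byte in PhysRegEntries is harmless: an entry is trusted only if
    // its back-pointer matches, exactly like a SparseSet probe.
    unsigned char Idx = PhysRegEntries[PhysReg];
    bool Valid = Idx < CacheEntries && Entries[Idx].getPhysReg() == PhysReg;
    // If !Valid, a round-robin victim entry is reset to PhysReg and Idx is
    // stored back into PhysRegEntries[PhysReg].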
+void InterferenceCache::reinitPhysRegEntries() { + if (PhysRegEntriesCount == TRI->getNumRegs()) return; + free(PhysRegEntries); + PhysRegEntriesCount = TRI->getNumRegs(); + PhysRegEntries = (unsigned char*) + calloc(PhysRegEntriesCount, sizeof(unsigned char)); +} + void InterferenceCache::init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, @@ -30,7 +47,7 @@ void InterferenceCache::init(MachineFunction *mf, MF = mf; LIUArray = liuarray; TRI = tri; - PhysRegEntries.assign(TRI->getNumRegs(), 0); + reinitPhysRegEntries(); for (unsigned i = 0; i != CacheEntries; ++i) Entries[i].clear(mf, indexes, lis); } @@ -105,7 +122,7 @@ bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, void InterferenceCache::Entry::update(unsigned MBBNum) { SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum); // Use advanceTo only when possible. if (PrevPos != Start) { @@ -182,7 +199,7 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI = &Blocks[MBBNum]; if (BI->Tag == Tag) return; - tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum); } // Check for last interference in block. diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h index 800f705..91a1da9 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -77,7 +77,8 @@ class InterferenceCache { /// Iterator pointing into the fixed RegUnit interference. LiveInterval::iterator FixedI; - RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) { + RegUnitInfo(LiveIntervalUnion &LIU) + : VirtTag(LIU.getTag()), Fixed(nullptr) { VirtI.setMap(LIU.getMap()); } }; @@ -93,7 +94,7 @@ class InterferenceCache { void update(unsigned MBBNum); public: - Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {} + Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(nullptr), LIS(nullptr) {} void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) { assert(!hasRefs() && "Cannot clear cache entry with references"); @@ -135,7 +136,8 @@ class InterferenceCache { // Point to an entry for each physreg. The entry pointed to may not be up to // date, and it may have been reused for a different physreg. - SmallVector<unsigned char, 2> PhysRegEntries; + unsigned char* PhysRegEntries; + size_t PhysRegEntriesCount; // Next round-robin entry to be picked. unsigned RoundRobin; @@ -147,7 +149,15 @@ class InterferenceCache { Entry *get(unsigned PhysReg); public: - InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {} + InterferenceCache() + : TRI(nullptr), LIUArray(nullptr), MF(nullptr), PhysRegEntries(nullptr), + PhysRegEntriesCount(0), RoundRobin(0) {} + + ~InterferenceCache() { + free(PhysRegEntries); + } + + void reinitPhysRegEntries(); /// init - Prepare cache for a new function. void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*, @@ -164,7 +174,7 @@ public: static BlockInterference NoInterference; void setEntry(Entry *E) { - Current = 0; + Current = nullptr; // Update reference counts. Nothing happens when RefCount reaches 0, so // we don't have to check for E == CacheEntry etc. if (CacheEntry) @@ -176,10 +186,10 @@ public: public: /// Cursor - Create a dangling cursor. 
- Cursor() : CacheEntry(0), Current(0) {} - ~Cursor() { setEntry(0); } + Cursor() : CacheEntry(nullptr), Current(nullptr) {} + ~Cursor() { setEntry(nullptr); } - Cursor(const Cursor &O) : CacheEntry(0), Current(0) { + Cursor(const Cursor &O) : CacheEntry(nullptr), Current(nullptr) { setEntry(O.CacheEntry); } @@ -192,7 +202,7 @@ public: void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) { // Release reference before getting a new one. That guarantees we can // actually have CacheEntries live cursors. - setEntry(0); + setEntry(nullptr); if (PhysReg) setEntry(Cache.get(PhysReg)); } diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index c38d4fb..a8b8600 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -13,13 +13,13 @@ #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -115,21 +115,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) { Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - TD.getIntPtrType(Context), (Type *)0); + DL.getIntPtrType(Context), nullptr); break; case Intrinsic::memmove: M.getOrInsertFunction("memmove", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - TD.getIntPtrType(Context), (Type *)0); + DL.getIntPtrType(Context), nullptr); break; case Intrinsic::memset: M.getOrInsertFunction("memset", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt32Ty(M.getContext()), - TD.getIntPtrType(Context), (Type *)0); + DL.getIntPtrType(Context), nullptr); break; case Intrinsic::sqrt: EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); @@ -463,7 +463,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - Type *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = DL.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -474,7 +474,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - Type *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = DL.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -486,7 +486,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::memset: { Value *Op0 = CI->getArgOperand(0); - Type *IntPtr = TD.getIntPtrType(Op0->getType()); + Type *IntPtr = DL.getIntPtrType(Op0->getType()); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; diff --git a/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp b/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp new file mode 100644 index 0000000..750f71f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp @@ -0,0 +1,297 @@ +//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===// +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief An implementation of jump-instruction tables. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jt" + +#include "llvm/CodeGen/JumpInstrTables.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/JumpInstrTableInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <vector> + +using namespace llvm; + +char JumpInstrTables::ID = 0; + +INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables", + "Jump-Instruction Tables", true, true) +INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); +INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables", + "Jump-Instruction Tables", true, true) + +STATISTIC(NumJumpTables, "Number of indirect call tables generated"); +STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables"); + +ModulePass *llvm::createJumpInstrTablesPass() { + // The default implementation uses a single table for all functions. + return new JumpInstrTables(JumpTable::Single); +} + +ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) { + return new JumpInstrTables(JTT); +} + +namespace { +static const char jump_func_prefix[] = "__llvm_jump_instr_table_"; +static const char jump_section_prefix[] = ".jump.instr.table.text."; + +// Checks to see if a given CallSite is making an indirect call, including +// cases where the indirect call is made through a bitcast. +bool isIndirectCall(CallSite &CS) { + if (CS.getCalledFunction()) + return false; + + // Check the value to see if it is merely a bitcast of a function. In + // this case, it will translate to a direct function call in the resulting + // assembly, so we won't treat it as an indirect call here. + const Value *V = CS.getCalledValue(); + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + return !(CE->isCast() && isa<Function>(CE->getOperand(0))); + } + + // Otherwise, since we know it's a call, it must be an indirect call + return true; +} + +// Replaces Functions and GlobalAliases with a different Value. +bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) { + User *Us = U->getUser(); + if (!Us) + return false; + if (Instruction *I = dyn_cast<Instruction>(Us)) { + CallSite CS(I); + + // Don't do the replacement if this use is a direct call to this function. + // If the use is not the called value, then replace it. + if (CS && (isIndirectCall(CS) || CS.isCallee(U))) { + return false; + } + + U->set(V); + } else if (Constant *C = dyn_cast<Constant>(Us)) { + // Don't replace calls to bitcasts of function symbols, since they get + // translated to direct calls. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) { + if (CE->getOpcode() == Instruction::BitCast) { + // This bitcast must have exactly one user. 
+        if (CE->user_begin() != CE->user_end()) {
+          User *ParentUs = *CE->user_begin();
+          if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) {
+            CallSite CS(CI);
+            Use &CEU = *CE->use_begin();
+            if (CS.isCallee(&CEU)) {
+              return false;
+            }
+          }
+        }
+      }
+    }
+
+    // GlobalAlias doesn't support replaceUsesOfWithOnConstant, and the
+    // verifier requires an alias to point to a defined function. So,
+    // GlobalAlias is handled as a separate case in runOnModule.
+    if (!isa<GlobalAlias>(C))
+      C->replaceUsesOfWithOnConstant(GV, V, U);
+  } else {
+    assert(false && "The Use of a Function symbol is neither an instruction nor"
+                    " a constant");
+  }
+
+  return true;
+}
+
+// Replaces all replaceable address-taken uses of GV with a pointer to a
+// jump-instruction table entry.
+void replaceValueWithFunction(GlobalValue *GV, Function *F) {
+  // Go through all uses of this function and replace the uses of GV with the
+  // jump-table version of the function. Get the uses as a vector before
+  // replacing them, since replacing them changes the use list and invalidates
+  // the iterator otherwise.
+  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) {
+    Use &U = *I++;
+
+    // Replacement of constants replaces all instances in the constant. So,
+    // some uses might have already been handled by the time we reach them
+    // here.
+    if (U.get() == GV)
+      replaceGlobalValueIndirectUse(GV, F, &U);
+  }
+
+  return;
+}
+} // end anonymous namespace
+
+JumpInstrTables::JumpInstrTables()
+    : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0),
+      JTType(JumpTable::Single) {
+  initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT)
+    : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) {
+  initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTables::~JumpInstrTables() {}
+
+void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<JumpInstrTableInfo>();
+}
+
+Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
+  FunctionType *OrigFunTy = Target->getFunctionType();
+  FunctionType *FunTy = transformType(OrigFunTy);
+
+  JumpMap::iterator it = Metadata.find(FunTy);
+  if (Metadata.end() == it) {
+    struct TableMeta Meta;
+    Meta.TableNum = TableCount;
+    Meta.Count = 0;
+    Metadata[FunTy] = Meta;
+    it = Metadata.find(FunTy);
+    ++NumJumpTables;
+    ++TableCount;
+  }
+
+  it->second.Count++;
+
+  std::string NewName(jump_func_prefix);
+  NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str();
+  Function *JumpFun =
+      Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M);
+  // The section for this table.
+  JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str());
+  JITI->insertEntry(FunTy, Target, JumpFun);
+
+  ++NumFuncsInJumpTables;
+  return JumpFun;
+}
+
+bool JumpInstrTables::hasTable(FunctionType *FunTy) {
+  FunctionType *TransTy = transformType(FunTy);
+  return Metadata.end() != Metadata.find(TransTy);
+}
+
+FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
+  // Returning nullptr forces all types into the same table, since all types
+  // map to the same type.
+  Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
+
+  // Ignore the return type.
+  Type *RetTy = VoidPtrTy;
+  bool IsVarArg = FunTy->isVarArg();
+  std::vector<Type *> ParamTys(FunTy->getNumParams());
+  FunctionType::param_iterator PI, PE;
+  int i = 0;
+
+  std::vector<Type *> EmptyParams;
+  Type *Int32Ty = Type::getInt32Ty(FunTy->getContext());
+  FunctionType *VoidFnTy = FunctionType::get(
+      Type::getVoidTy(FunTy->getContext()), EmptyParams, false);
+  switch (JTType) {
+  case JumpTable::Single:
+    return FunctionType::get(RetTy, EmptyParams, false);
+  case JumpTable::Arity:
+    // Transform all types to void* so that all functions with the same arity
+    // end up in the same table.
+    for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
+         PI++, i++) {
+      ParamTys[i] = VoidPtrTy;
+    }
+
+    return FunctionType::get(RetTy, ParamTys, IsVarArg);
+  case JumpTable::Simplified:
+    // Project all parameter types to one of 3 types: composite, integer, and
+    // function, matching the three subclasses of Type.
+    for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
+         ++PI, ++i) {
+      assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) ||
+              isa<CompositeType>(*PI)) &&
+             "This type is not an Integer or a Composite or a Function");
+      if (isa<CompositeType>(*PI)) {
+        ParamTys[i] = VoidPtrTy;
+      } else if (isa<FunctionType>(*PI)) {
+        ParamTys[i] = VoidFnTy;
+      } else if (isa<IntegerType>(*PI)) {
+        ParamTys[i] = Int32Ty;
+      }
+    }
+
+    return FunctionType::get(RetTy, ParamTys, IsVarArg);
+  case JumpTable::Full:
+    // Don't transform this type at all.
+    return FunTy;
+  }
+
+  return nullptr;
+}
+
+bool JumpInstrTables::runOnModule(Module &M) {
+  JITI = &getAnalysis<JumpInstrTableInfo>();
+
+  // Get the set of jumptable-annotated functions.
+  DenseMap<Function *, Function *> Functions;
+  for (Function &F : M) {
+    if (F.hasFnAttribute(Attribute::JumpTable)) {
+      assert(F.hasUnnamedAddr() &&
+             "Attribute 'jumptable' requires 'unnamed_addr'");
+      Functions[&F] = nullptr;
+    }
+  }
+
+  // Create the jump-table functions.
+  for (auto &KV : Functions) {
+    Function *F = KV.first;
+    KV.second = insertEntry(M, F);
+  }
+
+  // GlobalAlias is a special case, because the target of an alias statement
+  // must be a defined function. So, instead of replacing a given function in
+  // the alias, we replace all uses of aliases that target jumptable functions.
+  // Note that there's no need to create these functions, since only aliases
+  // that target known jumptable functions are replaced, and there's no way to
+  // put the jumptable annotation on a global alias.
+  DenseMap<GlobalAlias *, Function *> Aliases;
+  for (GlobalAlias &GA : M.aliases()) {
+    Constant *Aliasee = GA.getAliasee();
+    if (Function *F = dyn_cast<Function>(Aliasee)) {
+      auto it = Functions.find(F);
+      if (it != Functions.end()) {
+        Aliases[&GA] = it->second;
+      }
+    }
+  }
+
+  // Replace each address-taken function with its jump-instruction table entry.
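Given jump_func_prefix and jump_section_prefix above, the first entry insertEntry creates is named __llvm_jump_instr_table_0_1 and placed in section .jump.instr.table.text.0. A hypothetical before/after for one address-taken function:

    // Before the pass: the address of f escapes directly.
    void f();
    void (*fp)() = f;   // fp == &f
    // After the pass (conceptually): fp instead holds the table entry, which
    // the backend later emits as a single unconditional jump:
    //   __llvm_jump_instr_table_0_1:   jmp f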
+ for (auto &KV : Functions) + replaceValueWithFunction(KV.first, KV.second); + + for (auto &KV : Aliases) + replaceValueWithFunction(KV.first, KV.second); + + return !Functions.empty(); +} diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index ad2c553..df96b94 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -12,12 +12,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Assembly/PrintModulePass.h" + +#include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/JumpInstrTables.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" @@ -44,33 +47,24 @@ static cl::opt<cl::boolOrDefault> EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden, - cl::desc("Show encoding in .s output")); -static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden, - cl::desc("Show instruction structure in .s output")); - -static cl::opt<cl::boolOrDefault> -AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), - cl::init(cl::BOU_UNSET)); - -static bool getVerboseAsm() { - switch (AsmVerbose) { - case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); - case cl::BOU_TRUE: return true; - case cl::BOU_FALSE: return false; - } - llvm_unreachable("Invalid verbose asm state"); -} - void LLVMTargetMachine::initAsmInfo() { - AsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), + TargetTriple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. // Provide the user with a useful error message about what's wrong. - assert(AsmInfo && "MCAsmInfo not initialized. " + assert(TmpAsmInfo && "MCAsmInfo not initialized. " "Make sure you include the correct TargetSelect.h" "and that InitializeAllTargetMCs() is being invoked!"); + + if (Options.DisableIntegratedAS) + TmpAsmInfo->setUseIntegratedAssembler(false); + + if (Options.CompressDebugSections) + TmpAsmInfo->setCompressDebugSections(true); + + AsmInfo = TmpAsmInfo; } LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, @@ -92,7 +86,12 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { - // Targets may override createPassConfig to provide a target-specific sublass. + + // Add internal analysis passes from the target machine. + TM->addAnalysisPasses(PM); + + // Targets may override createPassConfig to provide a target-specific + // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); PassConfig->setStartStopPasses(StartAfter, StopAfter); @@ -127,7 +126,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // Ask the target for an isel. 
if (PassConfig->addInstSelector()) - return NULL; + return nullptr; PassConfig->addMachinePasses(); @@ -142,6 +141,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + // Passes to handle jumptable function annotations. These can't be handled at + // JIT time, so we don't add them directly to addPassesToGenerateCode. + PM.add(createJumpInstrTableInfoPass()); + PM.add(createJumpInstrTablesPass(Options.JTType)); + // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartAfter, StopAfter); @@ -154,18 +158,18 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // machine-level pass), and whatever other information is needed to // deserialize the code and resume compilation. For now, just write the // LLVM IR. - PM.add(createPrintModulePass(&Out)); + PM.add(createPrintModulePass(Out)); return false; } - if (hasMCSaveTempLabels()) + if (Options.MCOptions.MCSaveTempLabels) Context->setAllowTemporaryLabels(false); const MCAsmInfo &MAI = *getMCAsmInfo(); const MCRegisterInfo &MRI = *getRegisterInfo(); const MCInstrInfo &MII = *getInstrInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - OwningPtr<MCStreamer> AsmStreamer; + std::unique_ptr<MCStreamer> AsmStreamer; switch (FileType) { case CGFT_AssemblyFile: { @@ -174,20 +178,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MII, MRI, STI); // Create a code emitter if asked to show the encoding. - MCCodeEmitter *MCE = 0; - if (ShowMCEncoding) + MCCodeEmitter *MCE = nullptr; + if (Options.MCOptions.ShowMCEncoding) MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); - MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, - getVerboseAsm(), - hasMCUseLoc(), - hasMCUseCFI(), - hasMCUseDwarfDirectory(), - InstPrinter, - MCE, MAB, - ShowMCInst); + MCStreamer *S = getTarget().createAsmStreamer( + *Context, Out, Options.MCOptions.AsmVerbose, + Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, + Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); break; } @@ -198,30 +198,28 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); - if (MCE == 0 || MAB == 0) + if (!MCE || !MAB) return true; - AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), - *Context, *MAB, Out, - MCE, hasMCRelaxAll(), - hasMCNoExecStack())); - AsmStreamer.get()->setAutoInitSections(true); + AsmStreamer.reset(getTarget().createMCObjectStreamer( + getTargetTriple(), *Context, *MAB, Out, MCE, STI, + Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack)); break; } case CGFT_Null: // The Null output is intended for use for performance analysis and testing, // not real users. - AsmStreamer.reset(createNullStreamer(*Context)); + AsmStreamer.reset(getTarget().createNullStreamer(*Context)); break; } // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); - if (Printer == 0) + if (!Printer) return true; // If successful, createAsmPrinter took ownership of AsmStreamer. 
- AsmStreamer.take(); + AsmStreamer.release(); PM.add(Printer); @@ -238,7 +236,8 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, + nullptr); if (!Context) return true; @@ -257,11 +256,11 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, raw_ostream &Out, bool DisableVerify) { // Add common CodeGen passes. - Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); + Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr); if (!Ctx) return true; - if (hasMCSaveTempLabels()) + if (Options.MCOptions.MCSaveTempLabels) Ctx->setAllowTemporaryLabels(false); // Create the code emitter for the target if it exists. If not, .o file @@ -272,23 +271,21 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, STI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); - if (MCE == 0 || MAB == 0) + if (!MCE || !MAB) return true; - OwningPtr<MCStreamer> AsmStreamer; - AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), *Ctx, - *MAB, Out, MCE, - hasMCRelaxAll(), - hasMCNoExecStack())); - AsmStreamer.get()->InitSections(); + std::unique_ptr<MCStreamer> AsmStreamer; + AsmStreamer.reset(getTarget().createMCObjectStreamer( + getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, + Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); - if (Printer == 0) + if (!Printer) return true; // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.take(); + AsmStreamer.release(); PM.add(Printer); diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index deab05a..cdf505e 100644 --- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -13,12 +13,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "scheduler" + bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { // The isScheduleHigh flag allows nodes with wraparound dependencies that // cannot easily be modeled as edges with latencies to be scheduled as @@ -53,7 +54,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// of SU, return it, otherwise return null. SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = 0; + SUnit *OnlyAvailablePred = nullptr; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { SUnit &Pred = *I->getSUnit(); @@ -61,7 +62,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... otherwise give up. 
if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return 0; + return nullptr; OnlyAvailablePred = &Pred; } } @@ -105,7 +106,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { if (SU->isAvailable) return; // All preds scheduled. SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); - if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return; + if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) return; // Okay, we found a single predecessor that is available, but not scheduled. // Since it is available, it must be in the priority queue. First remove it. @@ -117,14 +118,14 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { } SUnit *LatencyPriorityQueue::pop() { - if (empty()) return NULL; + if (empty()) return nullptr; std::vector<SUnit *>::iterator Best = Queue.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; SUnit *V = *Best; - if (Best != prior(Queue.end())) + if (Best != std::prev(Queue.end())) std::swap(*Best, Queue.back()); Queue.pop_back(); return V; @@ -133,7 +134,7 @@ void LatencyPriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); } diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index ffe407a..d12c234 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -14,34 +14,31 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lexicalscopes" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; -LexicalScopes::~LexicalScopes() { - releaseMemory(); -} +#define DEBUG_TYPE "lexicalscopes" -/// releaseMemory - release memory. -void LexicalScopes::releaseMemory() { - MF = NULL; - CurrentFnLexicalScope = NULL; - DeleteContainerSeconds(LexicalScopeMap); - DeleteContainerSeconds(AbstractScopeMap); +/// reset - Reset the instance so that it's prepared for another function. +void LexicalScopes::reset() { + MF = nullptr; + CurrentFnLexicalScope = nullptr; + LexicalScopeMap.clear(); + AbstractScopeMap.clear(); InlinedLexicalScopeMap.clear(); AbstractScopesList.clear(); } /// initialize - Scan machine function and construct lexical scope nest. void LexicalScopes::initialize(const MachineFunction &Fn) { - releaseMemory(); + reset(); MF = &Fn; SmallVector<InsnRange, 4> MIRanges; DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap; @@ -54,35 +51,31 @@ void LexicalScopes::initialize(const MachineFunction &Fn) { /// extractLexicalScopes - Extract instruction ranges for each lexical scope /// for the given machine function.
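[Editorial note: pop() and remove() above both use the swap-with-back idiom, now spelled std::prev rather than the removed llvm::prior. A standalone sketch:]

#include <iterator>
#include <utility>
#include <vector>

// O(1) erase for an unordered queue: swap the element to the back
// first so pop_back() removes it without shifting the tail.
template <typename T>
void swapAndPop(std::vector<T> &Q, typename std::vector<T>::iterator It) {
  if (It != std::prev(Q.end()))
    std::swap(*It, Q.back());
  Q.pop_back();
}

[Order is not preserved, which is fine here since pop() rescans the whole vector for the best candidate anyway.]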
-void LexicalScopes:: -extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges, - DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { +void LexicalScopes::extractLexicalScopes( + SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { // Scan each instruction and create scopes. First build working set of scopes. - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { - const MachineInstr *RangeBeginMI = NULL; - const MachineInstr *PrevMI = NULL; + for (const auto &MBB : *MF) { + const MachineInstr *RangeBeginMI = nullptr; + const MachineInstr *PrevMI = nullptr; DebugLoc PrevDL; - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MInsn = II; - + for (const auto &MInsn : MBB) { // Check if instruction has valid location information. - const DebugLoc MIDL = MInsn->getDebugLoc(); + const DebugLoc MIDL = MInsn.getDebugLoc(); if (MIDL.isUnknown()) { - PrevMI = MInsn; + PrevMI = &MInsn; continue; } // If scope has not changed then skip this instruction. if (MIDL == PrevDL) { - PrevMI = MInsn; + PrevMI = &MInsn; continue; } // Ignore DBG_VALUE. It does not contribute to any instruction in output. - if (MInsn->isDebugValue()) + if (MInsn.isDebugValue()) continue; if (RangeBeginMI) { @@ -95,10 +88,10 @@ extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges, } // This is a beginning of a new instruction range. - RangeBeginMI = MInsn; + RangeBeginMI = &MInsn; // Reset previous markers. - PrevMI = MInsn; + PrevMI = &MInsn; PrevDL = MIDL; } @@ -111,30 +104,41 @@ extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges, } } +LexicalScope *LexicalScopes::findInlinedScope(DebugLoc DL) { + MDNode *Scope = nullptr; + MDNode *IA = nullptr; + DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); + auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); + return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; +} + /// findLexicalScope - Find lexical scope, either regular or inlined, for the /// given DebugLoc. Return NULL if not found. LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { - MDNode *Scope = NULL; - MDNode *IA = NULL; + MDNode *Scope = nullptr; + MDNode *IA = nullptr; DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); - if (!Scope) return NULL; + if (!Scope) + return nullptr; // The scope that we were created with could have an extra file - which // isn't what we care about in this case. DIDescriptor D = DIDescriptor(Scope); if (D.isLexicalBlockFile()) Scope = DILexicalBlockFile(Scope).getScope(); - - if (IA) - return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA)); - return LexicalScopeMap.lookup(Scope); + + if (IA) { + auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); + return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; + } + return findLexicalScope(Scope); } /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { - MDNode *Scope = NULL; - MDNode *InlinedAt = NULL; + MDNode *Scope = nullptr; + MDNode *InlinedAt = nullptr; DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); if (InlinedAt) { @@ -143,7 +147,7 @@ LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { // Create an inlined scope for inlined function. 
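[Editorial note: the lookups above now return the address of a value stored directly in the map (&I->second) instead of a separately allocated pointer. A sketch of the find-or-null shape, with std::map standing in for LLVM's map types; what matters is that the container keeps mapped values at stable addresses:]

#include <map>
#include <string>

struct Scope { /* ... */ };

std::map<std::string, Scope> ScopeMap;  // owns its values directly

// Find-or-null against a value-owning map. Handing out &I->second is
// safe here because std::map never relocates its nodes; reset() can
// then simply clear() instead of deleting pointers one by one.
Scope *findScope(const std::string &Key) {
  auto I = ScopeMap.find(Key);
  return I != ScopeMap.end() ? &I->second : nullptr;
}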
return getOrCreateInlinedScope(Scope, InlinedAt); } - + return getOrCreateRegularScope(Scope); } @@ -154,36 +158,49 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { Scope = DILexicalBlockFile(Scope).getScope(); D = DIDescriptor(Scope); } - - LexicalScope *WScope = LexicalScopeMap.lookup(Scope); - if (WScope) - return WScope; - LexicalScope *Parent = NULL; + auto I = LexicalScopeMap.find(Scope); + if (I != LexicalScopeMap.end()) + return &I->second; + + LexicalScope *Parent = nullptr; if (D.isLexicalBlock()) Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); - WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false); - LexicalScopeMap.insert(std::make_pair(Scope, WScope)); - if (!Parent && DIDescriptor(Scope).isSubprogram() - && DISubprogram(Scope).describes(MF->getFunction())) - CurrentFnLexicalScope = WScope; - - return WScope; + // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 + // compatibility is no longer required. + I = LexicalScopeMap.emplace(std::piecewise_construct, std::make_tuple(Scope), + std::make_tuple(Parent, DIDescriptor(Scope), + nullptr, false)).first; + + if (!Parent && DIDescriptor(Scope).isSubprogram() && + DISubprogram(Scope).describes(MF->getFunction())) + CurrentFnLexicalScope = &I->second; + + return &I->second; } /// getOrCreateInlinedScope - Find or create an inlined lexical scope. -LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope, +LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode, MDNode *InlinedAt) { - LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt); - if (InlinedScope) - return InlinedScope; - - DebugLoc InlinedLoc = DebugLoc::getFromDILocation(InlinedAt); - InlinedScope = new LexicalScope(getOrCreateLexicalScope(InlinedLoc), - DIDescriptor(Scope), InlinedAt, false); - InlinedLexicalScopeMap[InlinedLoc] = InlinedScope; - LexicalScopeMap[InlinedAt] = InlinedScope; - return InlinedScope; + std::pair<const MDNode*, const MDNode*> P(ScopeNode, InlinedAt); + auto I = InlinedLexicalScopeMap.find(P); + if (I != InlinedLexicalScopeMap.end()) + return &I->second; + + LexicalScope *Parent; + DILexicalBlock Scope(ScopeNode); + if (Scope.isSubprogram()) + Parent = getOrCreateLexicalScope(DebugLoc::getFromDILocation(InlinedAt)); + else + Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt); + + // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 + // compatibility is no longer required. + I = InlinedLexicalScopeMap.emplace(std::piecewise_construct, + std::make_tuple(P), + std::make_tuple(Parent, Scope, InlinedAt, + false)).first; + return &I->second; } /// getOrCreateAbstractScope - Find or create an abstract lexical scope. 
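[Editorial note: getOrCreateRegularScope() and getOrCreateInlinedScope() above construct the LexicalScope in place with emplace plus std::piecewise_construct; the FIXME notes that make_tuple is only there for MSVC2012, with forward_as_tuple as the intended spelling. A standalone sketch of the idiom with an invented Scope type:]

#include <map>
#include <tuple>

struct Scope {
  Scope(Scope *Parent, int Id) : Parent(Parent), Id(Id) {}
  Scope *Parent;
  int Id;
};

std::map<int, Scope> Scopes;

// Build the mapped value in place, forwarding constructor arguments:
// no copy, no default constructor, and the returned address stays
// valid for the life of the map entry.
Scope *getOrCreate(int Key, Scope *Parent) {
  auto I = Scopes.find(Key);
  if (I != Scopes.end())
    return &I->second;
  I = Scopes.emplace(std::piecewise_construct,
                     std::forward_as_tuple(Key),
                     std::forward_as_tuple(Parent, Key)).first;
  return &I->second;
}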
@@ -193,26 +210,28 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { DIDescriptor Scope(N); if (Scope.isLexicalBlockFile()) Scope = DILexicalBlockFile(Scope).getScope(); - LexicalScope *AScope = AbstractScopeMap.lookup(N); - if (AScope) - return AScope; + auto I = AbstractScopeMap.find(Scope); + if (I != AbstractScopeMap.end()) + return &I->second; - LexicalScope *Parent = NULL; + LexicalScope *Parent = nullptr; if (Scope.isLexicalBlock()) { - DILexicalBlock DB(N); + DILexicalBlock DB(Scope); DIDescriptor ParentDesc = DB.getContext(); Parent = getOrCreateAbstractScope(ParentDesc); } - AScope = new LexicalScope(Parent, DIDescriptor(N), NULL, true); - AbstractScopeMap[N] = AScope; - if (DIDescriptor(N).isSubprogram()) - AbstractScopesList.push_back(AScope); - return AScope; + I = AbstractScopeMap.emplace(std::piecewise_construct, + std::forward_as_tuple(Scope), + std::forward_as_tuple(Parent, Scope, + nullptr, true)).first; + if (Scope.isSubprogram()) + AbstractScopesList.push_back(&I->second); + return &I->second; } /// constructScopeNest void LexicalScopes::constructScopeNest(LexicalScope *Scope) { - assert (Scope && "Unable to calculate scope dominance graph!"); + assert(Scope && "Unable to calculate scope dominance graph!"); SmallVector<LexicalScope *, 4> WorkStack; WorkStack.push_back(Scope); unsigned Counter = 0; @@ -221,7 +240,8 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) { const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); bool visitedChildren = false; for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), - SE = Children.end(); SI != SE; ++SI) { + SE = Children.end(); + SI != SE; ++SI) { LexicalScope *ChildScope = *SI; if (!ChildScope->getDFSOut()) { WorkStack.push_back(ChildScope); @@ -239,17 +259,17 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) { /// assignInstructionRanges - Find ranges of instructions covered by each /// lexical scope. -void LexicalScopes:: -assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges, - DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) -{ - - LexicalScope *PrevLexicalScope = NULL; +void LexicalScopes::assignInstructionRanges( + SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { + + LexicalScope *PrevLexicalScope = nullptr; for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(), - RE = MIRanges.end(); RI != RE; ++RI) { + RE = MIRanges.end(); + RI != RE; ++RI) { const InsnRange &R = *RI; LexicalScope *S = MI2ScopeMap.lookup(R.first); - assert (S && "Lost LexicalScope for a machine instruction!"); + assert(S && "Lost LexicalScope for a machine instruction!"); if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) PrevLexicalScope->closeInsnRange(S); S->openInsnRange(R.first); @@ -262,26 +282,25 @@ assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges, } /// getMachineBasicBlocks - Populate given set using machine basic blocks which -/// have machine instructions that belong to lexical scope identified by +/// have machine instructions that belong to lexical scope identified by /// DebugLoc. 
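[Editorial note: constructScopeNest() above numbers the scope tree iteratively: a node is entered on push, revisited once per unfinished child, and numbered on the way out when no unfinished child remains. A standalone sketch of the same traversal, with invented field names:]

#include <vector>

struct Scope {
  std::vector<Scope *> Children;
  unsigned DFSIn = 0, DFSOut = 0;
};

// Explicit-stack DFS: DFSIn is assigned on push, DFSOut on the final
// visit, so the two numbers nest exactly as recursion would produce,
// and ancestor queries reduce to two integer comparisons.
void numberScopes(Scope *Root) {
  std::vector<Scope *> Stack{Root};
  unsigned Counter = 0;
  Root->DFSIn = ++Counter;
  while (!Stack.empty()) {
    Scope *S = Stack.back();
    bool PushedChild = false;
    for (Scope *C : S->Children) {
      if (C->DFSIn)
        continue;  // already entered
      C->DFSIn = ++Counter;
      Stack.push_back(C);
      PushedChild = true;
      break;  // descend before scanning further siblings
    }
    if (!PushedChild) {
      S->DFSOut = ++Counter;
      Stack.pop_back();
    }
  }
}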
-void LexicalScopes:: -getMachineBasicBlocks(DebugLoc DL, - SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) { +void LexicalScopes::getMachineBasicBlocks( + DebugLoc DL, SmallPtrSet<const MachineBasicBlock *, 4> &MBBs) { MBBs.clear(); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) return; - + if (Scope == CurrentFnLexicalScope) { - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) - MBBs.insert(I); + for (const auto &MBB : *MF) + MBBs.insert(&MBB); return; } SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(), - E = InsnRanges.end(); I != E; ++I) { + E = InsnRanges.end(); + I != E; ++I) { InsnRange &R = *I; MBBs.insert(R.first->getParent()); } @@ -299,8 +318,8 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { return true; bool Result = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { DebugLoc IDL = I->getDebugLoc(); if (IDL.isUnknown()) continue; @@ -311,8 +330,6 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { return Result; } -void LexicalScope::anchor() { } - /// dump - Print data structures. void LexicalScope::dump(unsigned Indent) const { #ifndef NDEBUG @@ -332,4 +349,3 @@ void LexicalScope::dump(unsigned Indent) const { Children[i]->dump(Indent + 2); #endif } - diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 25645e0..388f58f 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "livedebug" #include "LiveDebugVariables.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/Statistic.h" @@ -31,8 +30,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" @@ -41,8 +40,12 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include <memory> + using namespace llvm; +#define DEBUG_TYPE "livedebug" + static cl::opt<bool> EnableLDV("live-debug-variables", cl::init(true), cl::desc("Enable the live debug variables pass"), cl::Hidden); @@ -64,7 +67,7 @@ void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { +LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullptr) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); } @@ -72,7 +75,7 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; namespace { -/// UserValueScopes - Keeps track of lexical scopes associated with an +/// UserValueScopes - Keeps track of lexical scopes associated with a /// user value's source location. 
class UserValueScopes { DebugLoc DL; @@ -139,7 +142,7 @@ public: UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L, LocMap::Allocator &alloc) : variable(var), offset(o), IsIndirect(i), dl(L), leader(this), - next(0), locInts(alloc) + next(nullptr), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. @@ -154,8 +157,8 @@ public: UserValue *getNext() const { return next; } /// match - Does this UserValue match the parameters? - bool match(const MDNode *Var, unsigned Offset) const { - return Var == variable && Offset == offset; + bool match(const MDNode *Var, unsigned Offset, bool indirect) const { + return Var == variable && Offset == offset && indirect == IsIndirect; } /// merge - Merge equivalence classes. @@ -292,7 +295,7 @@ class LDVImpl { bool ModifiedMF; /// userValues - All allocated UserValue instances. - SmallVector<UserValue*, 8> userValues; + SmallVector<std::unique_ptr<UserValue>, 8> userValues; /// Map virtual register to eq class leader. typedef DenseMap<unsigned, UserValue*> VRMap; @@ -332,7 +335,6 @@ public: /// clear - Release all memory. void clear() { - DeleteContainerPointers(userValues); userValues.clear(); virtRegToEqClass.clear(); userVarMap.clear(); @@ -425,12 +427,13 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Offset)) + if (UV->match(Var, Offset, IsIndirect)) return UV; } - UserValue *UV = new UserValue(Var, Offset, IsIndirect, DL, allocator); - userValues.push_back(UV); + userValues.push_back( + make_unique<UserValue>(Var, Offset, IsIndirect, DL, allocator)); + UserValue *UV = userValues.back().get(); Leader = UserValue::merge(Leader, UV); return UV; } @@ -444,7 +447,7 @@ void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) return UV->getLeader(); - return 0; + return nullptr; } bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { @@ -480,7 +483,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { // DBG_VALUE has no slot index, use the previous instruction instead. SlotIndex Idx = MBBI == MBB->begin() ? LIS->getMBBStartIdx(MBB) : - LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot(); + LIS->getInstructionIndex(std::prev(MBBI)).getRegSlot(); // Handle consecutive DBG_VALUE instructions with the same slot index. do { if (handleDebugValue(MBBI, Idx)) { @@ -568,13 +571,11 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, // Collect all the (vreg, valno) pairs that are copies of LI. SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI.use_nodbg_begin(LI->reg), - UE = MRI.use_nodbg_end(); UI != UE; ++UI) { + for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) { + MachineInstr *MI = MO.getParent(); // Copies of the full value. - if (UI.getOperand().getSubReg() || !UI->isCopy()) + if (MO.getSubReg() || !MI->isCopy()) continue; - MachineInstr *MI = &*UI; unsigned DstReg = MI->getOperand(0).getReg(); // Don't follow copies to physregs. 
These are usually setting up call @@ -648,14 +649,14 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, const MachineOperand &Loc = locations[LocNo]; if (!Loc.isReg()) { - extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); + extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS, MDT, UVS); continue; } // Register locations are constrained to where the register value is live. if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { - LiveInterval *LI = 0; - const VNInfo *VNI = 0; + LiveInterval *LI = nullptr; + const VNInfo *VNI = nullptr; if (LIS.hasInterval(Loc.getReg())) { LI = &LIS.getInterval(Loc.getReg()); VNI = LI->getVNInfoAt(Idx); @@ -672,7 +673,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveRange *LR = &LIS.getRegUnit(Unit); const VNInfo *VNI = LR->getVNInfoAt(Idx); // Don't track copies from physregs, it is too expensive. - extendDef(Idx, LocNo, LR, VNI, 0, LIS, MDT, UVS); + extendDef(Idx, LocNo, LR, VNI, nullptr, LIS, MDT, UVS); } // Finally, erase all the undefs. @@ -704,7 +705,6 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { bool Changed = collectDebugValues(mf); computeIntervals(); DEBUG(print(dbgs())); - LS.releaseMemory(); ModifiedMF = Changed; return Changed; } @@ -736,7 +736,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, LiveIntervals& LIS) { DEBUG({ dbgs() << "Splitting Loc" << OldLocNo << '\t'; - print(dbgs(), 0); + print(dbgs(), nullptr); }); bool DidChange = false; LocMap::iterator LocMapI; @@ -826,7 +826,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, } } - DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);}); + DEBUG({dbgs() << "Split result: \t"; print(dbgs(), nullptr);}); return DidChange; } @@ -915,7 +915,7 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, // Don't insert anything after the first terminator, though. return MI->isTerminator() ? MBB->getFirstTerminator() : - llvm::next(MachineBasicBlock::iterator(MI)); + std::next(MachineBasicBlock::iterator(MI)); } DebugLoc UserValue::findDebugLoc() { diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h index 58a3f0f..bb67435 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -61,9 +61,9 @@ public: private: - virtual bool runOnMachineFunction(MachineFunction &); - virtual void releaseMemory(); - virtual void getAnalysisUsage(AnalysisUsage &) const; + bool runOnMachineFunction(MachineFunction &) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &) const override; }; diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index 2b8feb8..ce8ce96 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -222,13 +222,13 @@ void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { VNInfo *ValNo = I->valno; // Search for the first segment that we can't merge with. - iterator MergeTo = llvm::next(I); + iterator MergeTo = std::next(I); for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) { assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); } // If NewEnd was in the middle of a segment, make sure to get its endpoint. 
- I->end = std::max(NewEnd, prior(MergeTo)->end); + I->end = std::max(NewEnd, std::prev(MergeTo)->end); // If the newly formed segment now touches the segment after it and if they // have the same value number, merge the two segments into one segment. @@ -239,7 +239,7 @@ void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { } // Erase any dead segments. - segments.erase(llvm::next(I), MergeTo); + segments.erase(std::next(I), MergeTo); } @@ -274,7 +274,7 @@ LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) { MergeTo->end = I->end; } - segments.erase(llvm::next(MergeTo), llvm::next(I)); + segments.erase(std::next(MergeTo), std::next(I)); return MergeTo; } @@ -285,7 +285,7 @@ LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { // If the inserted segment starts in the middle or right at the end of // another segment, just extend that segment to contain the segment of S. if (it != begin()) { - iterator B = prior(it); + iterator B = std::prev(it); if (S.valno == B->valno) { if (B->start <= Start && B->end >= Start) { extendSegmentEndTo(B, End); @@ -331,13 +331,13 @@ LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { /// the value. If there is no live range before Kill, return NULL. VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (empty()) - return 0; + return nullptr; iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); if (I == begin()) - return 0; + return nullptr; --I; if (I->end <= StartIdx) - return 0; + return nullptr; if (I->end < Kill) extendSegmentEndTo(I, Kill); return I->valno; @@ -389,7 +389,7 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, I->end = Start; // Trim the old segment. // Insert the new one. - segments.insert(llvm::next(I), Segment(End, OldEnd, ValNo)); + segments.insert(std::next(I), Segment(End, OldEnd, ValNo)); } /// removeValNo - Remove all the segments defined by the specified value#. @@ -433,9 +433,9 @@ void LiveRange::join(LiveRange &Other, iterator OutIt = begin(); OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; - for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) { + for (iterator I = std::next(OutIt), E = end(); I != E; ++I) { VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]]; - assert(nextValNo != 0 && "Huh?"); + assert(nextValNo && "Huh?"); // If this live range has the same value # as its immediate predecessor, // and if they are neighbors, remove one Segment. This happens when we @@ -638,13 +638,13 @@ void LiveRange::verify() const { assert(I->start.isValid()); assert(I->end.isValid()); assert(I->start < I->end); - assert(I->valno != 0); + assert(I->valno != nullptr); assert(I->valno->id < valnos.size()); assert(I->valno == valnos[I->valno->id]); - if (llvm::next(I) != E) { - assert(I->end <= llvm::next(I)->start); - if (I->end == llvm::next(I)->start) - assert(I->valno != llvm::next(I)->valno); + if (std::next(I) != E) { + assert(I->end <= std::next(I)->start); + if (I->end == std::next(I)->start) + assert(I->valno != std::next(I)->valno); } } } @@ -857,7 +857,7 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { EqClass.clear(); EqClass.grow(LI->getNumValNums()); - const VNInfo *used = 0, *unused = 0; + const VNInfo *used = nullptr, *unused = nullptr; // Determine connections. for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end(); @@ -905,8 +905,8 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], // Rewrite instructions. 
for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), RE = MRI.reg_end(); RI != RE;) { - MachineOperand &MO = RI.getOperand(); - MachineInstr *MI = MO.getParent(); + MachineOperand &MO = *RI; + MachineInstr *MI = RI->getParent(); ++RI; // DBG_VALUE instructions don't have slot indexes, so get the index of the // instruction before them. diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index e1c3217..1559560 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -15,13 +15,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "LiveRangeCalc.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -41,6 +41,8 @@ #include <limits> using namespace llvm; +#define DEBUG_TYPE "regalloc" + char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", @@ -78,7 +80,7 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { } LiveIntervals::LiveIntervals() : MachineFunctionPass(ID), - DomTree(0), LRCalc(0) { + DomTree(nullptr), LRCalc(nullptr) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); } @@ -184,6 +186,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); LRCalc->createDeadDefs(LI); LRCalc->extendToUses(LI); + computeDeadValues(&LI, LI, nullptr, nullptr); } void LiveIntervals::computeVirtRegs() { @@ -325,8 +328,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallPtrSet<MachineBasicBlock*, 16> LiveOut; // Visit all instructions reading li->reg. - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg); - MachineInstr *UseMI = I.skipInstruction();) { + for (MachineRegisterInfo::reg_instr_iterator + I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end(); + I != E; ) { + MachineInstr *UseMI = &*(I++); if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); @@ -408,21 +413,34 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Handle dead values. bool CanSeparate = false; + computeDeadValues(li, NewLR, &CanSeparate, dead); + + // Move the trimmed segments back. + li->segments.swap(NewLR.segments); + DEBUG(dbgs() << "Shrunk: " << *li << '\n'); + return CanSeparate; +} + +void LiveIntervals::computeDeadValues(LiveInterval *li, + LiveRange &LR, + bool *CanSeparate, + SmallVectorImpl<MachineInstr*> *dead) { for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def); - assert(LRI != NewLR.end() && "Missing segment for PHI"); + LiveRange::iterator LRI = LR.FindSegmentContaining(VNI->def); + assert(LRI != LR.end() && "Missing segment for PHI"); if (LRI->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. 
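[Editorial note: computeDeadValues() is factored out above with optional out-parameters so that computeVirtRegInterval() can pass nullptr for the results only shrinkToUses() needs. A trivial standalone sketch of that shape, with invented names:]

#include <vector>

// Optional out-parameters: callers pass nullptr for results they do
// not need, and every write is guarded, the shape computeDeadValues()
// takes so two callers with different needs can share one body.
void collectDead(const std::vector<int> &Vals, bool *AnyDead,
                 std::vector<int> *DeadOut) {
  for (int V : Vals) {
    if (V >= 0)
      continue;  // treat negative entries as "dead" for illustration
    if (AnyDead)
      *AnyDead = true;
    if (DeadOut)
      DeadOut->push_back(V);
  }
}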
VNI->markUnused(); - NewLR.removeSegment(LRI->start, LRI->end); + LR.removeSegment(LRI->start, LRI->end); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); - CanSeparate = true; + if (CanSeparate) + *CanSeparate = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(VNI->def); @@ -434,11 +452,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, } } } - - // Move the trimmed segments back. - li->segments.swap(NewLR.segments); - DEBUG(dbgs() << "Shrunk: " << *li << '\n'); - return CanSeparate; } void LiveIntervals::extendToIndices(LiveRange &LR, @@ -458,7 +471,7 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill); SlotIndex MBBStart, MBBEnd; - tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB); + std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB); // If VNI isn't live out from KillMBB, the value is trivially pruned. if (LRQ.endPoint() < MBBEnd) { @@ -485,7 +498,7 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, MachineBasicBlock *MBB = *I; // Check if VNI is live in to MBB. - tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); + std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); LiveQueryResult LRQ = LI->Query(MBBStart); if (LRQ.valueIn() != VNI) { // This block isn't part of the VNI segment. Prune the search. @@ -569,9 +582,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { break; } if (CancelKill) - MI->clearRegisterKills(Reg, NULL); + MI->clearRegisterKills(Reg, nullptr); else - MI->addRegisterKilled(Reg, NULL); + MI->addRegisterKilled(Reg, nullptr); } } } @@ -587,17 +600,17 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { SlotIndex Start = LI.beginIndex(); if (Start.isBlock()) - return NULL; + return nullptr; SlotIndex Stop = LI.endIndex(); if (Stop.isBlock()) - return NULL; + return nullptr; // getMBBFromIndex doesn't need to search the MBB table when both indexes // belong to proper instructions. MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start); MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop); - return MBB1 == MBB2 ? MBB1 : NULL; + return MBB1 == MBB2 ? MBB1 : nullptr; } bool @@ -620,9 +633,12 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { } float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) { - const float Scale = 1.0f / BlockFrequency::getEntryFrequency(); - return (isDef + isUse) * (freq.getFrequency() * Scale); +LiveIntervals::getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineInstr *MI) { + BlockFrequency Freq = MBFI->getBlockFreq(MI->getParent()); + const float Scale = 1.0f / MBFI->getEntryFreq(); + return (isDef + isUse) * (Freq.getFrequency() * Scale); } LiveRange::Segment @@ -870,8 +886,8 @@ private: // values. The new range should be placed immediately before NewI, move any // intermediate ranges up. assert(NewI != I && "Inconsistent iterators"); - std::copy(llvm::next(I), NewI, I); - *llvm::prior(NewI) + std::copy(std::next(I), NewI, I); + *std::prev(NewI) = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } @@ -916,7 +932,7 @@ private: if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { // No def, search for the new kill. // This can never be an early clobber kill since there is no def. 
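[Editorial note: the new getSpillWeight() above normalizes the instruction's block frequency by the function's entry frequency before scaling by def/use. A worked sketch of the same arithmetic; the types are simplified, LLVM's BlockFrequency wraps a uint64_t:]

#include <cstdint>

// Mirrors the body shown above: an instruction in a block that runs
// ten times as often as the entry block contributes ten times the
// weight, and one that both reads and writes the register counts
// double.
float spillWeight(bool IsDef, bool IsUse, std::uint64_t BlockFreq,
                  std::uint64_t EntryFreq) {
  const float Scale = 1.0f / EntryFreq;
  return (IsDef + IsUse) * (BlockFreq * Scale);
}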
- llvm::prior(I)->end = findLastUseBefore(Reg).getRegSlot(); + std::prev(I)->end = findLastUseBefore(Reg).getRegSlot(); return; } } @@ -952,7 +968,7 @@ private: // DefVNI is a dead def. It may have been moved across other values in LR, // so move I up to NewI. Slide [NewI;I) down one position. - std::copy_backward(NewI, I, llvm::next(I)); + std::copy_backward(NewI, I, std::next(I)); *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } @@ -964,11 +980,11 @@ private: "No RegMask at OldIdx."); *RI = NewIdx.getRegSlot(); assert((RI == LIS.RegMaskSlots.begin() || - SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) && - "Cannot move regmask instruction above another call"); - assert((llvm::next(RI) == LIS.RegMaskSlots.end() || - SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) && - "Cannot move regmask instruction below another call"); + SlotIndex::isEarlierInstr(*std::prev(RI), *RI)) && + "Cannot move regmask instruction above another call"); + assert((std::next(RI) == LIS.RegMaskSlots.end() || + SlotIndex::isEarlierInstr(*RI, *std::next(RI))) && + "Cannot move regmask instruction below another call"); } // Return the last use of reg between NewIdx and OldIdx. @@ -976,10 +992,10 @@ private: if (TargetRegisterInfo::isVirtualRegister(Reg)) { SlotIndex LastUse = NewIdx; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI.use_nodbg_begin(Reg), - UE = MRI.use_nodbg_end(); - UI != UE; UI.skipInstruction()) { + for (MachineRegisterInfo::use_instr_nodbg_iterator + UI = MRI.use_instr_nodbg_begin(Reg), + UE = MRI.use_instr_nodbg_end(); + UI != UE; ++UI) { const MachineInstr* MI = &*UI; SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); if (InstSlot > LastUse && InstSlot < OldIdx) @@ -1121,7 +1137,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (LII->end.isDead()) { SlotIndex prevStart; if (LII != LI.begin()) - prevStart = llvm::prior(LII)->start; + prevStart = std::prev(LII)->start; // FIXME: This could be more efficient if there was a // removeSegment method that returned an iterator. diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp index d5a81a3..d81221b 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/Support/Debug.h" @@ -23,6 +22,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + // Merge a LiveInterval's segments. Guarantee no overlaps. 
void LiveIntervalUnion::unify(LiveInterval &VirtReg) { @@ -138,7 +139,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { } LiveInterval::iterator VirtRegEnd = VirtReg->end(); - LiveInterval *RecentReg = 0; + LiveInterval *RecentReg = nullptr; while (LiveUnionI.valid()) { assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg"); @@ -200,5 +201,5 @@ void LiveIntervalUnion::Array::clear() { LIUs[i].~LiveIntervalUnion(); free(LIUs); Size = 0; - LIUs = 0; + LIUs = nullptr; } diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp new file mode 100644 index 0000000..7efd941 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -0,0 +1,114 @@ +//===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LivePhysRegs utility for tracking liveness of +// physical registers across machine instructions in forward or backward order. +// A more detailed description can be found in the corresponding header file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + + +/// \brief Remove all registers from the set that get clobbered by the register +/// mask. +void LivePhysRegs::removeRegsInMask(const MachineOperand &MO) { + SparseSet<unsigned>::iterator LRI = LiveRegs.begin(); + while (LRI != LiveRegs.end()) { + if (MO.clobbersPhysReg(*LRI)) + LRI = LiveRegs.erase(LRI); + else + ++LRI; + } +} + +/// Simulates liveness when stepping backwards over an instruction(bundle): +/// Remove Defs, add uses. This is the recommended way of calculating liveness. +void LivePhysRegs::stepBackward(const MachineInstr &MI) { + // Remove defined registers and regmask kills from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + removeReg(Reg); + } else if (O->isRegMask()) + removeRegsInMask(*O); + } + + // Add uses to the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isUndef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + addReg(Reg); + } +} + +/// Simulates liveness when stepping forward over an instruction(bundle): Remove +/// killed-uses, add defs. This is not the recommended way, because it depends +/// on accurate kill flags. If possible use stepBackward() instead of this +/// function. +void LivePhysRegs::stepForward(const MachineInstr &MI) { + SmallVector<unsigned, 4> Defs; + // Remove killed registers from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + if (O->isDef()) { + if (!O->isDead()) + Defs.push_back(Reg); + } else { + if (!O->isKill()) + continue; + assert(O->isUse()); + removeReg(Reg); + } + } else if (O->isRegMask()) + removeRegsInMask(*O); + } + + // Add defs to the set. + for (unsigned i = 0, e = Defs.size(); i != e; ++i) + addReg(Defs[i]); +} + +/// Print the currently live registers to OS.
+void LivePhysRegs::print(raw_ostream &OS) const { + OS << "Live Registers:"; + if (!TRI) { + OS << " (uninitialized)\n"; + return; + } + + if (empty()) { + OS << " (empty)\n"; + return; + } + + for (const_iterator I = begin(), E = end(); I != E; ++I) + OS << " " << PrintReg(*I, TRI); + OS << "\n"; +} + +/// Dumps the currently live registers to the debug output. +void LivePhysRegs::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dbgs() << " " << *this; +#endif +} diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index ae086bc..a558e14 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "LiveRangeCalc.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "regalloc" + void LiveRangeCalc::reset(const MachineFunction *mf, SlotIndexes *SI, MachineDominatorTree *MDT, @@ -41,9 +42,8 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { // Visit all def operands. If the same instruction has multiple defs of Reg, // LR.createDeadDef() will deduplicate. - for (MachineRegisterInfo::def_iterator - I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) { - const MachineInstr *MI = &*I; + for (MachineOperand &MO : MRI->def_operands(Reg)) { + const MachineInstr *MI = MO.getParent(); // Find the corresponding slot index. SlotIndex Idx; if (MI->isPHI()) @@ -52,7 +52,7 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { else // Instructions are either normal 'r', or early clobber 'e'. Idx = Indexes->getInstructionIndex(MI) - .getRegSlot(I.getOperand().isEarlyClobber()); + .getRegSlot(MO.isEarlyClobber()); // Create the def in LR. This may find an existing def. LR.createDeadDef(Idx, *Alloc); @@ -64,9 +64,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all operands that read Reg. This may include partial defs. - for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), - E = MRI->reg_nodbg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); + for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { // Clear all kill flags. They will be reinserted after register allocation // by LiveIntervalAnalysis::addKillFlags(). if (MO.isUse()) @@ -75,7 +73,8 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { continue; // MI is reading Reg. We may have visited MI before if it happens to be // reading Reg multiple times. That is OK, extend() is idempotent. - const MachineInstr *MI = &*I; + const MachineInstr *MI = MO.getParent(); + unsigned OpNo = (&MO - &MI->getOperand(0)); // Find the SlotIndex being read. SlotIndex Idx; @@ -83,7 +82,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { assert(!MO.isDef() && "Cannot handle PHI def of partial register."); // PHI operands are paired: (Reg, PredMBB). // Extend the live range to be live-out from PredMBB. - Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB()); + Idx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB()); } else { // This is a normal instruction. 
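[Editorial note: extendToUses() above iterates reg_nodbg_operands() directly and recovers the operand number by pointer arithmetic, since the range-based loop no longer carries getOperandNo(). A standalone sketch of why OpNo = &MO - &MI->getOperand(0) works, with invented types:]

#include <cstddef>
#include <vector>

struct Operand { int Reg; };
struct Instr { std::vector<Operand> Ops; };

// Recover an operand's index from its address. Valid only because an
// instruction's operands occupy one contiguous array, so the pointer
// difference is exactly the element index.
std::size_t operandIndex(const Instr &MI, const Operand &MO) {
  return static_cast<std::size_t>(&MO - &MI.Ops[0]);
}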
Idx = Indexes->getInstructionIndex(MI).getRegSlot(); @@ -92,7 +91,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { if (MO.isDef()) { if (MO.isEarlyClobber()) Idx = Idx.getRegSlot(true); - } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) { + } else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) { // FIXME: This would be a lot easier if tied early-clobber uses also // had an early-clobber flag. if (MI->getOperand(DefIdx).isEarlyClobber()) @@ -114,7 +113,7 @@ void LiveRangeCalc::updateLiveIns() { MachineBasicBlock *MBB = I->DomNode->getBlock(); assert(I->Value && "No live-in value found"); SlotIndex Start, End; - tie(Start, End) = Indexes->getMBBRange(MBB); + std::tie(Start, End) = Indexes->getMBBRange(MBB); if (I->Kill.isValid()) // Value is killed inside this block. @@ -123,7 +122,7 @@ void LiveRangeCalc::updateLiveIns() { // The value is live-through, update LiveOut as well. // Defer the Domtree lookup until it is needed. assert(Seen.test(MBB->getNumber())); - LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); + LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)nullptr); } Updater.setDest(&I->LR); Updater.add(Start, End, I->Value); @@ -176,7 +175,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, // Remember if we have seen more than one value. bool UniqueVNI = true; - VNInfo *TheVNI = 0; + VNInfo *TheVNI = nullptr; // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { @@ -212,7 +211,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, } SlotIndex Start, End; - tie(Start, End) = Indexes->getMBBRange(Pred); + std::tie(Start, End) = Indexes->getMBBRange(Pred); // First time we see Pred. Try to determine the live-out value, but set // it as null if Pred is live-through with an unknown value. @@ -247,13 +246,13 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { SlotIndex Start, End; - tie(Start, End) = Indexes->getMBBRange(*I); + std::tie(Start, End) = Indexes->getMBBRange(*I); // Trim the live range in KillMBB. if (*I == KillMBBNum && Kill.isValid()) End = Kill; else LiveOut[MF->getBlockNumbered(*I)] = - LiveOutPair(TheVNI, (MachineDomTreeNode *)0); + LiveOutPair(TheVNI, nullptr); Updater.add(Start, End, TheVNI); } return true; @@ -342,12 +341,12 @@ void LiveRangeCalc::updateSSA() { ++Changes; assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; - tie(Start, End) = Indexes->getMBBRange(MBB); + std::tie(Start, End) = Indexes->getMBBRange(MBB); LiveRange &LR = I->LR; VNInfo *VNI = LR.getNextValue(Start, *Alloc); I->Value = VNI; // This block is done, we know the final value. - I->DomNode = 0; + I->DomNode = nullptr; // Add liveness since updateLiveIns now skips this node. 
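[Editorial note: several hunks above replace llvm::tie with std::tie for unpacking the (start, end) pair returned by getMBBRange(). A self-contained sketch, with SlotIndex reduced to an integer for illustration:]

#include <tuple>
#include <utility>

using SlotIndex = unsigned;  // stand-in; LLVM's SlotIndex is a class

std::pair<SlotIndex, SlotIndex> getRange() { return {10, 42}; }

int main() {
  SlotIndex Start, End;
  // Unpack the pair into existing locals, as the updated
  // updateLiveIns()/findReachingDefs() do.
  std::tie(Start, End) = getRange();
  return static_cast<int>(End - Start);
}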
if (I->Kill.isValid()) diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index a3a3fbb..67ab559 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -92,7 +92,7 @@ class LiveRangeCalc { VNInfo *Value; LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) - : LR(LR), DomNode(node), Kill(kill), Value(0) {} + : LR(LR), DomNode(node), Kill(kill), Value(nullptr) {} }; /// LiveIn - Work list of blocks where the live-in value has yet to be @@ -125,7 +125,8 @@ class LiveRangeCalc { void updateLiveIns(); public: - LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {} + LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr), + DomTree(nullptr), Alloc(nullptr) {} //===--------------------------------------------------------------------===// // High-level interface. @@ -203,7 +204,7 @@ public: /// addLiveInBlock(). void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) { Seen.set(MBB->getNumber()); - LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0); + LiveOut[MBB] = LiveOutPair(VNI, nullptr); } /// addLiveInBlock - Add a block with an unknown live-in value. This diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index cb70c43..431241f 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -11,7 +11,6 @@ // is spilled or split. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE"); STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE"); STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); @@ -164,12 +165,10 @@ void LiveRangeEdit::eraseVirtReg(unsigned Reg) { bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead) { - MachineInstr *DefMI = 0, *UseMI = 0; + MachineInstr *DefMI = nullptr, *UseMI = nullptr; // Check that there is a single def and a single use. - for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg), - E = MRI.reg_nodbg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); + for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) { MachineInstr *MI = MO.getParent(); if (MO.isDef()) { if (DefMI && DefMI != MI) @@ -199,7 +198,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, // We also need to make sure it is safe to move the load. // Assume there are stores between DefMI and UseMI. bool SawStore = true; - if (!DefMI->isSafeToMove(&TII, 0, SawStore)) + if (!DefMI->isSafeToMove(&TII, nullptr, SawStore)) return false; DEBUG(dbgs() << "Try to fold single def: " << *DefMI @@ -215,7 +214,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, DEBUG(dbgs() << " folded: " << *FoldMI); LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI); UseMI->eraseFromParent(); - DefMI->addRegisterDead(LI->reg, 0); + DefMI->addRegisterDead(LI->reg, nullptr); Dead.push_back(DefMI); ++NumDCEFoldedLoads; return true; @@ -238,7 +237,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // Use the same criteria as DeadMachineInstructionElim. 
bool SawStore = false; - if (!MI->isSafeToMove(&TII, 0, SawStore)) { + if (!MI->isSafeToMove(&TII, nullptr, SawStore)) { DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); return; } diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index 1d801ac..de2ce22 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveRegMatrix.h" #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumAssigned , "Number of registers assigned"); STATISTIC(NumUnassigned , "Number of registers unassigned"); @@ -65,7 +66,9 @@ bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { void LiveRegMatrix::releaseMemory() { for (unsigned i = 0, e = Matrix.size(); i != e; ++i) { Matrix[i].clear(); - Queries[i].clear(); + // No need to clear Queries here, since LiveIntervalUnion::Query doesn't + // have anything important to clear and LiveRegMatrix's runOnFunction() + // does a std::unique_ptr::reset anyways. } } diff --git a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp deleted file mode 100644 index 6221ca2..0000000 --- a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp +++ /dev/null @@ -1,111 +0,0 @@ -//===-- LiveInterval.cpp - Live Interval Representation -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the LiveRegUnits utility for tracking liveness of -// physical register units across machine instructions in forward or backward -// order. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/LiveRegUnits.h" -#include "llvm/CodeGen/MachineInstrBundle.h" -using namespace llvm; - -/// Return true if the given MachineOperand clobbers the given register unit. -/// A register unit is only clobbered if all its super-registers are clobbered. -static bool operClobbersUnit(const MachineOperand *MO, unsigned Unit, - const MCRegisterInfo *MCRI) { - for (MCRegUnitRootIterator RI(Unit, MCRI); RI.isValid(); ++RI) { - for (MCSuperRegIterator SI(*RI, MCRI, true); SI.isValid(); ++SI) { - if (!MO->clobbersPhysReg(*SI)) - return false; - } - } - return true; -} - -/// We assume the high bits of a physical super register are not preserved -/// unless the instruction has an implicit-use operand reading the -/// super-register or a register unit for the upper bits is available. -void LiveRegUnits::removeRegsInMask(const MachineOperand &Op, - const MCRegisterInfo &MCRI) { - SparseSet<unsigned>::iterator LUI = LiveUnits.begin(); - while (LUI != LiveUnits.end()) { - if (operClobbersUnit(&Op, *LUI, &MCRI)) - LUI = LiveUnits.erase(LUI); - else - ++LUI; - } -} - -void LiveRegUnits::stepBackward(const MachineInstr &MI, - const MCRegisterInfo &MCRI) { - // Remove defined registers and regmask kills from the set. 
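[Editorial note: both the new LivePhysRegs::removeRegsInMask() and the deleted LiveRegUnits version below walk a set and erase as they go. A standalone sketch of the erase-while-iterating pattern, shown with std::set:]

#include <set>

// Advance with the iterator that erase() returns; incrementing the
// just-erased iterator would be undefined. Same shape as
// removeRegsInMask() over its SparseSet of live registers.
void removeClobbered(std::set<unsigned> &Live, bool (*Clobbers)(unsigned)) {
  auto I = Live.begin();
  while (I != Live.end()) {
    if (Clobbers(*I))
      I = Live.erase(I);
    else
      ++I;
  }
}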
- for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { - if (O->isReg()) { - if (!O->isDef()) - continue; - unsigned Reg = O->getReg(); - if (Reg == 0) - continue; - removeReg(Reg, MCRI); - } else if (O->isRegMask()) { - removeRegsInMask(*O, MCRI); - } - } - // Add uses to the set. - for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { - if (!O->isReg() || !O->readsReg() || O->isUndef()) - continue; - unsigned Reg = O->getReg(); - if (Reg == 0) - continue; - addReg(Reg, MCRI); - } -} - -/// Uses with kill flag get removed from the set, defs added. If possible -/// use StepBackward() instead of this function because some kill flags may -/// be missing. -void LiveRegUnits::stepForward(const MachineInstr &MI, - const MCRegisterInfo &MCRI) { - SmallVector<unsigned, 4> Defs; - // Remove killed registers from the set. - for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { - if (O->isReg()) { - unsigned Reg = O->getReg(); - if (Reg == 0) - continue; - if (O->isDef()) { - if (!O->isDead()) - Defs.push_back(Reg); - } else { - if (!O->isKill()) - continue; - assert(O->isUse()); - removeReg(Reg, MCRI); - } - } else if (O->isRegMask()) { - removeRegsInMask(*O, MCRI); - } - } - // Add defs to the set. - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - addReg(Defs[i], MCRI); - } -} - -/// Adds all registers in the live-in list of block @p BB. -void LiveRegUnits::addLiveIns(const MachineBasicBlock *MBB, - const MCRegisterInfo &MCRI) { - for (MachineBasicBlock::livein_iterator L = MBB->livein_begin(), - LE = MBB->livein_end(); L != LE; ++L) { - addReg(*L, MCRI); - } -} diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp index be11a8f..b3161a4 100644 --- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "livestacks" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -24,6 +23,8 @@ #include <limits> using namespace llvm; +#define DEBUG_TYPE "livestacks" + char LiveStacks::ID = 0; INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", "Live Stack Slot Analysis", false, false) diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index ed55d7a..758b216 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -61,7 +61,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { for (unsigned i = 0, e = Kills.size(); i != e; ++i) if (Kills[i]->getParent() == MBB) return Kills[i]; - return NULL; + return nullptr; } void LiveVariables::VarInfo::dump() const { @@ -193,7 +193,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, SmallSet<unsigned,4> &PartDefRegs) { unsigned LastDefReg = 0; unsigned LastDefDist = 0; - MachineInstr *LastDef = NULL; + MachineInstr *LastDef = nullptr; for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; MachineInstr *Def = PhysRegDef[SubReg]; @@ -208,7 +208,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, } if (!LastDef) - return 0; + return nullptr; PartDefRegs.insert(LastDefReg); for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) { @@ -282,7 +282,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastDef = PhysRegDef[Reg]; MachineInstr 
*LastUse = PhysRegUse[Reg]; if (!LastDef && !LastUse) - return 0; + return nullptr; MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; @@ -333,7 +333,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // AX<dead> = AL<imp-def> // = AL<kill> // AX = - MachineInstr *LastPartDef = 0; + MachineInstr *LastPartDef = nullptr; unsigned LastPartDefDist = 0; SmallSet<unsigned, 8> PartUses; for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { @@ -436,7 +436,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR)) Super = *SR; - HandlePhysRegKill(Super, 0); + HandlePhysRegKill(Super, nullptr); } } @@ -492,7 +492,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; PhysRegDef[SubReg] = MI; - PhysRegUse[SubReg] = NULL; + PhysRegUse[SubReg] = nullptr; } } } @@ -506,8 +506,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { PhysRegDef = new MachineInstr*[NumRegs]; PhysRegUse = new MachineInstr*[NumRegs]; PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; - std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); - std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); + std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); PHIJoins.clear(); // FIXME: LiveIntervals will be updated to remove its dependence on @@ -536,7 +536,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, 0, Defs); + HandlePhysRegDef(*II, nullptr, Defs); } // Loop over all of the instructions, processing them. @@ -639,10 +639,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) - HandlePhysRegDef(i, 0, Defs); + HandlePhysRegDef(i, nullptr, Defs); - std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); - std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); + std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); } // Convert and transfer the dead / killed information we have gathered into @@ -701,14 +701,15 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) { /// which is used in a PHI node. We map that to the BB the vreg is coming from. 
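[Editorial note: the stride-2 loop in analyzePHINodes() below relies on the PHI operand layout. A standalone sketch of that layout and walk, with integers standing in for registers and block numbers:]

#include <utility>
#include <vector>

// A PHI's operands are laid out as: operand 0 = the result, then
// repeating (incoming value, predecessor block) pairs, which is why
// analyzePHINodes() steps i by 2 starting from 1 and reads operand
// i + 1 for the source block.
std::vector<std::pair<int, int>>
incomingValues(const std::vector<int> &Ops) {
  std::vector<std::pair<int, int>> Pairs;
  for (unsigned i = 1, e = Ops.size(); i != e; i += 2)
    Pairs.push_back({Ops[i], Ops[i + 1]});  // (vreg, pred block number)
  return Pairs;
}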
/// void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { - for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); - I != E; ++I) - for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) - for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) - if (BBI->getOperand(i).readsReg()) - PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] - .push_back(BBI->getOperand(i).getReg()); + for (const auto &MBB : Fn) + for (const auto &BBI : MBB) { + if (!BBI.isPHI()) + break; + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) + if (BBI.getOperand(i).readsReg()) + PHIVarInfo[BBI.getOperand(i + 1).getMBB()->getNumber()] + .push_back(BBI.getOperand(i).getReg()); + } } bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 26a1176..36885e8 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -14,15 +14,16 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "localstackalloc" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -38,6 +39,8 @@ using namespace llvm; +#define DEBUG_TYPE "localstackalloc" + STATISTIC(NumAllocations, "Number of frame indices allocated into local block"); STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated"); STATISTIC(NumReplacements, "Number of frame indices references replaced"); @@ -60,18 +63,27 @@ namespace { class LocalStackSlotPass: public MachineFunctionPass { SmallVector<int64_t,16> LocalOffsets; + /// StackObjSet - A set of stack object indexes + typedef SmallSetVector<int, 8> StackObjSet; void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, unsigned &MaxAlign); + void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign); void calculateFrameObjectOffsets(MachineFunction &Fn); bool insertFrameReferenceRegisters(MachineFunction &Fn); public: static char ID; // Pass identification, replacement for typeid - explicit LocalStackSlotPass() : MachineFunctionPass(ID) { } - bool runOnMachineFunction(MachineFunction &MF); + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { + initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired<StackProtector>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -81,8 +93,12 @@ namespace { char LocalStackSlotPass::ID = 0; char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; -INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc", - "Local Stack Slot Allocation", false, false) 
+INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) +INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) + bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -145,6 +161,22 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, ++NumAllocations; } +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. +void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + ProtectedObjs.insert(i); + } +} + /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. /// @@ -156,11 +188,16 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; unsigned MaxAlign = 0; + StackProtector *SP = &getAnalysis<StackProtector>(); // Make sure that the stack protector comes before the local variables on the // stack. - SmallSet<int, 16> LargeStackObjs; + SmallSet<int, 16> ProtectedObjs; if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; + StackObjSet SmallArrayObjs; + StackObjSet AddrOfObjs; + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, StackGrowsDown, MaxAlign); @@ -170,12 +207,29 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (!MFI->MayNeedStackProtector(i)) - continue; - AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); - LargeStackObjs.insert(i); + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + continue; + case StackProtector::SSPLK_SmallArray: + SmallArrayObjs.insert(i); + continue; + case StackProtector::SSPLK_AddrOf: + AddrOfObjs.insert(i); + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); } // Then assign frame offsets to stack objects that are not used to spill @@ -185,7 +239,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (LargeStackObjs.count(i)) + if (ProtectedObjs.count(i)) continue; AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); @@ -233,9 +287,11 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { MachineInstr *MI = I; - // Debug value instructions can't be out of range, so they don't need - // any updates. 
- if (MI->isDebugValue()) + // Debug value, stackmap and patchpoint instructions can't be out of + // range, so they don't need any updates. + if (MI->isDebugValue() || + MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) continue; // For now, allocate the base register(s) within the basic block @@ -322,18 +378,11 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // processed all FrameRefs before this one, just check whether or not // the next FrameRef will be able to reuse this new register. If not, // then don't bother creating it. - bool CanReuse = false; - for (int refn = ref + 1; refn < e; ++refn) { - FrameRef &FRN = FrameReferenceInsns[refn]; - MachineBasicBlock::iterator J = FRN.getMachineInstr(); - MachineInstr *MIN = J; - - CanReuse = lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, - FRN.getLocalOffset(), MIN, TRI); - break; - } - - if (!CanReuse) { + if (ref + 1 >= e || + !lookupCandidateBaseReg( + BaseOffset, FrameSizeAdjust, + FrameReferenceInsns[ref + 1].getLocalOffset(), + FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) { BaseOffset = PrevBaseOffset; continue; } @@ -363,7 +412,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Modify the instruction to use the new base register rather // than the frame index operand. - TRI->resolveFrameIndex(I, BaseReg, Offset); + TRI->resolveFrameIndex(*I, BaseReg, Offset); DEBUG(dbgs() << "Resolved: " << *MI); ++NumReplacements; diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index ca71e3b..08fef5f 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" @@ -25,10 +24,10 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/LeakDetector.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -36,9 +35,11 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "codegen" + MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), - AddressTaken(false), CachedMCSymbol(NULL) { + AddressTaken(false), CachedMCSymbol(nullptr) { Insts.Parent = this; } @@ -52,7 +53,8 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo()->getPrivateGlobalPrefix(); + const TargetMachine &TM = MF->getTarget(); + const char *Prefix = TM.getDataLayout()->getPrivateGlobalPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -98,7 +100,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { /// list, we update its parent pointer and add its operands from reg use/def /// lists if appropriate. 
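
Stepping back to the LocalStackSlotAllocation hunks that end above: the getSSPLayout switch buckets every protected object, and AssignProtectedObjSet is then called for large arrays first, small arrays second, and address-taken objects last, so the most overflow-prone objects sit closest to the guard. A hedged sketch of the resulting local block on a downward-growing stack (ordering within a bucket and exact offsets are target-dependent):

  [stack protector slot]       assigned first
  [SSPLK_LargeArray objects]   closest to the guard
  [SSPLK_SmallArray objects]
  [SSPLK_AddrOf objects]
  [remaining locals]           everything not recorded in ProtectedObjs
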
void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { - assert(N->getParent() == 0 && "machine instruction already in a basic block"); + assert(!N->getParent() && "machine instruction already in a basic block"); N->setParent(Parent); // Add the instruction's register operands to their corresponding @@ -113,13 +115,13 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { /// list, we update its parent pointer and remove its operands from reg use/def /// lists if appropriate. void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { - assert(N->getParent() != 0 && "machine instruction not in a basic block"); + assert(N->getParent() && "machine instruction not in a basic block"); // Remove from the use/def lists. if (MachineFunction *MF = N->getParent()->getParent()) N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); - N->setParent(0); + N->setParent(nullptr); LeakDetector::addGarbageObject(N); } @@ -160,7 +162,7 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { iterator E = end(); - while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue())) + while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue())) ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values // inside the bundle. @@ -229,11 +231,11 @@ MachineBasicBlock::getLastNonDebugInstr() const { const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { // A block with a landing pad successor only has one other successor. if (succ_size() > 2) - return 0; + return nullptr; for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) if ((*I)->isLandingPad()) return *I; - return 0; + return nullptr; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -277,7 +279,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { const char *Comma = ""; if (const BasicBlock *LBB = getBasicBlock()) { OS << Comma << "derived from LLVM BB "; - WriteAsOperand(OS, LBB, /*PrintType=*/false); + LBB->printAsOperand(OS, /*PrintType=*/false); Comma = ", "; } if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } @@ -330,6 +332,10 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } } +void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const { + OS << "BB#" << getNumber(); +} + void MachineBasicBlock::removeLiveIn(unsigned Reg) { std::vector<unsigned>::iterator I = std::find(LiveIns.begin(), LiveIns.end(), Reg); @@ -388,7 +394,7 @@ void MachineBasicBlock::updateTerminator() { // A block with no successors has no concerns with fall-through edges. if (this->succ_empty()) return; - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; DebugLoc dl; // FIXME: this is nowhere bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); @@ -419,7 +425,7 @@ void MachineBasicBlock::updateTerminator() { // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. 
if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, 0, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, dl); } } else { if (FBB) { @@ -430,16 +436,16 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) return; TII->RemoveBranch(*this); - TII->InsertBranch(*this, FBB, 0, Cond, dl); + TII->InsertBranch(*this, FBB, nullptr, Cond, dl); } else if (isLayoutSuccessor(FBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, 0, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, dl); } } else { // Walk through the successors and find the successor which is not // a landing pad and is not the conditional branch destination (in TBB) // as the fallthrough successor. - MachineBasicBlock *FallthroughBB = 0; + MachineBasicBlock *FallthroughBB = nullptr; for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { if ((*SI)->isLandingPad() || *SI == TBB) continue; @@ -457,7 +463,7 @@ void MachineBasicBlock::updateTerminator() { // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, 0, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, dl); return; } @@ -466,11 +472,11 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); } else if (!isLayoutSuccessor(FallthroughBB)) { TII->RemoveBranch(*this); TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl); @@ -622,7 +628,7 @@ bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { MachineFunction::const_iterator I(this); - return llvm::next(I) == MachineFunction::const_iterator(MBB); + return std::next(I) == MachineFunction::const_iterator(MBB); } bool MachineBasicBlock::canFallThrough() { @@ -637,7 +643,7 @@ bool MachineBasicBlock::canFallThrough() { return false; // Analyze the branches, if any, at the end of the block. - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { @@ -650,7 +656,7 @@ bool MachineBasicBlock::canFallThrough() { } // If there is no branch, control always falls through. - if (TBB == 0) return true; + if (!TBB) return true; // If there is some explicit branch to the fallthrough block, it can obviously // reach, even though the branch should get folded to fall through implicitly. @@ -664,7 +670,7 @@ bool MachineBasicBlock::canFallThrough() { // Otherwise, if it is conditional and has no explicit false block, it falls // through. - return FBB == 0; + return FBB == nullptr; } MachineBasicBlock * @@ -672,18 +678,23 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. 
if (Succ->isLandingPad()) - return NULL; + return nullptr; MachineFunction *MF = getParent(); DebugLoc dl; // FIXME: this is nowhere + // Performance might be harmed on HW that implements branching using exec mask + // where both sides of the branches are always executed. + if (MF->getTarget().requiresStructuredCFG()) + return nullptr; + // We may need to update this's terminator, but we can't do that if // AnalyzeBranch fails. If this uses a jump table, we won't touch it. const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) - return NULL; + return nullptr; // Avoid bugpoint weirdness: A block may end with a conditional branch but // jumps to the same MBB in either case. We have duplicate CFG edges in that @@ -692,11 +703,11 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (TBB && TBB == FBB) { DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" << getNumber() << '\n'); - return NULL; + return nullptr; } MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); - MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); + MF->insert(std::next(MachineFunction::iterator(this)), NMBB); DEBUG(dbgs() << "Splitting critical edge:" " BB#" << getNumber() << " -- BB#" << NMBB->getNumber() @@ -784,7 +795,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); - MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl); + MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, dl); if (Indexes) { for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); @@ -839,7 +850,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // extend to the end of the new split block. bool isLastMBB = - llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end(); + std::next(MachineFunction::iterator(NMBB)) == getParent()->end(); SlotIndex StartIndex = Indexes->getMBBEndIdx(this); SlotIndex PrevIndex = StartIndex.getPrevSlot(); @@ -1054,13 +1065,13 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, bool Changed = false; MachineFunction::iterator FallThru = - llvm::next(MachineFunction::iterator(this)); + std::next(MachineFunction::iterator(this)); - if (DestA == 0 && DestB == 0) { + if (!DestA && !DestB) { // Block falls through to successor. DestA = FallThru; DestB = FallThru; - } else if (DestA != 0 && DestB == 0) { + } else if (DestA && !DestB) { if (isCond) // Block ends in conditional jump that falls through to successor. DestB = FallThru; @@ -1114,6 +1125,13 @@ uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { return *getWeightIterator(Succ); } +/// Set successor weight of a given iterator. +void MachineBasicBlock::setSuccWeight(succ_iterator I, uint32_t weight) { + if (Weights.empty()) + return; + *getWeightIterator(I) = weight; +} + /// getWeightIterator - Return weight iterator corresponding to the I successor /// iterator MachineBasicBlock::weight_iterator MachineBasicBlock:: @@ -1210,9 +1228,3 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // At this point we have no idea of the liveness of the register.
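
setSuccWeight, added in the hunk above, deliberately becomes a no-op when the block never recorded successor weights, mirroring the empty-case default in getSuccWeight. A hedged caller sketch (MBB is assumed to be a MachineBasicBlock* whose successor edges carry weights):

  // Double the weight of the first successor edge.
  MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
  MBB->setSuccWeight(SI, 2 * MBB->getSuccWeight(SI));
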
return LQR_Unknown; } - -void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, - bool t) { - OS << "BB#" << MBB->getNumber(); -} - diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index e269d24..9151d99 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//====----- MachineBlockFrequencyInfo.cpp - Machine Block Frequency Analysis ----====// +//===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===// // // The LLVM Compiler Infrastructure // @@ -12,45 +12,182 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" using namespace llvm; +#define DEBUG_TYPE "block-freq" + +#ifndef NDEBUG +enum GVDAGType { + GVDT_None, + GVDT_Fraction, + GVDT_Integer +}; + +static cl::opt<GVDAGType> +ViewMachineBlockFreqPropagationDAG("view-machine-block-freq-propagation-dags", + cl::Hidden, + cl::desc("Pop up a window to show a dag displaying how machine block " + "frequencies propagate through the CFG."), + cl::values( + clEnumValN(GVDT_None, "none", + "do not display graphs."), + clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " + "fractional block frequency representation."), + clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " + "integer fractional block frequency representation."), + clEnumValEnd)); + +namespace llvm { + +template <> +struct GraphTraits<MachineBlockFrequencyInfo *> { + typedef const MachineBasicBlock NodeType; + typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; + typedef MachineFunction::const_iterator nodes_iterator; + + static inline + const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + + static ChildIteratorType child_begin(const NodeType *N) { + return N->succ_begin(); + } + + static ChildIteratorType child_end(const NodeType *N) { + return N->succ_end(); + } + + static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + + static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->end(); + } +}; + +template<> +struct DOTGraphTraits<MachineBlockFrequencyInfo*> : + public DefaultDOTGraphTraits { + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->getName(); + } + + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineBlockFrequencyInfo *Graph) { + std::string Result; + raw_string_ostream OS(Result); + + OS << Node->getName().str() << ":"; + switch (ViewMachineBlockFreqPropagationDAG) { + case GVDT_Fraction: + Graph->printBlockFreq(OS, Node); + break; + case GVDT_Integer: + OS << Graph->getBlockFreq(Node).getFrequency(); + break; + case GVDT_None: + llvm_unreachable("If we are not supposed to render a graph we should " + "never reach this 
point."); + } + + return Result; + } +}; + + +} // end namespace llvm +#endif + INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq", "Machine Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq", "Machine Block Frequency Analysis", true, true) char MachineBlockFrequencyInfo::ID = 0; -MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) { +MachineBlockFrequencyInfo:: +MachineBlockFrequencyInfo() :MachineFunctionPass(ID) { initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); - MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction, - MachineBranchProbabilityInfo>(); } -MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() { - delete MBFI; -} +MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {} void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { - MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>(); - MBFI->doFunction(&F, &MBPI); + MachineBranchProbabilityInfo &MBPI = + getAnalysis<MachineBranchProbabilityInfo>(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + if (!MBFI) + MBFI.reset(new ImplType); + MBFI->doFunction(&F, &MBPI, &MLI); +#ifndef NDEBUG + if (ViewMachineBlockFreqPropagationDAG != GVDT_None) { + view(); + } +#endif return false; } +void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); } + +/// Pop up a ghostview window with the current block frequency propagation +/// rendered using dot. +void MachineBlockFrequencyInfo::view() const { +// This code is only for debugging. +#ifndef NDEBUG + ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this), + "MachineBlockFrequencyDAGs"); +#else + errs() << "MachineBlockFrequencyInfo::view is only available in debug builds " + "on systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + BlockFrequency MachineBlockFrequencyInfo:: getBlockFreq(const MachineBasicBlock *MBB) const { - return MBFI->getBlockFreq(MBB); + return MBFI ? MBFI->getBlockFreq(MBB) : 0; +} + +const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { + return MBFI ? MBFI->getFunction() : nullptr; +} + +raw_ostream & +MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const BlockFrequency Freq) const { + return MBFI ? MBFI->printBlockFreq(OS, Freq) : OS; +} + +raw_ostream & +MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const MachineBasicBlock *MBB) const { + return MBFI ? MBFI->printBlockFreq(OS, MBB) : OS; +} + +uint64_t MachineBlockFrequencyInfo::getEntryFreq() const { + return MBFI ? 
MBFI->getEntryFreq() : 0; } diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 4b0f7f3..74af1e2 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -25,7 +25,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "block-placement2" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -46,6 +45,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "block-placement2" + STATISTIC(NumCondBranches, "Number of conditional branches"); STATISTIC(NumUncondBranches, "Number of unconditional branches"); STATISTIC(CondBranchTakenFreq, @@ -58,6 +59,13 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks", "blocks in the function."), cl::init(0), cl::Hidden); +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt<unsigned> +ExitBlockBias("block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -145,7 +153,7 @@ public: #ifndef NDEBUG /// \brief Dump the blocks in this chain. - void dump() LLVM_ATTRIBUTE_USED { + LLVM_DUMP_METHOD void dump() { for (iterator I = begin(), E = end(); I != E; ++I) (*I)->dump(); } @@ -199,7 +207,7 @@ class MachineBlockPlacement : public MachineFunctionPass { void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, - const BlockFilterSet *BlockFilter = 0); + const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); @@ -213,7 +221,7 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, - const BlockFilterSet *BlockFilter = 0); + const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineFunction &F, @@ -230,9 +238,9 @@ public: initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineLoopInfo>(); @@ -327,7 +335,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(4, 5); // 80% - MachineBasicBlock *BestSucc = 0; + MachineBasicBlock *BestSucc = nullptr; // FIXME: Due to the performance of the probability and weight routines in // the MBPI analysis, we manually compute probabilities using the edge // weights. This is suboptimal as it means that the somewhat subtle @@ -360,7 +368,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( // any CFG constraints.
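
One pattern worth noting in the MachineBlockFrequencyInfo rewrite that closes above: the implementation object is now held through a smart pointer (note MBFI.reset(new ImplType) and the MBFI.reset() in releaseMemory), and every accessor degrades to a neutral default once the analysis is released. Reduced to a standalone sketch with hypothetical names:

#include <memory>

struct Impl { unsigned freq(int BB) const { return 100u + BB; } };

class FreqAnalysis {
  std::unique_ptr<Impl> P;
public:
  void run()           { if (!P) P.reset(new Impl); }
  void releaseMemory() { P.reset(); }
  // Safe before run() and after releaseMemory(): returns a default.
  unsigned blockFreq(int BB) const { return P ? P->freq(BB) : 0; }
};
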
if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (CFG conflict)\n"); continue; } @@ -383,8 +392,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( } } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) - << " -> non-cold CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (non-cold CFG conflict)\n"); continue; } } @@ -401,23 +410,6 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( return BestSucc; } -namespace { -/// \brief Predicate struct to detect blocks already placed. -class IsBlockPlaced { - const BlockChain &PlacedChain; - const BlockToChainMapType &BlockToChain; - -public: - IsBlockPlaced(const BlockChain &PlacedChain, - const BlockToChainMapType &BlockToChain) - : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} - - bool operator()(MachineBasicBlock *BB) const { - return BlockToChain.lookup(BB) == &PlacedChain; - } -}; -} - /// \brief Select the best block from a worklist. /// /// This looks through the provided worklist as a list of candidate basic @@ -436,10 +428,12 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), - IsBlockPlaced(Chain, BlockToChain)), + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); - MachineBasicBlock *BestBlock = 0; + MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), WBE = WorkList.end(); @@ -453,8 +447,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq - << " (freq)\n"); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; BestBlock = *WBI; @@ -486,7 +480,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( return *BlockToChain[I]->begin(); } } - return 0; + return nullptr; } void MachineBlockPlacement::buildChain( @@ -501,11 +495,11 @@ void MachineBlockPlacement::buildChain( MachineBasicBlock *LoopHeaderBB = BB; markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); for (;;) { assert(BB); assert(BlockToChain[BB] == &Chain); - assert(*llvm::prior(Chain.end()) == BB); + assert(*std::prev(Chain.end()) == BB); // Look for the best viable successor if there is one to place immediately // after this block. 
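
The IsBlockPlaced functor deleted above survives as the capture-by-reference lambda now inlined into selectBestCandidateBlock's erase-remove call; the two spellings are equivalent. A self-contained sketch of the pattern, with plain structs standing in for the LLVM types:

#include <algorithm>
#include <map>
#include <vector>

struct Block {};
struct Chain {};

// Drop every worklist entry whose chain is the one just placed.
void pruneWorkList(std::vector<Block *> &WorkList,
                   const std::map<Block *, Chain *> &BlockToChain,
                   const Chain *Placed) {
  WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
                                [&](Block *BB) {
                                  auto It = BlockToChain.find(BB);
                                  return It != BlockToChain.end() &&
                                         It->second == Placed;
                                }),
                 WorkList.end());
}
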
@@ -536,7 +530,7 @@ void MachineBlockPlacement::buildChain( << " to " << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); } DEBUG(dbgs() << "Finished forming chain for header block " @@ -567,7 +561,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, << getBlockName(L.getHeader()) << "\n"); BlockFrequency BestPredFreq; - MachineBasicBlock *BestPred = 0; + MachineBasicBlock *BestPred = nullptr; for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), PE = L.getHeader()->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; if (!LoopBlockSet.count(Pred)) continue; DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " - << Pred->succ_size() << " successors, " - << MBFI->getBlockFreq(Pred) << " freq\n"); + << Pred->succ_size() << " successors, "; + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -623,11 +617,11 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // header and only rotate if safe. BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; if (!LoopBlockSet.count(*HeaderChain.begin())) - return 0; + return nullptr; BlockFrequency BestExitEdgeFreq; unsigned BestExitLoopDepth = 0; - MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *ExitingBB = nullptr; // If there are exits to outer loops, loop rotation can severely limit // fallthrough opportunities unless it selects such an exit. Keep a set of // blocks where rotating to exit with that block will reach an outer loop. @@ -641,7 +635,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockChain &Chain = *BlockToChain[*I]; // Ensure that this block is at the end of a chain; otherwise it could be // mid-way through an inner loop or a successor of an analyzable branch. - if (*I != *llvm::prior(Chain.end())) + if (*I != *std::prev(Chain.end())) continue; // Now walk the successors. We need to establish whether this has a viable @@ -690,14 +684,17 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] (" << ExitEdgeFreq << ")\n"); - // Note that we slightly bias this toward an existing layout successor to - // retain incoming order in the absence of better information. - // FIXME: Should we bias this more strongly? It's pretty weak. + << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + // Note that we bias this toward an existing layout successor to retain + // incoming order in the absence of better information. The exit must have + // a frequency higher than the current exit before we consider breaking + // the layout. + BranchProbability Bias(100 - ExitBlockBias, 100); if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || ((*I)->isLayoutSuccessor(*SI) && - !(ExitEdgeFreq < BestExitEdgeFreq))) { + !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; ExitingBB = *I; } @@ -713,14 +710,14 @@ // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop.
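
The Bias term introduced above turns the layout-successor tie-break into an explicit percentage hurdle. Worked numbers, assuming the flag were set to 20 (the default of 0 yields Bias = 1 and reproduces the old comparison exactly):

  // Bias = (100 - 20) / 100 = 4/5
  // BestExitEdgeFreq = 1000  =>  threshold = 1000 * 4/5 = 800
  // layout-successor exit, ExitEdgeFreq = 850: !(850 < 800) -> wins tie-break
  // layout-successor exit, ExitEdgeFreq = 790:  (790 < 800) -> current best kept
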
if (!ExitingBB || L.getNumBlocks() == 1) - return 0; + return nullptr; // Also, if we have exit blocks which lead to outer loops but didn't select // one of them as the exiting block we are rotating toward, disable loop // rotation altogether. if (!BlocksExitingToOuterLoop.empty() && !BlocksExitingToOuterLoop.count(ExitingBB)) - return 0; + return nullptr; DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); return ExitingBB; @@ -745,7 +742,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, PI != PE; ++PI) { BlockChain *PredChain = BlockToChain[*PI]; if (!LoopBlockSet.count(*PI) && - (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + (!PredChain || *PI == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } @@ -755,7 +752,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // bottom is a viable exiting block. If so, bail out as rotating will // introduce an unnecessary branch. if (ViableTopFallthrough) { - MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), SE = Bottom->succ_end(); SI != SE; ++SI) { @@ -771,7 +768,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (ExitIt == LoopChain.end()) return; - std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); + std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } /// \brief Forms basic block chains from the natural loop structures. @@ -799,7 +796,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. - MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *ExitingBB = nullptr; if (LoopTop == L.getHeader()) ExitingBB = findBestLoopExit(F, L, LoopBlockSet); @@ -887,11 +884,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // the exact fallthrough behavior for. for (;;) { Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; - MachineFunction::iterator NextFI(llvm::next(FI)); + MachineFunction::iterator NextFI(std::next(FI)); MachineBasicBlock *NextBB = NextFI; // Ensure that the layout successor is a viable block, as we know that // fallthrough is a possibility. @@ -899,7 +896,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " << getBlockName(BB) << " -> " << getBlockName(NextBB) << "\n"); - Chain->merge(NextBB, 0); + Chain->merge(NextBB, nullptr); FI = NextFI; BB = NextBB; } @@ -939,7 +936,9 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { BlockChain &FunctionChain = *BlockToChain[&F.front()]; buildChain(&F.front(), FunctionChain, BlockWorkList); +#ifndef NDEBUG typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; +#endif DEBUG({ // Crash at the end so we get all of the debugging output first. bool BadFunc = false; @@ -983,13 +982,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Update the terminator of the previous block. 
if (BI == FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI)); // FIXME: It would be awesome if updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this // boiler plate. Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { // The "PrevBB" is not yet updated to reflect current code layout, so, // o. it may fall-through to a block without explicit "goto" instruction @@ -1006,10 +1005,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { PrevBB->updateTerminator(); needUpdateBr = false; Cond.clear(); - TBB = FBB = 0; + TBB = FBB = nullptr; if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { // FIXME: This should never take place. - TBB = FBB = 0; + TBB = FBB = nullptr; } } @@ -1034,7 +1033,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Fixup the last block. Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) F.back().updateTerminator(); @@ -1055,7 +1054,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; - for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), + for (BlockChain::iterator BI = std::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { // Don't align non-looping basic blocks. These are unlikely to execute @@ -1081,7 +1080,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. - MachineBasicBlock *LayoutPred = *llvm::prior(BI); + MachineBasicBlock *LayoutPred = *std::prev(BI); // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above. @@ -1103,7 +1102,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. - if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) + return false; + + if (skipOptnoneFunction(*F.getFunction())) return false; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); @@ -1149,9 +1151,9 @@ public: initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); AU.setPreservesAll(); @@ -1171,7 +1173,7 @@ INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them.
- if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) return false; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index ae70912..6fbc2be 100644 --- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -77,8 +77,9 @@ getEdgeWeight(const MachineBasicBlock *Src, return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); } -bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, - MachineBasicBlock *Dst) const { +bool +MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% // FIXME: Compare against a static "hot" BranchProbability. return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); @@ -87,7 +88,7 @@ bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { uint32_t MaxWeight = 0; - MachineBasicBlock *MaxSucc = 0; + MachineBasicBlock *MaxSucc = nullptr; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { uint32_t Weight = getEdgeWeight(MBB, I); @@ -100,12 +101,11 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) return MaxSucc; - return 0; + return nullptr; } -BranchProbability -MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, - MachineBasicBlock *Dst) const { +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { uint32_t Scale = 1; uint32_t D = getSumForBlock(Src, Scale); uint32_t N = getEdgeWeight(Src, Dst) / Scale; @@ -113,13 +113,13 @@ MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, return BranchProbability(N, D); } -raw_ostream &MachineBranchProbabilityInfo:: -printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src, - MachineBasicBlock *Dst) const { +raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability( + raw_ostream &OS, const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { const BranchProbability Prob = getEdgeProbability(Src, Dst); OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() - << " probability is " << Prob + << " probability is " << Prob << (isEdgeHot(Src, Dst) ? 
" [HOT edge]\n" : "\n"); return OS; diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index d228286..7da439c 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-cse" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-cse" + STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); STATISTIC(NumPhysCSEs, @@ -49,9 +50,9 @@ namespace { initializeMachineCSEPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); @@ -60,7 +61,7 @@ namespace { AU.addPreserved<MachineDominatorTree>(); } - virtual void releaseMemory() { + void releaseMemory() override { ScopeMap.clear(); Exps.clear(); } @@ -131,9 +132,24 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, unsigned SrcReg = DefMI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) continue; - if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) + if (DefMI->getOperand(0).getSubReg()) + continue; + // FIXME: We should trivially coalesce subregister copies to expose CSE + // opportunities on instructions with truncated operands (see + // cse-add-with-overflow.ll). This can be done here as follows: + // if (SrcSubReg) + // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, + // SrcSubReg); + // MO.substVirtReg(SrcReg, SrcSubReg, *TRI); + // + // The 2-addr pass has been updated to handle coalesced subregs. However, + // some machine-specific code still can't handle it. + // To handle it properly we also need a way find a constrained subregister + // class given a super-reg class and subreg index. + if (DefMI->getOperand(1).getSubReg()) continue; - if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (!MRI->constrainRegClass(SrcReg, RC)) continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); @@ -214,7 +230,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, // Next, collect all defs into PhysDefs. If any is already in PhysRefs // (which currently contains only uses), set the PhysUseDef flag. 
PhysUseDef = false; - MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + MachineBasicBlock::const_iterator I = MI; I = std::next(I); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) @@ -265,7 +281,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, } CrossMBB = true; } - MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); + MachineBasicBlock::const_iterator I = CSMI; I = std::next(I); MachineBasicBlock::const_iterator E = MI; MachineBasicBlock::const_iterator EE = CSMBB->end(); unsigned LookAheadLeft = LookAheadLimit; @@ -310,8 +326,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, } bool MachineCSE::isCSECandidate(MachineInstr *MI) { - if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) + if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || + MI->isInlineAsm() || MI->isDebugValue()) return false; // Ignore copies. @@ -349,15 +365,11 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, TargetRegisterInfo::isVirtualRegister(Reg)) { MayIncreasePressure = false; SmallPtrSet<MachineInstr*, 8> CSUses; - for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - CSUses.insert(Use); + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + CSUses.insert(&MI); } - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - if (!CSUses.count(Use)) { + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { + if (!CSUses.count(&MI)) { MayIncreasePressure = true; break; } @@ -388,11 +400,9 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, } if (!HasVRegUse) { bool HasNonCopyUse = false; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { // Ignore copies. - if (!Use->isCopyLike()) { + if (!MI.isCopyLike()) { HasNonCopyUse = true; break; } @@ -405,11 +415,9 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // it unless the defined value is already used in the BB of the new use. 
bool HasPHI = false; SmallPtrSet<MachineBasicBlock*, 4> CSBBs; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - HasPHI |= Use->isPHI(); - CSBBs.insert(Use->getParent()); + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + HasPHI |= MI.isPHI(); + CSBBs.insert(MI.getParent()); } if (!HasPHI) @@ -513,7 +521,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool DoCSE = true; unsigned NumDefs = MI->getDesc().getNumDefs() + MI->getDesc().getNumImplicitDefs(); - + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) @@ -652,6 +660,9 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { } bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); MRI = &MF.getRegInfo(); diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 4f48e2c..3119a35 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "codegen-cp" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "codegen-cp" + STATISTIC(NumDeletes, "Number of dead copies deleted"); namespace { @@ -42,7 +43,7 @@ namespace { initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: typedef SmallVector<unsigned, 4> DestList; @@ -127,13 +128,10 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, } // Remove MI from the function because it has been determined it is dead. -// Turn it into a noop KILL instruction if it has super-register liveness -// adjustments. +// Turn it into a noop KILL instruction as opposed to removing it to +// maintain imp-use/imp-def chains. void MachineCopyPropagation::removeCopy(MachineInstr *MI) { - if (MI->getNumOperands() == 2) - MI->eraseFromParent(); - else - MI->setDesc(TII->get(TargetOpcode::KILL)); + MI->setDesc(TII->get(TargetOpcode::KILL)); } bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { @@ -142,6 +140,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map SourceMap SrcMap; // Src -> Def map + DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); + bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { MachineInstr *MI = &*I; @@ -176,6 +176,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // CALL // %RAX<def> = COPY %RSP + DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump()); + // Clear any kills of Def between CopyMI and MI. This extends the // live range. for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) @@ -191,10 +193,14 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If Src is defined by a previous copy, it cannot be eliminated. 
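
removeCopy above no longer erases even the plain two-operand copy; every dead copy is rewritten as a no-op KILL so that implicit operands describing super-register liveness stay in the instruction stream. In the same notation as the comments above, a hedged illustration (register names chosen for the example):

  dead copy:  %AX<def> = COPY %BX, %RAX<imp-def>
  after:      %AX<def> = KILL %BX, %RAX<imp-def>

Only the opcode changes; the operand list, and with it the imp-use/imp-def chain, is preserved.
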
for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) + if (CI != CopyMap.end()) { + DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump()); MaybeDeadCopies.remove(CI->second); + } } + DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); + // Copy is now a candidate for deletion. MaybeDeadCopies.insert(MI); @@ -255,8 +261,10 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // for elimination. for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) + if (CI != CopyMap.end()) { + DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); MaybeDeadCopies.remove(CI->second); + } } } @@ -273,6 +281,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { unsigned Reg = (*DI)->getOperand(0).getReg(); if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg)) continue; + DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; + (*DI)->dump()); removeCopy(*DI); Changed = true; ++NumDeletes; @@ -320,6 +330,9 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + bool Changed = false; TRI = MF.getTarget().getRegisterInfo(); diff --git a/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp new file mode 100644 index 0000000..0bee846 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -0,0 +1,54 @@ +//===- MachineDominanceFrontier.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/Analysis/DominanceFrontierImpl.h" +#include "llvm/CodeGen/Passes.h" + + +using namespace llvm; + +namespace llvm { +template class DominanceFrontierBase<MachineBasicBlock>; +template class ForwardDominanceFrontierBase<MachineBasicBlock>; +} + + +char MachineDominanceFrontier::ID = 0; + +INITIALIZE_PASS_BEGIN(MachineDominanceFrontier, "machine-domfrontier", + "Machine Dominance Frontier Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier", + "Machine Dominance Frontier Construction", true, true) + +MachineDominanceFrontier::MachineDominanceFrontier() + : MachineFunctionPass(ID), + Base() { + initializeMachineDominanceFrontierPass(*PassRegistry::getPassRegistry()); +} + +char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID; + +bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + Base.analyze(getAnalysis<MachineDominatorTree>().getBase()); + return false; +} + +void MachineDominanceFrontier::releaseMemory() { + Base.releaseMemory(); +} + +void MachineDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index f3be318..7e9b755 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -26,8 +25,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -39,6 +38,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "codegen" + //===----------------------------------------------------------------------===// // MachineFunction implementation //===----------------------------------------------------------------------===// @@ -57,9 +58,9 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, if (TM.getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(TM); else - RegInfo = 0; + RegInfo = nullptr; - MFInfo = 0; + MFInfo = nullptr; FrameInfo = new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack")); @@ -78,7 +79,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, TM.getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; - JumpTableInfo = 0; + JumpTableInfo = nullptr; } MachineFunction::~MachineFunction() { @@ -124,6 +125,11 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { return JumpTableInfo; } +/// Should we be emitting segmented stack stuff for the function +bool MachineFunction::shouldSplitStack() { + return getFunction()->hasFnAttribute("split-stack"); +} + /// RenumberBlocks - This 
discards all of the MachineBasicBlock numbers and /// recomputes them. This guarantees that the MBB numbers are sequential, /// dense, and match the ordering of the blocks within the function. If a @@ -132,7 +138,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (empty()) { MBBNumbering.clear(); return; } MachineFunction::iterator MBBI, E = end(); - if (MBB == 0) + if (MBB == nullptr) MBBI = begin(); else MBBI = MBB; @@ -140,7 +146,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { // Figure out the block number this should have. unsigned BlockNo = 0; if (MBBI != begin()) - BlockNo = prior(MBBI)->getNumber()+1; + BlockNo = std::prev(MBBI)->getNumber() + 1; for (; MBBI != E; ++MBBI, ++BlockNo) { if (MBBI->getNumber() != (int)BlockNo) { @@ -148,7 +154,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (MBBI->getNumber() != -1) { assert(MBBNumbering[MBBI->getNumber()] == &*MBBI && "MBB number mismatch!"); - MBBNumbering[MBBI->getNumber()] = 0; + MBBNumbering[MBBI->getNumber()] = nullptr; } // If BlockNo is already taken, set that block's number to -1. @@ -232,11 +238,17 @@ MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { + if (MMO->getValue()) + return new (Allocator) + MachineMemOperand(MachinePointerInfo(MMO->getValue(), + MMO->getOffset()+Offset), + MMO->getFlags(), Size, + MMO->getBaseAlignment(), nullptr); return new (Allocator) - MachineMemOperand(MachinePointerInfo(MMO->getValue(), + MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, - MMO->getBaseAlignment(), 0); + MMO->getBaseAlignment(), nullptr); } MachineInstr::mmo_iterator @@ -347,15 +359,15 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << PrintReg(I->first, TRI); if (I->second) OS << " in " << PrintReg(I->second, TRI); - if (llvm::next(I) != E) + if (std::next(I) != E) OS << ", "; } OS << '\n'; } - for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { + for (const auto &BB : *this) { OS << '\n'; - BB->print(OS, Indexes); + BB.print(OS, Indexes); } OS << "\n# End machine code for function " << getName() << ".\n\n"; @@ -425,7 +437,16 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, MachineRegisterInfo &MRI = getRegInfo(); unsigned VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { - assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!"); + const TargetRegisterClass *VRegRC = MRI.getRegClass(VReg); + (void)VRegRC; + // A physical register can be added several times. + // Between two calls, the register class of the related virtual register + // may have been constrained to match some operation constraints. + // In that case, check that the current register class includes the + // physical register and is a sub class of the specified RC. + assert((VRegRC == RC || (VRegRC->contains(PReg) && + RC->hasSubClassEq(VRegRC))) && + "Register class mismatch!"); return VReg; } VReg = MRI.createVirtualRegister(RC); @@ -436,14 +457,14 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a /// normal 'L' label is returned. 
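The relaxed assertion in addLiveIn above accepts a virtual register whose class was narrowed between calls, provided the narrowed class still contains the physical register and is a subclass of the requested one. A toy model of that check, treating register classes as sorted sets (names hypothetical, not the TargetRegisterClass API):

    #include <algorithm>
    #include <cassert>
    #include <initializer_list>
    #include <vector>

    struct RegClass {
      std::vector<unsigned> Regs;
      RegClass(std::initializer_list<unsigned> L) : Regs(L) {
        std::sort(Regs.begin(), Regs.end());
      }
      bool contains(unsigned R) const {
        return std::binary_search(Regs.begin(), Regs.end(), R);
      }
      // Plays the role of RC->hasSubClassEq(VRegRC): every register of Sub
      // must also be in *this.
      bool hasSubClassEq(const RegClass &Sub) const {
        return std::includes(Regs.begin(), Regs.end(),
                             Sub.Regs.begin(), Sub.Regs.end());
      }
    };

    int main() {
      RegClass GPR{0, 1, 2, 3}; // class requested by the caller
      RegClass GPRLo{0, 1};     // class the vreg was constrained to
      unsigned PReg = 1;
      // The relaxed addLiveIn condition: a narrowed class is acceptable if
      // it still contains PReg.
      assert(GPRLo.contains(PReg) && GPR.hasSubClassEq(GPRLo));
    }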
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { + const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); - const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); - const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : - MAI.getPrivateGlobalPrefix(); + const char *Prefix = isLinkerPrivate ? DL->getLinkerPrivateGlobalPrefix() : + DL->getPrivateGlobalPrefix(); SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; @@ -453,8 +474,8 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. MCSymbol *MachineFunction::getPICBaseSymbol() const { - const MCAsmInfo &MAI = *Target.getMCAsmInfo(); - return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + const DataLayout *DL = getTarget().getDataLayout(); + return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Twine(getFunctionNumber())+"$pb"); } @@ -490,14 +511,13 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, /// a nonnegative identifier to represent it. /// int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, - bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { + bool isSS, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, getFrameLowering()->getStackAlignment()); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, - Alloca)); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); ensureMaxAlignment(Alignment); @@ -510,11 +530,10 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); - CreateStackObject(Size, Alignment, true, false); + Alignment = clampStackAlignment( + !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, + getFrameLowering()->getStackAlignment()); + CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); return Index; @@ -528,11 +547,10 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); - Objects.push_back(StackObject(0, Alignment, 0, false, false, true, Alloca)); + Alignment = clampStackAlignment( + !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, + getFrameLowering()->getStackAlignment()); + Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; } @@ -551,17 +569,30 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, 
int64_t SPOffset, // object is 16-byte aligned. unsigned StackAlign = getFrameLowering()->getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); - Align = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Align, getFrameLowering()->getStackAlignment()); + Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, - /*NeedSP*/ false, - /*Alloca*/ 0)); + /*Alloca*/ nullptr)); return -++NumFixedObjects; } +/// CreateFixedSpillStackObject - Create a spill slot at a fixed location +/// on the stack. Returns an index with a negative value. +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, + int64_t SPOffset) { + unsigned StackAlign = getFrameLowering()->getStackAlignment(); + unsigned Align = MinAlign(SPOffset, StackAlign); + Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, + /*Immutable*/ true, + /*isSS*/ true, + /*Alloca*/ nullptr)); + return -++NumFixedObjects; +} BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { @@ -577,7 +608,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { if (!isCalleeSavedInfoValid()) return BV; - for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) BV.set(*CSR); // The entry MBB always has all CSRs pristine. @@ -805,6 +836,37 @@ unsigned MachineConstantPoolEntry::getRelocationInfo() const { return Val.ConstVal->getRelocationInfo(); } +SectionKind +MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { + SectionKind Kind; + switch (getRelocationInfo()) { + default: + llvm_unreachable("Unknown section kind"); + case 2: + Kind = SectionKind::getReadOnlyWithRel(); + break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (DL->getTypeAllocSize(getType())) { + case 4: + Kind = SectionKind::getMergeableConst4(); + break; + case 8: + Kind = SectionKind::getMergeableConst8(); + break; + case 16: + Kind = SectionKind::getMergeableConst16(); + break; + default: + Kind = SectionKind::getMergeableConst(); + break; + } + } + return Kind; +} + MachineConstantPool::~MachineConstantPool() { for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (Constants[i].isMachineConstantPoolEntry()) @@ -830,11 +892,10 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) || isa<StructType>(B->getType()) || isa<ArrayType>(B->getType())) return false; - + // For now, only support constants with the same size. uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); - if (StoreSize != TD->getTypeStoreSize(B->getType()) || - StoreSize > 128) + if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128) return false; Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); @@ -863,7 +924,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, /// an existing one. User must specify the log2 of the minimum required /// alignment for the object. 
/// -unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; @@ -911,7 +972,7 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - WriteAsOperand(OS, Constants[i].Val.ConstVal, /*PrintType=*/false); + Constants[i].Val.ConstVal->printAsOperand(OS, /*PrintType=*/false); OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp index 35591e1..46cd60a 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -20,7 +20,7 @@ using namespace llvm; char MachineFunctionAnalysis::ID = 0; MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) : - FunctionPass(ID), TM(tm), MF(0) { + FunctionPass(ID), TM(tm), MF(nullptr) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } @@ -53,5 +53,5 @@ bool MachineFunctionAnalysis::runOnFunction(Function &F) { void MachineFunctionAnalysis::releaseMemory() { delete MF; - MF = 0; + MF = nullptr; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp index 674cc80..789f204 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -51,6 +51,7 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved("domfrontier"); AU.addPreserved("loops"); AU.addPreserved("lda"); + AU.addPreserved("stack-protector"); FunctionPass::getAnalysisUsage(AU); } diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index fa9c821..dee3977 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -34,14 +34,14 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) : MachineFunctionPass(ID), OS(os), Banner(banner) {} - const char *getPassName() const { return "MachineFunction Printer"; } + const char *getPassName() const override { return "MachineFunction Printer"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { OS << "# " << Banner << ":\n"; MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); return false; diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 295b450..5122165 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -15,15 +15,14 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include 
"llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" @@ -129,7 +128,7 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { - MachineRegisterInfo *RegInfo = 0; + MachineRegisterInfo *RegInfo = nullptr; if (MachineInstr *MI = getParent()) if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) @@ -153,7 +152,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsEarlyClobber = false; IsDebug = isDebug; // Ensure isOnRegUseList() returns false. - Contents.Reg.Prev = 0; + Contents.Reg.Prev = nullptr; // Preserve the tie when the operand was already a register. if (!WasReg) TiedTo = 0; @@ -199,10 +198,13 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_BlockAddress: return getBlockAddress() == Other.getBlockAddress() && getOffset() == Other.getOffset(); - case MO_RegisterMask: + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: return getRegMask() == Other.getRegMask(); case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_CFIIndex: + return getCFIIndex() == Other.getCFIIndex(); case MachineOperand::MO_Metadata: return getMetadata() == Other.getMetadata(); } @@ -241,11 +243,14 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(), MO.getOffset()); case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); case MachineOperand::MO_Metadata: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); case MachineOperand::MO_MCSymbol: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + case MachineOperand::MO_CFIIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); } llvm_unreachable("Invalid machine operand type"); } @@ -260,7 +265,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (const MachineBasicBlock *MBB = MI->getParent()) if (const MachineFunction *MF = MBB->getParent()) TM = &MF->getTarget(); - const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0; + const TargetRegisterInfo *TRI = TM ? 
TM->getRegisterInfo() : nullptr; switch (getType()) { case MachineOperand::MO_Register: @@ -312,7 +317,6 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "tied"; if (TiedTo != 15) OS << unsigned(TiedTo - 1); - NeedComma = true; } OS << '>'; } @@ -350,7 +354,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { break; case MachineOperand::MO_GlobalAddress: OS << "<ga:"; - WriteAsOperand(OS, getGlobal(), /*PrintType=*/false); + getGlobal()->printAsOperand(OS, /*PrintType=*/false); if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; @@ -361,21 +365,27 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { break; case MachineOperand::MO_BlockAddress: OS << '<'; - WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); + getBlockAddress()->printAsOperand(OS, /*PrintType=*/false); if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; case MachineOperand::MO_RegisterMask: OS << "<regmask>"; break; + case MachineOperand::MO_RegisterLiveOut: + OS << "<regliveout>"; + break; case MachineOperand::MO_Metadata: OS << '<'; - WriteAsOperand(OS, getMetadata(), /*PrintType=*/false); + getMetadata()->printAsOperand(OS, /*PrintType=*/false); OS << '>'; break; case MachineOperand::MO_MCSymbol: OS << "<MCSym=" << *getMCSymbol() << '>'; break; + case MachineOperand::MO_CFIIndex: + OS << "<call frame instruction>"; + break; } if (unsigned TF = getTargetFlags()) @@ -389,8 +399,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { /// getAddrSpace - Return the LLVM IR address space number that this pointer /// points into. unsigned MachinePointerInfo::getAddrSpace() const { - if (V == 0) return 0; - return cast<PointerType>(V->getType())->getAddressSpace(); + if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0; + return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace(); } /// getConstantPool - Return a MachinePointerInfo record that refers to the @@ -424,7 +434,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, : PtrInfo(ptrinfo), Size(s), Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), TBAAInfo(TBAAInfo), Ranges(Ranges) { - assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) && + assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() || + isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) && "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); @@ -435,7 +446,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(getOffset()); ID.AddInteger(Size); - ID.AddPointer(getValue()); + ID.AddPointer(getOpaqueValue()); ID.AddInteger(Flags); } @@ -476,10 +487,16 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { // Print the address information. 
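The getAddrSpace, constructor, and Profile changes reflect MachinePointerInfo::V becoming a two-way pointer union: either an IR Value or a PseudoSourceValue, with pseudo values reporting address space 0. A sketch of the same discrimination, using std::variant as a stand-in for llvm::PointerUnion and hypothetical Value/PseudoSourceValue types:

    #include <cassert>
    #include <variant>

    struct Value { unsigned AddrSpace; }; // hypothetical IR value
    struct PseudoSourceValue {};          // stack slot, GOT, constant pool...

    using PtrUnion = std::variant<const Value *, const PseudoSourceValue *>;

    unsigned getAddrSpace(const PtrUnion &V) {
      if (auto *IRV = std::get_if<const Value *>(&V))
        return *IRV ? (*IRV)->AddrSpace : 0;
      return 0; // pseudo source values live in address space 0
    }

    int main() {
      Value GlobalInAS1{1};
      PseudoSourceValue StackSlot;
      assert(getAddrSpace(PtrUnion(&GlobalInAS1)) == 1);
      const PseudoSourceValue *PSV = &StackSlot;
      assert(getAddrSpace(PtrUnion(PSV)) == 0);
    }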
OS << "["; - if (!MMO.getValue()) - OS << "<unknown>"; + if (const Value *V = MMO.getValue()) + V->printAsOperand(OS, /*PrintType=*/false); + else if (const PseudoSourceValue *PSV = MMO.getPseudoValue()) + PSV->printCustom(OS); else - WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false); + OS << "<unknown>"; + + unsigned AS = MMO.getAddrSpace(); + if (AS != 0) + OS << "(addrspace=" << AS << ')'; // If the alignment of the memory reference itself differs from the alignment // of the base pointer, print the base alignment explicitly, next to the base @@ -500,7 +517,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) { OS << "(tbaa="; if (TBAAInfo->getNumOperands() > 0) - WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false); + TBAAInfo->getOperand(0)->printAsOperand(OS, /*PrintType=*/false); else OS << "<unknown>"; OS << ")"; @@ -531,9 +548,9 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// the MCInstrDesc. MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) - : MCID(&tid), Parent(0), Operands(0), NumOperands(0), + : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(nullptr), debugLoc(dl) { // Reserve space for the expected number of operands. if (unsigned NumOps = MCID->getNumOperands() + MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) { @@ -548,7 +565,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), Parent(0), Operands(0), NumOperands(0), + : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) { @@ -569,7 +586,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) MachineRegisterInfo *MachineInstr::getRegInfo() { if (MachineBasicBlock *MBB = getParent()) return &MBB->getParent()->getRegInfo(); - return 0; + return nullptr; } /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in @@ -688,7 +705,7 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { // When adding a register operand, tell MRI about it. if (NewMO->isReg()) { // Ensure isOnRegUseList() returns false, regardless of Op's status. - NewMO->Contents.Reg.Prev = 0; + NewMO->Contents.Reg.Prev = nullptr; // Ignore existing ties. This is not a property that can be copied. NewMO->TiedTo = 0; // Add the new operand to MRI, but only for instructions in an MBB. @@ -960,7 +977,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return TII->getRegClass(getDesc(), OpIdx, TRI, MF); if (!getOperand(OpIdx).isReg()) - return NULL; + return nullptr; // For tied uses on inline asm, get the constraint from the def. unsigned DefIdx; @@ -970,7 +987,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, // Inline asm stores register class constraints in the flag word. 
int FlagIdx = findInlineAsmFlagIdx(OpIdx); if (FlagIdx < 0) - return NULL; + return nullptr; unsigned Flag = getOperand(FlagIdx).getImm(); unsigned RCID; @@ -981,7 +998,55 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem) return TRI->getPointerRegClass(MF); - return NULL; + return nullptr; +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( + unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, bool ExploreBundle) const { + // Check every operands inside the bundle if we have + // been asked to. + if (ExploreBundle) + for (ConstMIBundleOperands OpndIt(this); OpndIt.isValid() && CurRC; + ++OpndIt) + CurRC = OpndIt->getParent()->getRegClassConstraintEffectForVRegImpl( + OpndIt.getOperandNo(), Reg, CurRC, TII, TRI); + else + // Otherwise, just check the current operands. + for (ConstMIOperands OpndIt(this); OpndIt.isValid() && CurRC; ++OpndIt) + CurRC = getRegClassConstraintEffectForVRegImpl(OpndIt.getOperandNo(), Reg, + CurRC, TII, TRI); + return CurRC; +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl( + unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + assert(CurRC && "Invalid initial register class"); + // Check if Reg is constrained by some of its use/def from MI. + const MachineOperand &MO = getOperand(OpIdx); + if (!MO.isReg() || MO.getReg() != Reg) + return CurRC; + // If yes, accumulate the constraints through the operand. + return getRegClassConstraintEffect(OpIdx, CurRC, TII, TRI); +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect( + unsigned OpIdx, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + const TargetRegisterClass *OpRC = getRegClassConstraint(OpIdx, TII, TRI); + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isReg() && + "Cannot get register constraints for non-register operand"); + assert(CurRC && "Invalid initial register class"); + if (unsigned SubIdx = MO.getSubReg()) { + if (OpRC) + CurRC = TRI->getMatchingSuperRegClass(CurRC, OpRC, SubIdx); + else + CurRC = TRI->getSubClassWithSubReg(CurRC, SubIdx); + } else if (OpRC) + CurRC = TRI->getCommonSubClass(CurRC, OpRC); + return CurRC; } /// Return the number of instructions inside the MI bundle, not counting the @@ -1239,8 +1304,8 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, return false; } - if (isLabel() || isDebugValue() || - isTerminator() || hasUnmodeledSideEffects()) + if (isPosition() || isDebugValue() || isTerminator() || + hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the @@ -1304,11 +1369,13 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { if ((*I)->isStore()) return false; if ((*I)->isInvariant()) return true; + + // A load from a constant PseudoSourceValue is invariant. + if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) + if (PSV->isConstant(MFI)) + continue; + if (const Value *V = (*I)->getValue()) { - // A load from a constant PseudoSourceValue is invariant. - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) - if (PSV->isConstant(MFI)) - continue; // If we have an AliasAnalysis, ask it whether the memory is constant. 
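The three new getRegClassConstraintEffect* helpers fold each relevant operand's constraint into a running register class; in the common no-subregister case that accumulation reduces to a class intersection (getCommonSubClass). A standalone sketch with classes modeled as sorted sets, not the real TRI queries:

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <vector>

    using RC = std::vector<unsigned>; // a register class as a sorted set

    RC commonSubClass(const RC &A, const RC &B) {
      RC Out;
      std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                            std::back_inserter(Out));
      return Out;
    }

    int main() {
      RC Cur{0, 1, 2, 3}; // current class of the vreg
      std::vector<RC> OperandConstraints{{0, 1, 2}, {1, 2, 3}};
      for (const RC &OpRC : OperandConstraints) // one fold per operand
        Cur = commonSubClass(Cur, OpRC);
      assert((Cur == RC{1, 2}));
    }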
if (AA && AA->pointsToConstantMemory( AliasAnalysis::Location(V, (*I)->getSize(), @@ -1372,7 +1439,7 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF, for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) + if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask()) addOperand(MF, MO); } } @@ -1386,32 +1453,14 @@ void MachineInstr::dump() const { static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, raw_ostream &CommentOS) { const LLVMContext &Ctx = MF->getFunction()->getContext(); - if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(Ctx)); - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); - // Omit the directory, because it's likely to be long and uninteresting. - if (Scope) - CommentOS << Scope.getFilename(); - else - CommentOS << "<unknown>"; - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << " @[ "; - printDebugLoc(InlinedAtDL, MF, CommentOS); - CommentOS << " ]"; - } - } + DL.print(Ctx, CommentOS); } void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, bool SkipOpers) const { // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. - const MachineFunction *MF = 0; - const MachineRegisterInfo *MRI = 0; + const MachineFunction *MF = nullptr; + const MachineRegisterInfo *MRI = nullptr; if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); if (!TM && MF) @@ -1587,7 +1636,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); i != e; ++i) { OS << **i; - if (llvm::next(i) != e) + if (std::next(i) != e) OS << " "; } } @@ -1612,19 +1661,19 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, // Print debug location information. if (isDebugValue() && getOperand(e - 1).isMetadata()) { - if (!HaveSemi) OS << ";"; HaveSemi = true; + if (!HaveSemi) OS << ";"; DIVariable DV(getOperand(e - 1).getMetadata()); OS << " line no:" << DV.getLineNumber(); if (MDNode *InlinedAt = DV.getInlinedAt()) { DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown()) { + if (!InlinedAtDL.isUnknown() && MF) { OS << " inlined @[ "; printDebugLoc(InlinedAtDL, MF, OS); OS << " ]"; } } } else if (!debugLoc.isUnknown() && MF) { - if (!HaveSemi) OS << ";"; HaveSemi = true; + if (!HaveSemi) OS << ";"; OS << " dbg:"; printDebugLoc(debugLoc, MF, OS); } @@ -1694,7 +1743,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, void MachineInstr::clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo) { if (!TargetRegisterInfo::isPhysicalRegister(Reg)) - RegInfo = 0; + RegInfo = nullptr; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isUse() || !MO.isKill()) @@ -1827,7 +1876,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { void MachineInstr::emitError(StringRef Msg) const { // Find the source location cookie. 
unsigned LocCookie = 0; - const MDNode *LocMD = 0; + const MDNode *LocMD = nullptr; for (unsigned i = getNumOperands(); i != 0; --i) { if (getOperand(i-1).isMetadata() && (LocMD = getOperand(i-1).getMetadata()) && diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp index 77bcd1d..962169e 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -26,7 +26,7 @@ namespace { initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; }; } // end anonymous namespace @@ -77,7 +77,7 @@ namespace { initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; }; } // end anonymous namespace @@ -211,7 +211,7 @@ MachineBasicBlock::instr_iterator llvm::finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI) { MachineBasicBlock::instr_iterator E = MBB.instr_end(); - MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI); + MachineBasicBlock::instr_iterator LastMI = std::next(FirstMI); while (LastMI != E && LastMI->isInsideBundle()) ++LastMI; finalizeBundle(MBB, FirstMI, LastMI); @@ -235,7 +235,7 @@ bool llvm::finalizeBundles(MachineFunction &MF) { if (!MII->isInsideBundle()) ++MII; else { - MII = finalizeBundle(MBB, llvm::prior(MII)); + MII = finalizeBundle(MBB, std::prev(MII)); Changed = true; } } diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index 104eacd..68d2efd 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -20,7 +20,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-licm" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" @@ -42,6 +41,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-licm" + static cl::opt<bool> AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), @@ -125,9 +126,9 @@ namespace { initializeMachineLICMPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<AliasAnalysis>(); @@ -136,7 +137,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - virtual void releaseMemory() { + void releaseMemory() override { RegSeen.clear(); RegPressure.clear(); RegLimit.clear(); @@ -319,6 +320,9 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { } bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); @@ -355,7 +359,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); while (!Worklist.empty()) { CurLoop = Worklist.pop_back_val(); - CurPreheader = 0; + CurPreheader = nullptr; ExitBlocks.clear(); // If this is done before regalloc, only 
visit outer-most preheader-sporting @@ -387,10 +391,10 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); o != oe; ++o) { - if (!(*o)->isStore() || !(*o)->getValue()) + if (!(*o)->isStore() || !(*o)->getPseudoValue()) continue; if (const FixedStackPseudoSourceValue *Value = - dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) { if (Value->getFrameIndex() == FI) return true; } @@ -697,7 +701,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { WorkList.push_back(HeaderN); do { MachineDomTreeNode *Node = WorkList.pop_back_val(); - assert(Node != 0 && "Null dominator tree node?"); + assert(Node && "Null dominator tree node?"); MachineBasicBlock *BB = Node->getBlock(); // If the header of the loop containing this basic block is a landing pad, @@ -801,7 +805,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { // defs as well. This happens whenever the preheader is created by splitting // the critical edge from the loop predecessor to the loop header. if (BB->pred_size() == 1) { - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) InitRegPressure(*BB->pred_begin()); @@ -879,10 +883,9 @@ static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.mayLoad() && "Expected MI that loads!"); for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), E = MI.memoperands_end(); I != E; ++I) { - if (const Value *V = (*I)->getValue()) { - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) - if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) - return true; + if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { + if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) + return true; } } return false; @@ -978,25 +981,23 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { unsigned Reg = MO->getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; + for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { // A PHI may cause a copy to be inserted. - if (UseMI->isPHI()) { + if (UseMI.isPHI()) { // A PHI inside the loop causes a copy because the live range of Reg is // extended across the PHI. - if (CurLoop->contains(UseMI)) + if (CurLoop->contains(&UseMI)) return true; // A PHI in an exit block can cause a copy to be inserted if the PHI // has multiple predecessors in the loop with different values. // For now, approximate by rejecting all exit blocks. - if (isExitBlock(UseMI->getParent())) + if (isExitBlock(UseMI.getParent())) return true; continue; } // Look past copies as well. 
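HasLoopPHIUse(), rewritten below to range-based use iteration, chases uses through copy instructions with an explicit worklist rather than recursion. A bare-bones version of that traversal, assuming an acyclic copy graph (the real pass bounds the walk differently):

    #include <cassert>
    #include <vector>

    struct Instr {
      bool IsPHI;
      bool IsCopy;
      std::vector<const Instr *> Users; // users of this instruction's value
    };

    bool hasPHIUse(const Instr *Root) {
      std::vector<const Instr *> Work{Root};
      while (!Work.empty()) {
        const Instr *I = Work.back();
        Work.pop_back();
        for (const Instr *U : I->Users) {
          if (U->IsPHI)
            return true;
          if (U->IsCopy)
            Work.push_back(U); // look past copies as well
        }
      }
      return false;
    }

    int main() {
      Instr Phi{true, false, {}};
      Instr Copy{false, true, {&Phi}};
      Instr Def{false, false, {&Copy}};
      assert(hasPHIUse(&Def));
    }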
- if (UseMI->isCopy() && CurLoop->contains(UseMI)) - Work.push_back(UseMI); + if (UseMI.isCopy() && CurLoop->contains(&UseMI)) + Work.push_back(&UseMI); } } } while (!Work.empty()); @@ -1011,22 +1012,20 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) return false; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *UseMI = &*I; - if (UseMI->isCopyLike()) + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { + if (UseMI.isCopyLike()) continue; - if (!CurLoop->contains(UseMI->getParent())) + if (!CurLoop->contains(UseMI.getParent())) continue; - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); + for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned MOReg = MO.getReg(); if (MOReg != Reg) continue; - if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) + if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, &UseMI, i)) return true; } @@ -1242,13 +1241,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. if (MI->canFoldAsLoad()) - return 0; + return nullptr; // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. if (!MI->isInvariantLoad(AA)) - return 0; + return nullptr; // Next determine the register class for a temporary register. unsigned LoadRegIndex; @@ -1257,9 +1256,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { /*UnfoldLoad=*/true, /*UnfoldStore=*/false, &LoadRegIndex); - if (NewOpc == 0) return 0; + if (NewOpc == 0) return nullptr; const MCInstrDesc &MID = TII->get(NewOpc); - if (MID.getNumDefs() != 1) return 0; + if (MID.getNumDefs() != 1) return nullptr; MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're unfolding. Create a temporary register and do the unfold. @@ -1285,7 +1284,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { NewMIs[0]->eraseFromParent(); NewMIs[1]->eraseFromParent(); - return 0; + return nullptr; } // Update register pressure for the unfolded instruction. @@ -1317,10 +1316,10 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr*> &PrevMIs) { for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { const MachineInstr *PrevMI = PrevMIs[i]; - if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) + if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr))) return PrevMI; } - return 0; + return nullptr; } bool MachineLICM::EliminateCSE(MachineInstr *MI, @@ -1391,7 +1390,7 @@ bool MachineLICM::MayCSE(MachineInstr *MI) { if (CI == CSEMap.end() || MI->isImplicitDef()) return false; - return LookForDuplicate(MI, CI->second) != 0; + return LookForDuplicate(MI, CI->second) != nullptr; } /// Hoist - When an instruction is found to use only loop invariant operands @@ -1467,7 +1466,7 @@ MachineBasicBlock *MachineLICM::getCurPreheader() { // If we've tried to get a preheader and failed, don't try again. 
if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) - return 0; + return nullptr; if (!CurPreheader) { CurPreheader = CurLoop->getLoopPreheader(); @@ -1475,13 +1474,13 @@ MachineBasicBlock *MachineLICM::getCurPreheader() { MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); if (!Pred) { CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); - return 0; + return nullptr; } CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); if (!CurPreheader) { CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); - return 0; + return nullptr; } } } diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index 4e2cfdc..89054d4 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -50,11 +50,11 @@ MachineBasicBlock *MachineLoop::getTopBlock() { MachineBasicBlock *TopMBB = getHeader(); MachineFunction::iterator Begin = TopMBB->getParent()->begin(); if (TopMBB != Begin) { - MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB)); + MachineBasicBlock *PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); while (contains(PriorMBB)) { TopMBB = PriorMBB; if (TopMBB == Begin) break; - PriorMBB = prior(MachineFunction::iterator(TopMBB)); + PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); } } return TopMBB; @@ -63,12 +63,12 @@ MachineBasicBlock *MachineLoop::getTopBlock() { MachineBasicBlock *MachineLoop::getBottomBlock() { MachineBasicBlock *BotMBB = getHeader(); MachineFunction::iterator End = BotMBB->getParent()->end(); - if (BotMBB != prior(End)) { - MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); + if (BotMBB != std::prev(End)) { + MachineBasicBlock *NextMBB = std::next(MachineFunction::iterator(BotMBB)); while (contains(NextMBB)) { BotMBB = NextMBB; - if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break; - NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); + if (BotMBB == std::next(MachineFunction::iterator(BotMBB))) break; + NextMBB = std::next(MachineFunction::iterator(BotMBB)); } } return BotMBB; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index bb54284..4976e35 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -36,8 +36,8 @@ namespace llvm { class MMIAddrLabelMapCallbackPtr : CallbackVH { MMIAddrLabelMap *Map; public: - MMIAddrLabelMapCallbackPtr() : Map(0) {} - MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {} + MMIAddrLabelMapCallbackPtr() : Map(nullptr) {} + MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {} void setPtr(BasicBlock *BB) { ValueHandleBase::operator=(BB); @@ -45,8 +45,8 @@ public: void setMap(MMIAddrLabelMap *map) { Map = map; } - virtual void deleted(); - virtual void allUsesReplacedWith(Value *V2); + void deleted() override; + void allUsesReplacedWith(Value *V2) override; }; class MMIAddrLabelMap { @@ -163,9 +163,9 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { AddrLabelSymEntry Entry = AddrLabelSymbols[BB]; AddrLabelSymbols.erase(BB); assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?"); - BBCallbacks[Entry.Index] = 0; // Clear the callback. + BBCallbacks[Entry.Index] = nullptr; // Clear the callback. 
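The MachineLoopInfo hunk is part of the tree-wide move from llvm::prior()/llvm::next() to std::prev()/std::next(), which work on any bidirectional iterator. A two-assert refresher:

    #include <cassert>
    #include <iterator>
    #include <list>

    int main() {
      std::list<int> Blocks{10, 20, 30};   // think: the basic block list
      auto It = std::next(Blocks.begin()); // at 20
      assert(*std::prev(It) == 10);
      assert(*std::next(It) == 30);
    }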
- assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) && + assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) && "Block/parent mismatch"); // Handle both the single and the multiple symbols cases. @@ -213,7 +213,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { return; } - BBCallbacks[OldEntry.Index] = 0; // Update the callback. + BBCallbacks[OldEntry.Index] = nullptr; // Update the callback. // Otherwise, we need to add the old symbol to the new block's set. If it is // just a single entry, upgrade it to a symbol list. @@ -253,12 +253,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, 0, false) { + : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, nullptr, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } MachineModuleInfo::MachineModuleInfo() - : ImmutablePass(ID), Context(0, 0, 0) { + : ImmutablePass(ID), Context(nullptr, nullptr, nullptr) { llvm_unreachable("This MachineModuleInfo constructor should never be called, " "MMI should always be explicitly constructed by " "LLVMTargetMachine"); @@ -269,16 +269,16 @@ MachineModuleInfo::~MachineModuleInfo() { bool MachineModuleInfo::doInitialization(Module &M) { - ObjFileMMI = 0; + ObjFileMMI = nullptr; CompactUnwindEncoding = 0; CurCallSite = 0; CallsEHReturn = 0; CallsUnwindInit = 0; DbgInfoAvailable = UsesVAFloatArgument = false; // Always emit some info, by default "no personality" info. - Personalities.push_back(NULL); - AddrLabelSymbols = 0; - TheModule = 0; + Personalities.push_back(nullptr); + AddrLabelSymbols = nullptr; + TheModule = nullptr; return false; } @@ -288,12 +288,12 @@ bool MachineModuleInfo::doFinalization(Module &M) { Personalities.clear(); delete AddrLabelSymbols; - AddrLabelSymbols = 0; + AddrLabelSymbols = nullptr; Context.reset(); delete ObjFileMMI; - ObjFileMMI = 0; + ObjFileMMI = nullptr; return false; } @@ -313,7 +313,7 @@ void MachineModuleInfo::EndFunction() { CallsEHReturn = 0; CallsUnwindInit = 0; CompactUnwindEncoding = 0; - VariableDbgInfo.clear(); + VariableDbgInfos.clear(); } /// AnalyzeModule - Scan the module for global debug information. @@ -341,7 +341,7 @@ void MachineModuleInfo::AnalyzeModule(const Module &M) { /// because the block may be accessed outside its containing function. MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) { // Lazily create AddrLabelSymbols. - if (AddrLabelSymbols == 0) + if (!AddrLabelSymbols) AddrLabelSymbols = new MMIAddrLabelMap(Context); return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB)); } @@ -352,7 +352,7 @@ MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) { std::vector<MCSymbol*> MachineModuleInfo:: getAddrLabelSymbolToEmit(const BasicBlock *BB) { // Lazily create AddrLabelSymbols. - if (AddrLabelSymbols == 0) + if (!AddrLabelSymbols) AddrLabelSymbols = new MMIAddrLabelMap(Context); return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB)); } @@ -366,7 +366,7 @@ void MachineModuleInfo:: takeDeletedSymbolsForFunction(const Function *F, std::vector<MCSymbol*> &Result) { // If no blocks have had their addresses taken, we're done. 
- if (AddrLabelSymbols == 0) return; + if (!AddrLabelSymbols) return; return AddrLabelSymbols-> takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result); } @@ -419,7 +419,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, // If this is the first personality we're adding go // ahead and add it at the beginning. - if (Personalities[0] == NULL) + if (!Personalities[0]) Personalities[0] = Personality; else Personalities.push_back(Personality); @@ -462,7 +462,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { if (LandingPad.LandingPadLabel && !LandingPad.LandingPadLabel->isDefined() && (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) - LandingPad.LandingPadLabel = 0; + LandingPad.LandingPadLabel = nullptr; // Special case: we *should* emit LPs with null LP MBB. This indicates // "nounwind" case. @@ -550,13 +550,13 @@ try_next:; const Function *MachineModuleInfo::getPersonality() const { // FIXME: Until PR1414 will be fixed, we're using 1 personality function per // function - return !LandingPads.empty() ? LandingPads[0].Personality : NULL; + return !LandingPads.empty() ? LandingPads[0].Personality : nullptr; } /// getPersonalityIndex - Return unique index for current personality /// function. NULL/first personality function should always get zero index. unsigned MachineModuleInfo::getPersonalityIndex() const { - const Function* Personality = NULL; + const Function* Personality = nullptr; // Scan landing pads. If there is at least one non-NULL personality - use it. for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp index cb204fd..3ee3e40 100644 --- a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp +++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp @@ -20,7 +20,7 @@ void MachinePassRegistryListener::anchor() { } /// setDefault - Set the default constructor by name. 
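setDefault(), shown next, is a linear scan of the registry's singly linked list of (name, constructor) nodes. The shape of that lookup, detached from the MachinePassRegistry types (names here are stand-ins):

    #include <cassert>
    #include <cstring>

    using Ctor = int (*)(); // stand-in for MachinePassCtor

    struct Node {
      const char *Name;
      Ctor C;
      Node *Next;
    };

    Ctor findCtor(Node *List, const char *Name) {
      for (Node *R = List; R; R = R->Next)
        if (std::strcmp(R->Name, Name) == 0)
          return R->C;
      return nullptr; // mirrors the Ctor = nullptr "not found" result
    }

    static int makeA() { return 1; }
    static int makeB() { return 2; }

    int main() {
      Node B{"b", makeB, nullptr};
      Node A{"a", makeA, &B};
      assert(findCtor(&A, "b") == makeB);
      assert(findCtor(&A, "missing") == nullptr);
    }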
void MachinePassRegistry::setDefault(StringRef Name) { - MachinePassCtor Ctor = 0; + MachinePassCtor Ctor = nullptr; for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) { if (R->getName() == Name) { Ctor = R->getCtor(); diff --git a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp new file mode 100644 index 0000000..c6b6802 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp @@ -0,0 +1,138 @@ + +#include "llvm/CodeGen/MachineRegionInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/RegionInfoImpl.h" + +using namespace llvm; + +STATISTIC(numMachineRegions, "The # of machine regions"); +STATISTIC(numMachineSimpleRegions, "The # of simple machine regions"); + +namespace llvm { +template class RegionBase<RegionTraits<MachineFunction>>; +template class RegionNodeBase<RegionTraits<MachineFunction>>; +template class RegionInfoBase<RegionTraits<MachineFunction>>; +} + +//===----------------------------------------------------------------------===// +// MachineRegion implementation +// + +MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit, + MachineRegionInfo* RI, + MachineDominatorTree *DT, MachineRegion *Parent) : + RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) { + +} + +MachineRegion::~MachineRegion() { } + +//===----------------------------------------------------------------------===// +// MachineRegionInfo implementation +// + +MachineRegionInfo::MachineRegionInfo() : + RegionInfoBase<RegionTraits<MachineFunction>>() { + +} + +MachineRegionInfo::~MachineRegionInfo() { + +} + +void MachineRegionInfo::updateStatistics(MachineRegion *R) { + ++numMachineRegions; + + // TODO: Slow. Should only be enabled if -stats is used. + if (R->isSimple()) + ++numMachineSimpleRegions; +} + +void MachineRegionInfo::recalculate(MachineFunction &F, + MachineDominatorTree *DT_, + MachinePostDominatorTree *PDT_, + MachineDominanceFrontier *DF_) { + DT = DT_; + PDT = PDT_; + DF = DF_; + + MachineBasicBlock *Entry = GraphTraits<MachineFunction*>::getEntryNode(&F); + + TopLevelRegion = new MachineRegion(Entry, nullptr, this, DT, nullptr); + updateStatistics(TopLevelRegion); + calculate(F); +} + +//===----------------------------------------------------------------------===// +// MachineRegionInfoPass implementation +// + +MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) { + initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry()); +} + +MachineRegionInfoPass::~MachineRegionInfoPass() { + +} + +bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) { + releaseMemory(); + + auto DT = &getAnalysis<MachineDominatorTree>(); + auto PDT = &getAnalysis<MachinePostDominatorTree>(); + auto DF = &getAnalysis<MachineDominanceFrontier>(); + + RI.recalculate(F, DT, PDT, DF); + return false; +} + +void MachineRegionInfoPass::releaseMemory() { + RI.releaseMemory(); +} + +void MachineRegionInfoPass::verifyAnalysis() const { + // Only do verification when user wants to, otherwise this expensive check + // will be invoked by PMDataManager::verifyPreservedAnalysis when + // a regionpass (marked PreservedAll) finish. 
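The new MachineRegionInfo.cpp, like MachineDominanceFrontier.cpp earlier, relies on explicit template instantiation: the RegionBase/RegionInfoBase members are emitted once in this translation unit instead of in every includer. A self-contained demonstration of the mechanism, with a hypothetical template in place of the LLVM ones:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    template <typename BlockT>
    class RegionishBase { // hypothetical analogue of RegionBase<>
      std::vector<BlockT> Blocks;
    public:
      void add(const BlockT &B) { Blocks.push_back(B); }
      std::size_t size() const { return Blocks.size(); }
    };

    // Explicit instantiation: forces every member of RegionishBase<int> to
    // be compiled here, just as the .cpp above does for the
    // MachineFunction region traits.
    template class RegionishBase<int>;

    int main() {
      RegionishBase<int> R;
      R.add(1);
      R.add(2);
      std::cout << R.size() << "\n"; // 2
    }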
+ if (MachineRegionInfo::VerifyRegionInfo) + RI.verifyAnalysis(); +} + +void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.addRequired<DominanceFrontier>(); +} + +void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const { + RI.print(OS); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineRegionInfoPass::dump() const { + RI.dump(); +} +#endif + +char MachineRegionInfoPass::ID = 0; + +INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions", + "Detect single entry single exit regions", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) +INITIALIZE_PASS_END(MachineRegionInfoPass, "regions", + "Detect single entry single exit regions", true, true) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +namespace llvm { + FunctionPass *createMachineRegionInfoPass() { + return new MachineRegionInfoPass(); + } +} + diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index f8b8796..f560259 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -13,9 +13,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/raw_os_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/raw_os_ostream.h" using namespace llvm; @@ -23,7 +23,7 @@ using namespace llvm; void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM) - : TM(TM), TheDelegate(0), IsSSA(true), TracksLiveness(true) { + : TM(TM), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); @@ -60,7 +60,7 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, if (!NewRC || NewRC == OldRC) return NewRC; if (NewRC->getNumRegs() < MinNumRegs) - return 0; + return nullptr; setRegClass(Reg, NewRC); return NewRC; } @@ -77,19 +77,12 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { return false; // Accumulate constraints from all uses. - for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; - ++I) { - const TargetRegisterClass *OpRC = - I->getRegClassConstraint(I.getOperandNo(), TII, - getTargetRegisterInfo()); - if (unsigned SubIdx = I.getOperand().getSubReg()) { - if (OpRC) - NewRC = getTargetRegisterInfo()->getMatchingSuperRegClass(NewRC, OpRC, - SubIdx); - else - NewRC = getTargetRegisterInfo()->getSubClassWithSubReg(NewRC, SubIdx); - } else if (OpRC) - NewRC = getTargetRegisterInfo()->getCommonSubClass(NewRC, OpRC); + for (MachineOperand &MO : reg_nodbg_operands(Reg)) { + // Apply the effect of the given operand to NewRC. 
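The rewritten recomputeRegClass() loop recovers an operand's index from its address: operands are stored contiguously, so subtracting the first operand's address is plain pointer arithmetic. The same idiom in isolation:

    #include <cassert>
    #include <vector>

    struct Operand { unsigned Reg; };

    int main() {
      std::vector<Operand> Operands{{1}, {2}, {3}};
      Operand &MO = Operands[2];
      // The "&MO - &MI->getOperand(0)" idiom from the hunk above.
      unsigned OpNo = static_cast<unsigned>(&MO - &Operands[0]);
      assert(OpNo == 2);
    }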
+ MachineInstr *MI = MO.getParent(); + unsigned OpNo = &MO - &MI->getOperand(0); + NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, TII, + getTargetRegisterInfo()); if (!NewRC || NewRC == OldRC) return false; } @@ -133,8 +126,8 @@ void MachineRegisterInfo::clearVirtRegs() { void MachineRegisterInfo::verifyUseList(unsigned Reg) const { #ifndef NDEBUG bool Valid = true; - for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { - MachineOperand *MO = &I.getOperand(); + for (MachineOperand &M : reg_operands(Reg)) { + MachineOperand *MO = &M; MachineInstr *MI = MO->getParent(); if (!MI) { errs() << PrintReg(Reg, getTargetRegisterInfo()) @@ -189,7 +182,7 @@ void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { // Head is NULL for an empty list. if (!Head) { MO->Contents.Reg.Prev = MO; - MO->Contents.Reg.Next = 0; + MO->Contents.Reg.Next = nullptr; HeadRef = MO; return; } @@ -210,7 +203,7 @@ void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { HeadRef = MO; } else { // Insert use at the end. - MO->Contents.Reg.Next = 0; + MO->Contents.Reg.Next = nullptr; Last->Contents.Reg.Next = MO; } } @@ -234,8 +227,8 @@ void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { (Next ? Next : Head)->Contents.Reg.Prev = Prev; - MO->Contents.Reg.Prev = 0; - MO->Contents.Reg.Next = 0; + MO->Contents.Reg.Prev = nullptr; + MO->Contents.Reg.Next = nullptr; } /// Move NumOps operands from Src to Dst, updating use-def lists as needed. @@ -295,7 +288,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { // TODO: This could be more efficient by bulk changing the operands. for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { - MachineOperand &O = I.getOperand(); + MachineOperand &O = *I; ++I; O.setReg(ToReg); } @@ -307,20 +300,20 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// form, so there should only be one definition. MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. - def_iterator I = def_begin(Reg); - assert((I.atEnd() || llvm::next(I) == def_end()) && + def_instr_iterator I = def_instr_begin(Reg); + assert((I.atEnd() || std::next(I) == def_instr_end()) && "getVRegDef assumes a single definition or no definition"); - return !I.atEnd() ? &*I : 0; + return !I.atEnd() ? &*I : nullptr; } /// getUniqueVRegDef - Return the unique machine instr that defines the /// specified virtual register or null if none is found. If there are /// multiple definitions or no definition, return null. MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { - if (def_empty(Reg)) return 0; - def_iterator I = def_begin(Reg); - if (llvm::next(I) != def_end()) - return 0; + if (def_empty(Reg)) return nullptr; + def_instr_iterator I = def_instr_begin(Reg); + if (std::next(I) != def_instr_end()) + return nullptr; return &*I; } @@ -336,8 +329,8 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { /// optimization passes which extend register lifetimes and need only /// preserve conservative kill flag information. 
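The two def-list queries in the hunks above make different promises: getVRegDef asserts at most one definition (strict SSA), while getUniqueVRegDef quietly returns null for zero or multiple defs. A hedged sketch of the usual client pattern; the helper name canFoldCopy is hypothetical:

    // Only fold when the def is provably unique and the value has a single
    // real (non-debug) use; both queries appear in the diff above.
    static bool canFoldCopy(const MachineRegisterInfo &MRI, unsigned Reg) {
      MachineInstr *Def = MRI.getUniqueVRegDef(Reg); // null if 0 or >1 defs
      return Def && Def->isCopy() && MRI.hasOneNonDBGUse(Reg);
    }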
void MachineRegisterInfo::clearKillFlags(unsigned Reg) const { - for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI) - UI.getOperand().setIsKill(false); + for (MachineOperand &MO : use_operands(Reg)) + MO.setIsKill(false); } bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { @@ -399,8 +392,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { - for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) - I.getOperand().getParent()->dump(); + for (MachineInstr &I : use_instructions(Reg)) + I.dump(); } #endif @@ -422,3 +415,18 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, return false; return true; } + +/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the +/// specified register as undefined which causes the DBG_VALUE to be +/// deleted during LiveDebugVariables analysis. +void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const { + // Mark any DBG_VALUE that uses Reg as undef (but don't delete it.) + MachineRegisterInfo::use_instr_iterator nextI; + for (use_instr_iterator I = use_instr_begin(Reg), E = use_instr_end(); + I != E; I = nextI) { + nextI = std::next(I); // I is invalidated by the setReg + MachineInstr *UseMI = &*I; + if (UseMI->isDebugValue()) + UseMI->getOperand(0).setReg(0U); + } +} diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 17f0af8..d9173a2 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -29,6 +29,8 @@ #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; +#define DEBUG_TYPE "machine-ssaupdater" + typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); @@ -36,7 +38,7 @@ static AvailableValsTy &getAvailableVals(void *AV) { MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *NewPHI) - : AV(0), InsertedPHIs(NewPHI) { + : AV(nullptr), InsertedPHIs(NewPHI) { TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); } @@ -48,7 +50,7 @@ MachineSSAUpdater::~MachineSSAUpdater() { /// Initialize - Reset this object to get ready for a new set of SSA /// updates. ProtoValue is the value used to name PHI nodes. void MachineSSAUpdater::Initialize(unsigned V) { - if (AV == 0) + if (!AV) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); @@ -230,16 +232,6 @@ void MachineSSAUpdater::RewriteUse(MachineOperand &U) { U.setReg(NewVR); } -void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { - MRI->replaceRegWith(OldReg, NewReg); - - AvailableValsTy &AvailableVals = getAvailableVals(AV); - for (DenseMap<MachineBasicBlock*, unsigned>::iterator - I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I) - if (I->second == OldReg) - I->second = NewReg; -} - /// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl /// template, specialized for MachineSSAUpdater. 
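With ReplaceRegWith removed from MachineSSAUpdater below, the remaining update flow is Initialize, AddAvailableValue once per defining block, then RewriteUse per operand. A sketch under the assumption that the rest of the MachineSSAUpdater API is unchanged; OldReg, NewReg1, NewReg2, MBB1, and MBB2 are placeholders:

    MachineSSAUpdater SSAUpdate(MF);
    SSAUpdate.Initialize(OldReg);               // PHIs named after OldReg
    SSAUpdate.AddAvailableValue(MBB1, NewReg1); // value live out of MBB1
    SSAUpdate.AddAvailableValue(MBB2, NewReg2); // value live out of MBB2
    // Collect operands first: RewriteUse() edits the operand, which would
    // invalidate a live use_operands() iterator.
    SmallVector<MachineOperand*, 8> Uses;
    for (MachineOperand &MO : MRI->use_operands(OldReg))
      Uses.push_back(&MO);
    for (MachineOperand *MO : Uses)
      SSAUpdate.RewriteUse(*MO);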
namespace llvm { @@ -323,7 +315,7 @@ public: static MachineInstr *InstrIsPHI(MachineInstr *I) { if (I && I->isPHI()) return I; - return 0; + return nullptr; } /// ValueIsPHI - Check if the instruction that defines the specified register @@ -338,7 +330,7 @@ public: MachineInstr *PHI = ValueIsPHI(Val, Updater); if (PHI && PHI->getNumOperands() <= 1) return PHI; - return 0; + return nullptr; } /// GetPHIValue - For the specified PHI instruction, return the register diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index e71c4df..44191f7 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -12,10 +12,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "misched" - #include "llvm/CodeGen/MachineScheduler.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -36,6 +33,8 @@ using namespace llvm; +#define DEBUG_TYPE "misched" + namespace llvm { cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, cl::desc("Force top-down list scheduling")); @@ -49,6 +48,11 @@ static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); + +static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden, + cl::desc("Only schedule this function")); +static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, + cl::desc("Only schedule this MBB#")); #else static bool ViewMISchedDAGs = false; #endif // NDEBUG @@ -81,7 +85,7 @@ void ScheduleDAGMutation::anchor() {} //===----------------------------------------------------------------------===// MachineSchedContext::MachineSchedContext(): - MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { + MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) { RegClassInfo = new RegisterClassInfo(); } @@ -90,24 +94,46 @@ MachineSchedContext::~MachineSchedContext() { } namespace { +/// Base class for a machine scheduler class that can run at any point. +class MachineSchedulerBase : public MachineSchedContext, + public MachineFunctionPass { +public: + MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {} + + void print(raw_ostream &O, const Module* = nullptr) const override; + +protected: + void scheduleRegions(ScheduleDAGInstrs &Scheduler); +}; + /// MachineScheduler runs after coalescing and before register allocation. -class MachineScheduler : public MachineSchedContext, - public MachineFunctionPass { +class MachineScheduler : public MachineSchedulerBase { public: MachineScheduler(); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction&) override; + + static char ID; // Class identification, replacement for typeinfo + +protected: + ScheduleDAGInstrs *createMachineScheduler(); +}; - virtual void releaseMemory() {} +/// PostMachineScheduler runs shortly before code emission.
+class PostMachineScheduler : public MachineSchedulerBase { +public: + PostMachineScheduler(); - virtual bool runOnMachineFunction(MachineFunction&); + void getAnalysisUsage(AnalysisUsage &AU) const override; - virtual void print(raw_ostream &O, const Module* = 0) const; + bool runOnMachineFunction(MachineFunction&) override; static char ID; // Class identification, replacement for typeinfo protected: - ScheduleDAGInstrs *createMachineScheduler(); + ScheduleDAGInstrs *createPostMachineScheduler(); }; } // namespace @@ -124,7 +150,7 @@ INITIALIZE_PASS_END(MachineScheduler, "misched", "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() -: MachineFunctionPass(ID) { +: MachineSchedulerBase(ID) { initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); } @@ -141,12 +167,32 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +char PostMachineScheduler::ID = 0; + +char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; + +INITIALIZE_PASS(PostMachineScheduler, "postmisched", + "PostRA Machine Instruction Scheduler", false, false) + +PostMachineScheduler::PostMachineScheduler() +: MachineSchedulerBase(ID) { + initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); +} + +void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + MachinePassRegistry MachineSchedRegistry::Registry; /// A dummy default scheduler factory indicates whether the scheduler /// is overridden on the command line. static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { - return 0; + return nullptr; } /// MachineSchedOpt allows command line selection of the scheduler. @@ -162,8 +208,8 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.", /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C); - +static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); +static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C); /// Decrement this iterator until reaching the top or a non-debug instr. static MachineBasicBlock::const_iterator @@ -222,7 +268,20 @@ ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { return Scheduler; // Default to GenericScheduler. - return createGenericSched(this); + return createGenericSchedLive(this); +} + +/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by +/// the caller. We don't have a command line option to override the postRA +/// scheduler. The Target must configure it. +ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { + // Get the postRA scheduler set by the target for this function. + ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this); + if (Scheduler) + return Scheduler; + + // Default to GenericScheduler. + return createGenericSchedPostRA(this); } /// Top-level MachineScheduler pass driver. 
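The fallback chain above implies two target hooks that this hunk does not show: the subtarget gate enablePostMachineScheduler() (checked in runOnMachineFunction below) and the TargetPassConfig::createPostMachineScheduler() override. A sketch with hypothetical target names; FooSubtarget/FooPassConfig are invented and the exact signatures (const-ness in particular) are assumptions:

    // Hypothetical target wiring for the new post-RA MI scheduler.
    bool FooSubtarget::enablePostMachineScheduler() const {
      return true; // opt this target in to PostMachineScheduler
    }
    ScheduleDAGInstrs *
    FooPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
      // Returning null selects the createGenericSchedPostRA() default; a
      // target could instead build a ScheduleDAGMI with a custom strategy.
      return nullptr;
    }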
@@ -252,7 +311,6 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { AA = &getAnalysis<AliasAnalysis>(); LIS = &getAnalysis<LiveIntervals>(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); if (VerifyScheduling) { DEBUG(LIS->dump()); @@ -262,7 +320,66 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. - OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); + std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); + scheduleRegions(*Scheduler); + + DEBUG(LIS->dump()); + if (VerifyScheduling) + MF->verify(this, "After machine scheduling."); + return true; +} + +bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { + if (skipOptnoneFunction(*mf.getFunction())) + return false; + + const TargetSubtargetInfo &ST = + mf.getTarget().getSubtarget<TargetSubtargetInfo>(); + if (!ST.enablePostMachineScheduler()) { + DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); + return false; + } + DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); + + // Initialize the context of the pass. + MF = &mf; + PassConfig = &getAnalysis<TargetPassConfig>(); + + if (VerifyScheduling) + MF->verify(this, "Before post machine scheduling."); + + // Instantiate the selected scheduler for this target, function, and + // optimization level. + std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); + scheduleRegions(*Scheduler); + + if (VerifyScheduling) + MF->verify(this, "After post machine scheduling."); + return true; +} + +/// Return true if the given instruction should not be included in a scheduling +/// region. +/// +/// MachineScheduler does not currently support scheduling across calls. To +/// handle calls, the DAG builder needs to be modified to create register +/// anti/output dependencies on the registers clobbered by the call's regmask +/// operand. In PreRA scheduling, the stack pointer adjustment already prevents +/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce +/// the boundary, but there would be no benefit to postRA scheduling across +/// calls this late anyway. +static bool isSchedBoundary(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB, + MachineFunction *MF, + const TargetInstrInfo *TII, + bool IsPostRA) { + return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); +} + +/// Main driver for both MachineScheduler and PostMachineScheduler. +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + bool IsPostRA = Scheduler.isPostRA(); // Visit all machine basic blocks. // @@ -271,7 +388,15 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { - Scheduler->startBlock(MBB); + Scheduler.startBlock(MBB); + +#ifndef NDEBUG + if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) + continue; + if (SchedOnlyBlock.getNumOccurrences() + && (int)SchedOnlyBlock != MBB->getNumber()) + continue; +#endif // Break the block into scheduling regions [I, RegionEnd), and schedule each // region as soon as it is discovered. RegionEnd points the scheduling @@ -283,13 +408,16 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The Scheduler may insert instructions during either schedule() or // exitRegion(), even for empty regions.
So the local iterators 'I' and // 'RegionEnd' are invalid across these calls. - unsigned RemainingInstrs = MBB->size(); + // + // MBB::size() uses instr_iterator to count. Here we need a bundle to count + // as a single instruction. + unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end()); for(MachineBasicBlock::iterator RegionEnd = MBB->end(); - RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { + RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) { // Avoid decrementing RegionEnd for blocks with no terminator. - if (RegionEnd != MBB->end() - || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { + if (RegionEnd != MBB->end() || + isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) { --RegionEnd; // Count the boundary instruction. --RemainingInstrs; @@ -300,21 +428,22 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { - if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) + if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA)) break; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs); + Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). - if (I == RegionEnd || I == llvm::prior(RegionEnd)) { + if (I == RegionEnd || I == std::prev(RegionEnd)) { // Close the current region. Bundle the terminator if needed. // This invalidates 'RegionEnd' and 'I'. - Scheduler->exitRegion(); + Scheduler.exitRegion(); continue; } - DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "") + << "MI Scheduling **********\n"); DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *I << " To: "; @@ -325,47 +454,46 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. - Scheduler->schedule(); + Scheduler.schedule(); // Close the current region. - Scheduler->exitRegion(); + Scheduler.exitRegion(); // Scheduling has invalidated the current iterator 'I'. Ask the // scheduler for the top of it's scheduled region. - RegionEnd = Scheduler->begin(); + RegionEnd = Scheduler.begin(); } assert(RemainingInstrs == 0 && "Instruction count mismatch!"); - Scheduler->finishBlock(); + Scheduler.finishBlock(); + if (Scheduler.isPostRA()) { + // FIXME: Ideally, no further passes should rely on kill flags. However, + // thumb2 size reduction is currently an exception. 
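The counting subtlety flagged in the comment above is easy to trip over: instr_iterator steps into bundles, while the top-level iterator treats each bundle as one unit. In short (MBB is a MachineBasicBlock pointer):

    // MBB::size() counts with instr_iterator, so a 3-instruction bundle
    // contributes 3; distance over the top-level iterators counts it once.
    unsigned BundleCount = std::distance(MBB->begin(), MBB->end());
    unsigned InstrCount  = MBB->size(); // >= BundleCount once bundles exist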
+ Scheduler.fixupKills(MBB); + } } - Scheduler->finalizeSchedule(); - DEBUG(LIS->dump()); - if (VerifyScheduling) - MF->verify(this, "After machine scheduling."); - return true; + Scheduler.finalizeSchedule(); } -void MachineScheduler::print(raw_ostream &O, const Module* m) const { +void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { // unimplemented } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ReadyQueue::dump() { dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; } -#endif //===----------------------------------------------------------------------===// -// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals -// preservation. -//===----------------------------------------------------------------------===// +// ScheduleDAGMI - Basic machine instruction scheduling. This is +// independent of PreRA/PostRA scheduling and involves no extra book-keeping for +// virtual registers. +//===----------------------------------------------------------------------===// +// Provide a vtable anchor. ScheduleDAGMI::~ScheduleDAGMI() { - delete DFSResult; - DeleteContainerPointers(Mutations); - delete SchedImpl; } bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { @@ -403,9 +531,14 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { dbgs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif + // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. + if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency()) + SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency(); + --SuccSU->NumPredsLeft; if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) SchedImpl->releaseTopNode(SuccSU); @@ -437,9 +570,14 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif + // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. + if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency()) + PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency(); + --PredSU->NumSuccsLeft; if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) SchedImpl->releaseBottomNode(PredSU); @@ -453,10 +591,24 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) { } } +/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after +/// crossing a scheduling boundary. [begin, end) includes all instructions in +/// the region, including the boundary itself and single-instruction regions +/// that don't get scheduled. +void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) +{ + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); + + SchedImpl->initPolicy(begin, end, regioninstrs); +} + /// This is normally called from the main scheduler loop but may also be invoked /// by the scheduling strategy to perform additional code motion.
-void ScheduleDAGMI::moveInstruction(MachineInstr *MI, - MachineBasicBlock::iterator InsertPos) { +void ScheduleDAGMI::moveInstruction( + MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { // Advance RegionBegin if the first instruction moves down. if (&*RegionBegin == MI) ++RegionBegin; @@ -465,7 +617,8 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI, BB->splice(InsertPos, BB, MI); // Update LiveIntervals - LIS->handleMove(MI, /*UpdateFlags=*/true); + if (LIS) + LIS->handleMove(MI, /*UpdateFlags=*/true); // Recede RegionBegin if an instruction moves above the first. if (RegionBegin == InsertPos) @@ -483,31 +636,215 @@ bool ScheduleDAGMI::checkSchedLimit() { return true; } +/// Per-region scheduling driver, called back from +/// MachineScheduler::runOnMachineFunction. This is a simplified driver that +/// does not consider liveness or register pressure. It is useful for PostRA +/// scheduling and potentially other custom schedulers. +void ScheduleDAGMI::schedule() { + // Build the DAG. + buildSchedGraph(AA); + + Topo.InitDAGTopologicalSorting(); + + postprocessDAG(); + + SmallVector<SUnit*, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + // This may initialize a DFSResult to be used for queue priority. + SchedImpl->initialize(this); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + if (ViewMISchedDAGs) viewGraph(); + + // Initialize ready queues now that the DAG and priority data are finalized. + initQueues(TopRoots, BotRoots); + + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + assert(!SU->isScheduled && "Node already scheduled"); + if (!checkSchedLimit()) + break; + + MachineInstr *MI = SU->getInstr(); + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); + else + moveInstruction(MI, CurrentTop); + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + MachineBasicBlock::iterator priorII = + priorNonDebug(CurrentBottom, CurrentTop); + if (&*priorII == MI) + CurrentBottom = priorII; + else { + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, priorII); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + } + // Notify the scheduling strategy before updating the DAG. + // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues + // runs, it can then use the accurate ReadyCycle time to determine whether + // newly released nodes can move to the readyQ. + SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); + + DEBUG({ + unsigned BBNum = begin()->getParent()->getNumber(); + dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} + +/// Apply each ScheduleDAGMutation step in order. 
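Mutations now run through postprocessDAG() for both the PreRA and PostRA drivers, so target- or pass-specific DAG tweaks stay out of the core scheduler. A minimal sketch of a custom mutation; KeepAdjacent is hypothetical, canAddEdge appears in the hunks above, and the addEdge/SDep::Cluster pairing is assumed from the same ScheduleDAGMI interface:

    // Adds a weak cluster edge so two DAG nodes tend to issue back to back.
    struct KeepAdjacent : ScheduleDAGMutation {
      void apply(ScheduleDAGMI *DAG) override {
        if (DAG->SUnits.size() < 2)
          return;
        SUnit *A = &DAG->SUnits[0];
        SUnit *B = &DAG->SUnits[1];
        DAG->addEdge(B, SDep(A, SDep::Cluster)); // weak ordering hint only
      }
    };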
+void ScheduleDAGMI::postprocessDAG() { + for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { + Mutations[i]->apply(this); + } +} + +void ScheduleDAGMI:: +findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, + SmallVectorImpl<SUnit*> &BotRoots) { + for (std::vector<SUnit>::iterator + I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + SUnit *SU = &(*I); + assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); + + // Order predecessors so DFSResult follows the critical path. + SU->biasCriticalPath(); + + // A SUnit is ready to top schedule if it has no predecessors. + if (!I->NumPredsLeft) + TopRoots.push_back(SU); + // A SUnit is ready to bottom schedule if it has no successors. + if (!I->NumSuccsLeft) + BotRoots.push_back(SU); + } + ExitSU.biasCriticalPath(); +} + +/// Identify DAG roots and setup scheduler queues. +void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, + ArrayRef<SUnit*> BotRoots) { + NextClusterSucc = nullptr; + NextClusterPred = nullptr; + + // Release all DAG roots for scheduling, not including EntrySU/ExitSU. + // + // Nodes with unreleased weak edges can still be roots. + // Release top roots in forward order. + for (SmallVectorImpl<SUnit*>::const_iterator + I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { + SchedImpl->releaseTopNode(*I); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl<SUnit*>::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { + SchedImpl->releaseBottomNode(*I); + } + + releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); + + SchedImpl->registerRoots(); + + // Advance past initial DebugValues. + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); + CurrentBottom = RegionEnd; +} + +/// Update scheduler queues after scheduling an instruction. +void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { + // Release dependent instructions for scheduling. + if (IsTopNode) + releaseSuccessors(SU); + else + releasePredecessors(SU); + + SU->isScheduled = true; +} + +/// Reinsert any remaining debug_values, just like the PostRA scheduler. +void ScheduleDAGMI::placeDebugValues() { + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) { + BB->splice(RegionBegin, BB, FirstDbgValue); + RegionBegin = FirstDbgValue; + } + + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrevMI = P.second; + if (&*RegionBegin == DbgValue) + ++RegionBegin; + BB->splice(++OrigPrevMI, BB, DbgValue); + if (OrigPrevMI == std::prev(RegionEnd)) + RegionEnd = DbgValue; + } + DbgValues.clear(); + FirstDbgValue = nullptr; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGMI::dumpSchedule() const { + for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { + if (SUnit *SU = getSUnit(&(*MI))) + SU->dump(this); + else + dbgs() << "Missing SUnit\n"; + } +} +#endif + +//===----------------------------------------------------------------------===// +// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals +// preservation. 
+//===----------------------------------------------------------------------===// + +ScheduleDAGMILive::~ScheduleDAGMILive() { + delete DFSResult; +} + /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after /// crossing a scheduling boundary. [begin, end) includes all instructions in /// the region, including the boundary itself and single-instruction regions /// that don't get scheduled. -void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, +void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); + // ScheduleDAGMI initializes SchedImpl's per-region policy. + ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs); // For convenience remember the end of the liveness region. - LiveRegionEnd = - (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); + LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd); SUPressureDiffs.clear(); - SchedImpl->initPolicy(begin, end, regioninstrs); - ShouldTrackPressure = SchedImpl->shouldTrackPressure(); } // Setup the register pressure trackers for the top and bottom // scheduled regions. -void ScheduleDAGMI::initRegPressure() { +void ScheduleDAGMILive::initRegPressure() { TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); @@ -567,7 +904,7 @@ void ScheduleDAGMI::initRegPressure() { dbgs() << "\n"); } -void ScheduleDAGMI:: +void ScheduleDAGMILive:: updateScheduledPressure(const SUnit *SU, const std::vector<unsigned> &NewMaxPressure) { const PressureDiff &PDiff = getPressureDiff(SU); @@ -595,7 +932,7 @@ updateScheduledPressure(const SUnit *SU, /// Update the PressureDiff array for liveness after scheduling this /// instruction. -void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { +void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { /// FIXME: Currently assuming single-use physregs. unsigned Reg = LiveUses[LUIdx]; @@ -644,9 +981,9 @@ void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { /// so that it can be easily extended by experimental schedulers. Generally, /// implementing MachineSchedStrategy should be sufficient to implement a new /// scheduling algorithm. However, if a scheduler further subclasses -/// ScheduleDAGMI then it will want to override this virtual method in order to -/// update any specialized state. -void ScheduleDAGMI::schedule() { +/// ScheduleDAGMILive then it will want to override this virtual method in order +/// to update any specialized state. +void ScheduleDAGMILive::schedule() { buildDAGWithRegPressure(); Topo.InitDAGTopologicalSorting(); @@ -667,6 +1004,11 @@ void ScheduleDAGMI::schedule() { // Initialize ready queues now that the DAG and priority data are finalized.
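As the doc comment above says, implementing MachineSchedStrategy is meant to be enough for a new algorithm. A deliberately naive sketch of the core hooks; NaiveBottomUp is hypothetical and ignores every real heuristic:

    // Strict bottom-up, LIFO ready list, no latency or pressure heuristics.
    struct NaiveBottomUp : MachineSchedStrategy {
      std::vector<SUnit*> Ready;
      void initialize(ScheduleDAGMI *DAG) override { Ready.clear(); }
      SUnit *pickNode(bool &IsTopNode) override {
        if (Ready.empty())
          return nullptr;        // tells the driver this region is done
        IsTopNode = false;       // always schedule from the bottom zone
        SUnit *SU = Ready.back();
        Ready.pop_back();
        return SU;
      }
      void schedNode(SUnit *SU, bool IsTopNode) override {}
      void releaseTopNode(SUnit *SU) override {}
      void releaseBottomNode(SUnit *SU) override { Ready.push_back(SU); }
    };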
initQueues(TopRoots, BotRoots); + if (ShouldTrackPressure) { + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + } + bool IsTopNode = false; while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { assert(!SU->isScheduled && "Node already scheduled"); @@ -676,6 +1018,18 @@ void ScheduleDAGMI::schedule() { scheduleMI(SU, IsTopNode); updateQueues(SU, IsTopNode); + + if (DFSResult) { + unsigned SubtreeID = DFSResult->getSubtreeID(SU); + if (!ScheduledTrees.test(SubtreeID)) { + ScheduledTrees.set(SubtreeID); + DFSResult->scheduleTree(SubtreeID); + SchedImpl->scheduleTree(SubtreeID); + } + } + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -690,7 +1044,7 @@ void ScheduleDAGMI::schedule() { } /// Build the DAG and setup three register pressure trackers. -void ScheduleDAGMI::buildDAGWithRegPressure() { +void ScheduleDAGMILive::buildDAGWithRegPressure() { if (!ShouldTrackPressure) { RPTracker.reset(); RegionCriticalPSets.clear(); @@ -713,14 +1067,7 @@ void ScheduleDAGMI::buildDAGWithRegPressure() { initRegPressure(); } -/// Apply each ScheduleDAGMutation step in order. -void ScheduleDAGMI::postprocessDAG() { - for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { - Mutations[i]->apply(this); - } -} - -void ScheduleDAGMI::computeDFSResult() { +void ScheduleDAGMILive::computeDFSResult() { if (!DFSResult) DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); DFSResult->clear(); @@ -730,26 +1077,6 @@ void ScheduleDAGMI::computeDFSResult() { ScheduledTrees.resize(DFSResult->getNumSubtrees()); } -void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, - SmallVectorImpl<SUnit*> &BotRoots) { - for (std::vector<SUnit>::iterator - I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - SUnit *SU = &(*I); - assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); - - // Order predecessors so DFSResult follows the critical path. - SU->biasCriticalPath(); - - // A SUnit is ready to top schedule if it has no predecessors. - if (!I->NumPredsLeft) - TopRoots.push_back(SU); - // A SUnit is ready to bottom schedule if it has no successors. - if (!I->NumSuccsLeft) - BotRoots.push_back(SU); - } - ExitSU.biasCriticalPath(); -} - /// Compute the max cyclic critical path through the DAG. The scheduling DAG /// only provides the critical path for single block loops. To handle loops that /// span blocks, we could use the vreg path latencies provided by @@ -773,7 +1100,10 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, /// LiveOutDepth - LiveInDepth = 3 - 1 = 2 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2 /// CyclicCriticalPath = min(2, 2) = 2 -unsigned ScheduleDAGMI::computeCyclicCriticalPath() { +/// +/// This could be relevant to PostRA scheduling, but is currently implemented +/// assuming LiveIntervals. +unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // This only applies to single block loop. if (!BB->isSuccessor(BB)) return 0; @@ -835,44 +1165,8 @@ unsigned ScheduleDAGMI::computeCyclicCriticalPath() { return MaxCyclicLatency; } -/// Identify DAG roots and setup scheduler queues. -void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, - ArrayRef<SUnit*> BotRoots) { - NextClusterSucc = NULL; - NextClusterPred = NULL; - - // Release all DAG roots for scheduling, not including EntrySU/ExitSU. 
- // - // Nodes with unreleased weak edges can still be roots. - // Release top roots in forward order. - for (SmallVectorImpl<SUnit*>::const_iterator - I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { - SchedImpl->releaseTopNode(*I); - } - // Release bottom roots in reverse order so the higher priority nodes appear - // first. This is more natural and slightly more efficient. - for (SmallVectorImpl<SUnit*>::const_reverse_iterator - I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { - SchedImpl->releaseBottomNode(*I); - } - - releaseSuccessors(&EntrySU); - releasePredecessors(&ExitSU); - - SchedImpl->registerRoots(); - - // Advance past initial DebugValues. - CurrentTop = nextIfDebug(RegionBegin, RegionEnd); - CurrentBottom = RegionEnd; - - if (ShouldTrackPressure) { - assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); - TopRPTracker.setPos(CurrentTop); - } -} - /// Move an instruction and update register pressure. -void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { +void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { // Move the instruction to its new location in the instruction stream. MachineInstr *MI = SU->getInstr(); @@ -917,63 +1211,6 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { } } -/// Update scheduler queues after scheduling an instruction. -void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { - // Release dependent instructions for scheduling. - if (IsTopNode) - releaseSuccessors(SU); - else - releasePredecessors(SU); - - SU->isScheduled = true; - - if (DFSResult) { - unsigned SubtreeID = DFSResult->getSubtreeID(SU); - if (!ScheduledTrees.test(SubtreeID)) { - ScheduledTrees.set(SubtreeID); - DFSResult->scheduleTree(SubtreeID); - SchedImpl->scheduleTree(SubtreeID); - } - } - - // Notify the scheduling strategy after updating the DAG. - SchedImpl->schedNode(SU, IsTopNode); -} - -/// Reinsert any remaining debug_values, just like the PostRA scheduler. -void ScheduleDAGMI::placeDebugValues() { - // If first instruction was a DBG_VALUE then put it back. - if (FirstDbgValue) { - BB->splice(RegionBegin, BB, FirstDbgValue); - RegionBegin = FirstDbgValue; - } - - for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator - DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); - MachineInstr *DbgValue = P.first; - MachineBasicBlock::iterator OrigPrevMI = P.second; - if (&*RegionBegin == DbgValue) - ++RegionBegin; - BB->splice(++OrigPrevMI, BB, DbgValue); - if (OrigPrevMI == llvm::prior(RegionEnd)) - RegionEnd = DbgValue; - } - DbgValues.clear(); - FirstDbgValue = NULL; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void ScheduleDAGMI::dumpSchedule() const { - for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { - if (SUnit *SU = getSUnit(&(*MI))) - SU->dump(this); - else - dbgs() << "Missing SUnit\n"; - } -} -#endif - //===----------------------------------------------------------------------===// // LoadClusterMutation - DAG post-processing to cluster loads. 
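The hunk below replaces the free comparison function LoadInfoLess with an operator< built on std::tie; the two spellings are equivalent lexicographic compares over (BaseReg, Offset):

    // Hand-written form (the deleted LoadInfoLess):
    if (LHS.BaseReg != RHS.BaseReg)
      return LHS.BaseReg < RHS.BaseReg;
    return LHS.Offset < RHS.Offset;
    // std::tie form (the new member operator<, written as a free compare):
    return std::tie(LHS.BaseReg, LHS.Offset) <
           std::tie(RHS.BaseReg, RHS.Offset);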
//===----------------------------------------------------------------------===// @@ -988,9 +1225,11 @@ class LoadClusterMutation : public ScheduleDAGMutation { unsigned Offset; LoadInfo(SUnit *su, unsigned reg, unsigned ofs) : SU(su), BaseReg(reg), Offset(ofs) {} + + bool operator<(const LoadInfo &RHS) const { + return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset); + } }; - static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS, - const LoadClusterMutation::LoadInfo &RHS); const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -999,20 +1238,12 @@ public: const TargetRegisterInfo *tri) : TII(tii), TRI(tri) {} - virtual void apply(ScheduleDAGMI *DAG); + void apply(ScheduleDAGMI *DAG) override; protected: void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG); }; } // anonymous -bool LoadClusterMutation::LoadInfoLess( - const LoadClusterMutation::LoadInfo &LHS, - const LoadClusterMutation::LoadInfo &RHS) { - if (LHS.BaseReg != RHS.BaseReg) - return LHS.BaseReg < RHS.BaseReg; - return LHS.Offset < RHS.Offset; -} - void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG) { SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords; @@ -1025,7 +1256,7 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, } if (LoadRecords.size() < 2) return; - std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess); + std::sort(LoadRecords.begin(), LoadRecords.end()); unsigned ClusterLength = 1; for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { @@ -1102,7 +1333,7 @@ class MacroFusion : public ScheduleDAGMutation { public: MacroFusion(const TargetInstrInfo *tii): TII(tii) {} - virtual void apply(ScheduleDAGMI *DAG); + void apply(ScheduleDAGMI *DAG) override; }; } // anonymous @@ -1151,10 +1382,10 @@ class CopyConstrain : public ScheduleDAGMutation { public: CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} - virtual void apply(ScheduleDAGMI *DAG); + void apply(ScheduleDAGMI *DAG) override; protected: - void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG); + void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG); }; } // anonymous @@ -1177,7 +1408,7 @@ protected: /// this algorithm should handle extended blocks. An EBB is a set of /// contiguously numbered blocks such that the previous block in the EBB is /// always the single predecessor. -void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { +void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { LiveIntervals *LIS = DAG->getLIS(); MachineInstr *Copy = CopySU->getInstr(); @@ -1227,19 +1458,19 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { // Check if GlobalLI contains a hole in the vicinity of LocalLI. if (GlobalSegment != GlobalLI->begin()) { // Two address defs have no hole. - if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end, + if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end, GlobalSegment->start)) { return; } // If the prior global segment may be defined by the same two-address // instruction that also defines LocalLI, then can't make a hole here. - if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start, + if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start, LocalLI->beginIndex())) { return; } // If GlobalLI has a prior segment, it must be live into the EBB. 
Otherwise // it would be a disconnected component in the live range. - assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && + assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() && "Disconnected LRG within the scheduling region."); } MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); @@ -1302,6 +1533,8 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { /// \brief Callback from DAG postProcessing to create weak edges to encourage /// copy elimination. void CopyConstrain::apply(ScheduleDAGMI *DAG) { + assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals"); + MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); if (FirstPos == DAG->end()) return; @@ -1314,370 +1547,53 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) { if (!SU->getInstr()->isCopy()) continue; - constrainLocalCopy(SU, DAG); + constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG)); } } //===----------------------------------------------------------------------===// -// GenericScheduler - Implementation of the generic MachineSchedStrategy. +// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler +// and possibly other custom schedulers. //===----------------------------------------------------------------------===// -namespace { -/// GenericScheduler shrinks the unscheduled zone using heuristics to balance -/// the schedule. -class GenericScheduler : public MachineSchedStrategy { -public: - /// Represent the type of SchedCandidate found within a single queue. - /// pickNodeBidirectional depends on these listed by decreasing priority. - enum CandReason { - NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax, - ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, - TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; - -#ifndef NDEBUG - static const char *getReasonStr(GenericScheduler::CandReason Reason); -#endif - - /// Policy for scheduling the next instruction in the candidate's zone. - struct CandPolicy { - bool ReduceLatency; - unsigned ReduceResIdx; - unsigned DemandResIdx; - - CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} - }; - - /// Status of an instruction's critical resource consumption. - struct SchedResourceDelta { - // Count critical resources in the scheduled region required by SU. - unsigned CritResources; - - // Count critical resources from another region consumed by SU. - unsigned DemandedResources; - - SchedResourceDelta(): CritResources(0), DemandedResources(0) {} - - bool operator==(const SchedResourceDelta &RHS) const { - return CritResources == RHS.CritResources - && DemandedResources == RHS.DemandedResources; - } - bool operator!=(const SchedResourceDelta &RHS) const { - return !operator==(RHS); - } - }; - - /// Store the state used by GenericScheduler heuristics, required for the - /// lifetime of one invocation of pickNode(). - struct SchedCandidate { - CandPolicy Policy; - - // The best SUnit candidate. - SUnit *SU; - - // The reason for this candidate. - CandReason Reason; - - // Set of reasons that apply to multiple candidates. - uint32_t RepeatReasonSet; - - // Register pressure values for the best candidate. - RegPressureDelta RPDelta; - - // Critical resource consumption of the best candidate. 
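The CandReason enum above is listed by decreasing priority, and node picking walks the criteria in that order until one candidate wins. The comparison helpers all follow one idiom; a sketch of it, close to but not verbatim the real tryLess/tryGreater helpers:

    // Prefer the candidate with the smaller value; record why it won.
    static bool tryLess(int TryVal, int CandVal,
                        GenericScheduler::SchedCandidate &TryCand,
                        GenericScheduler::SchedCandidate &Cand,
                        GenericScheduler::CandReason Reason) {
      if (TryVal < CandVal) {
        TryCand.Reason = Reason;  // TryCand wins on this criterion
        return true;
      }
      if (TryVal > CandVal) {
        if (Cand.Reason > Reason)
          Cand.Reason = Reason;   // Cand wins; keep the strongest reason
        return true;
      }
      return false;               // tie: fall through to the next criterion
    }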
- SchedResourceDelta ResDelta; - - SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} - - bool isValid() const { return SU; } - - // Copy the status of another candidate without changing policy. - void setBest(SchedCandidate &Best) { - assert(Best.Reason != NoCand && "uninitialized Sched candidate"); - SU = Best.SU; - Reason = Best.Reason; - RPDelta = Best.RPDelta; - ResDelta = Best.ResDelta; - } - - bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } - void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } - - void initResourceDelta(const ScheduleDAGMI *DAG, - const TargetSchedModel *SchedModel); - }; - - /// Summarize the unscheduled region. - struct SchedRemainder { - // Critical path through the DAG in expected latency. - unsigned CriticalPath; - unsigned CyclicCritPath; - - // Scaled count of micro-ops left to schedule. - unsigned RemIssueCount; - - bool IsAcyclicLatencyLimited; - - // Unscheduled resources - SmallVector<unsigned, 16> RemainingCounts; - - void reset() { - CriticalPath = 0; - CyclicCritPath = 0; - RemIssueCount = 0; - IsAcyclicLatencyLimited = false; - RemainingCounts.clear(); - } - - SchedRemainder() { reset(); } - - void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); - }; - - /// Each Scheduling boundary is associated with ready queues. It tracks the - /// current cycle in the direction of movement, and maintains the state - /// of "hazards" and other interlocks at the current cycle. - struct SchedBoundary { - ScheduleDAGMI *DAG; - const TargetSchedModel *SchedModel; - SchedRemainder *Rem; - - ReadyQueue Available; - ReadyQueue Pending; - bool CheckPending; - - // For heuristics, keep a list of the nodes that immediately depend on the - // most recently scheduled node. - SmallPtrSet<const SUnit*, 8> NextSUs; - - ScheduleHazardRecognizer *HazardRec; - - /// Number of cycles it takes to issue the instructions scheduled in this - /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. - /// See getStalls(). - unsigned CurrCycle; - - /// Micro-ops issued in the current cycle - unsigned CurrMOps; - - /// MinReadyCycle - Cycle of the soonest available instruction. - unsigned MinReadyCycle; - - // The expected latency of the critical path in this scheduled zone. - unsigned ExpectedLatency; - - // The latency of dependence chains leading into this zone. - // For each node scheduled bottom-up: DLat = max DLat, N.Depth. - // For each cycle scheduled: DLat -= 1. - unsigned DependentLatency; - - /// Count the scheduled (issued) micro-ops that can be retired by - /// time=CurrCycle assuming the first scheduled instr is retired at time=0. - unsigned RetiredMOps; - - // Count scheduled resources that have been executed. Resources are - // considered executed if they become ready in the time that it takes to - // saturate any resource including the one in question. Counts are scaled - // for direct comparison with other resources. Counts can be compared with - // MOps * getMicroOpFactor and Latency * getLatencyFactor. - SmallVector<unsigned, 16> ExecutedResCounts; - - /// Cache the max count for a single resource. - unsigned MaxExecutedResCount; - - // Cache the critical resources ID in this scheduled zone. - unsigned ZoneCritResIdx; - - // Is the scheduled region resource limited vs. latency limited. 
- bool IsResourceLimited; - -#ifndef NDEBUG - // Remember the greatest operand latency as an upper bound on the number of - // times we should retry the pending queue because of a hazard. - unsigned MaxObservedLatency; -#endif - - void reset() { - // A new HazardRec is created for each DAG and owned by SchedBoundary. - // Destroying and reconstructing it is very expensive though. So keep - // invalid, placeholder HazardRecs. - if (HazardRec && HazardRec->isEnabled()) { - delete HazardRec; - HazardRec = 0; - } - Available.clear(); - Pending.clear(); - CheckPending = false; - NextSUs.clear(); - CurrCycle = 0; - CurrMOps = 0; - MinReadyCycle = UINT_MAX; - ExpectedLatency = 0; - DependentLatency = 0; - RetiredMOps = 0; - MaxExecutedResCount = 0; - ZoneCritResIdx = 0; - IsResourceLimited = false; -#ifndef NDEBUG - MaxObservedLatency = 0; -#endif - // Reserve a zero-count for invalid CritResIdx. - ExecutedResCounts.resize(1); - assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); - } - - /// Pending queues extend the ready queues with the same ID and the - /// PendingFlag set. - SchedBoundary(unsigned ID, const Twine &Name): - DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << GenericScheduler::LogMaxQID, Name+".P"), - HazardRec(0) { - reset(); - } - - ~SchedBoundary() { delete HazardRec; } - - void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, - SchedRemainder *rem); - - bool isTop() const { - return Available.getID() == GenericScheduler::TopQID; - } - -#ifndef NDEBUG - const char *getResourceName(unsigned PIdx) { - if (!PIdx) - return "MOps"; - return SchedModel->getProcResource(PIdx)->Name; - } -#endif - - /// Get the number of latency cycles "covered" by the scheduled - /// instructions. This is the larger of the critical path within the zone - /// and the number of cycles required to issue the instructions. - unsigned getScheduledLatency() const { - return std::max(ExpectedLatency, CurrCycle); - } - - unsigned getUnscheduledLatency(SUnit *SU) const { - return isTop() ? SU->getHeight() : SU->getDepth(); - } - - unsigned getResourceCount(unsigned ResIdx) const { - return ExecutedResCounts[ResIdx]; - } - - /// Get the scaled count of scheduled micro-ops and resources, including - /// executed resources. - unsigned getCriticalCount() const { - if (!ZoneCritResIdx) - return RetiredMOps * SchedModel->getMicroOpFactor(); - return getResourceCount(ZoneCritResIdx); - } - - /// Get a scaled count for the minimum execution time of the scheduled - /// micro-ops that are ready to execute by getExecutedCount. Notice the - /// feedback loop. 
- unsigned getExecutedCount() const { - return std::max(CurrCycle * SchedModel->getLatencyFactor(), - MaxExecutedResCount); - } - - bool checkHazard(SUnit *SU); - - unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); - - unsigned getOtherResourceCount(unsigned &OtherCritIdx); - - void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone); - - void releaseNode(SUnit *SU, unsigned ReadyCycle); - - void bumpCycle(unsigned NextCycle); - - void incExecutedResources(unsigned PIdx, unsigned Count); - - unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); - - void bumpNode(SUnit *SU); - - void releasePending(); - - void removeReady(SUnit *SU); - - SUnit *pickOnlyChoice(); +static const unsigned InvalidCycle = ~0U; -#ifndef NDEBUG - void dumpScheduledState(); -#endif - }; - -private: - const MachineSchedContext *Context; - ScheduleDAGMI *DAG; - const TargetSchedModel *SchedModel; - const TargetRegisterInfo *TRI; - - // State of the top and bottom scheduled instruction boundaries. - SchedRemainder Rem; - SchedBoundary Top; - SchedBoundary Bot; - - MachineSchedPolicy RegionPolicy; -public: - /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) - enum { - TopQID = 1, - BotQID = 2, - LogMaxQID = 2 - }; - - GenericScheduler(const MachineSchedContext *C): - Context(C), DAG(0), SchedModel(0), TRI(0), - Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} - - virtual void initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs); - - bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; } - - virtual void initialize(ScheduleDAGMI *dag); - - virtual SUnit *pickNode(bool &IsTopNode); - - virtual void schedNode(SUnit *SU, bool IsTopNode); - - virtual void releaseTopNode(SUnit *SU); - - virtual void releaseBottomNode(SUnit *SU); - - virtual void registerRoots(); - -protected: - void checkAcyclicLatency(); - - void tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - RegPressureTracker &TempTracker); - - SUnit *pickNodeBidirectional(bool &IsTopNode); - - void pickNodeFromQueue(SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - SchedCandidate &Candidate); - - void reschedulePhysRegCopies(SUnit *SU, bool isTop); +SchedBoundary::~SchedBoundary() { delete HazardRec; } +void SchedBoundary::reset() { + // A new HazardRec is created for each DAG and owned by SchedBoundary. + // Destroying and reconstructing it is very expensive though. So keep + // invalid, placeholder HazardRecs. + if (HazardRec && HazardRec->isEnabled()) { + delete HazardRec; + HazardRec = nullptr; + } + Available.clear(); + Pending.clear(); + CheckPending = false; + NextSUs.clear(); + CurrCycle = 0; + CurrMOps = 0; + MinReadyCycle = UINT_MAX; + ExpectedLatency = 0; + DependentLatency = 0; + RetiredMOps = 0; + MaxExecutedResCount = 0; + ZoneCritResIdx = 0; + IsResourceLimited = false; + ReservedCycles.clear(); #ifndef NDEBUG - void traceCandidate(const SchedCandidate &Cand); + // Track the maximum number of stall cycles that could arise either from the + // latency of a DAG edge or the number of cycles that a processor resource is + // reserved (SchedBoundary::ReservedCycles). + MaxObservedStall = 0; #endif -}; -} // namespace + // Reserve a zero-count for invalid CritResIdx. 
+ ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); +} -void GenericScheduler::SchedRemainder:: +void SchedRemainder:: init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { reset(); if (!SchedModel->hasInstrSchedModel()) @@ -1698,175 +1614,47 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { } } -void GenericScheduler::SchedBoundary:: +void SchedBoundary:: init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { reset(); DAG = dag; SchedModel = smodel; Rem = rem; - if (SchedModel->hasInstrSchedModel()) + if (SchedModel->hasInstrSchedModel()) { ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); -} - -/// Initialize the per-region scheduling policy. -void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs) { - const TargetMachine &TM = Context->MF->getTarget(); - - // Avoid setting up the register pressure tracker for small regions to save - // compile time. As a rough heuristic, only track pressure when the number of - // schedulable instructions exceeds half the integer register file. - unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( - TM.getTargetLowering()->getRegClassFor(MVT::i32)); - - RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); - - // For generic targets, we default to bottom-up, because it's simpler and more - // compile-time optimizations have been implemented in that direction. - RegionPolicy.OnlyBottomUp = true; - - // Allow the subtarget to override default policy. - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); - ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); - - // After subtarget overrides, apply command line options. - if (!EnableRegPressure) - RegionPolicy.ShouldTrackPressure = false; - - // Check -misched-topdown/bottomup can force or unforce scheduling direction. - // e.g. -misched-bottomup=false allows scheduling in both directions. - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); - if (ForceBottomUp.getNumOccurrences() > 0) { - RegionPolicy.OnlyBottomUp = ForceBottomUp; - if (RegionPolicy.OnlyBottomUp) - RegionPolicy.OnlyTopDown = false; - } - if (ForceTopDown.getNumOccurrences() > 0) { - RegionPolicy.OnlyTopDown = ForceTopDown; - if (RegionPolicy.OnlyTopDown) - RegionPolicy.OnlyBottomUp = false; - } -} - -void GenericScheduler::initialize(ScheduleDAGMI *dag) { - DAG = dag; - SchedModel = DAG->getSchedModel(); - TRI = DAG->TRI; - - Rem.init(DAG, SchedModel); - Top.init(DAG, SchedModel, &Rem); - Bot.init(DAG, SchedModel, &Rem); - - // Initialize resource counts. - - // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or - // are disabled, then these HazardRecs will be disabled. 
- const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); - if (!Top.HazardRec) { - Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - } - if (!Bot.HazardRec) { - Bot.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - } -} - -void GenericScheduler::releaseTopNode(SUnit *SU) { - if (SU->isScheduled) - return; - - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); -#endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; - } - Top.releaseNode(SU, SU->TopReadyCycle); -} - -void GenericScheduler::releaseBottomNode(SUnit *SU) { - if (SU->isScheduled) - return; - - assert(SU->getInstr() && "Scheduled SUnit must have instr"); - - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); -#endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; + ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); } - Bot.releaseNode(SU, SU->BotReadyCycle); } -/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic -/// critical path by more cycles than it takes to drain the instruction buffer. -/// We estimate an upper bounds on in-flight instructions as: -/// -/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) -/// InFlightIterations = AcyclicPath / CyclesPerIteration -/// InFlightResources = InFlightIterations * LoopResources -/// -/// TODO: Check execution resources in addition to IssueCount. -void GenericScheduler::checkAcyclicLatency() { - if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) - return; - - // Scaled number of cycles per loop iteration. - unsigned IterCount = - std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), - Rem.RemIssueCount); - // Scaled acyclic critical path. - unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); - // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop - unsigned InFlightCount = - (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; - unsigned BufferLimit = - SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); - - Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; +/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat +/// these "soft stalls" differently than the hard stall cycles based on CPU +/// resources and computed by checkHazard(). A fully in-order model +/// (MicroOpBufferSize==0) will not make use of this since instructions are not +/// available for scheduling until they are ready. However, a weaker in-order +/// model may use this for heuristics. For example, if a processor has in-order +/// behavior when reading certain resources, this may come into play. 
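+///
+/// As an illustrative example with assumed numbers: if CurrCycle is 5 and an
+/// unbuffered SU's BotReadyCycle is 8, this returns 3 soft-stall cycles; a
+/// buffered SU always returns 0 and is handled by checkHazard() instead.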
+unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { + if (!SU->isUnbuffered) + return 0; - DEBUG(dbgs() << "IssueCycles=" - << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " - << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() - << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount - << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() - << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; - if (Rem.IsAcyclicLatencyLimited) - dbgs() << " ACYCLIC LATENCY LIMIT\n"); + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + if (ReadyCycle > CurrCycle) + return ReadyCycle - CurrCycle; + return 0; } -void GenericScheduler::registerRoots() { - Rem.CriticalPath = DAG->ExitSU.getDepth(); - - // Some roots may not feed into ExitSU. Check all of them in case. - for (std::vector<SUnit*>::const_iterator - I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { - if ((*I)->getDepth() > Rem.CriticalPath) - Rem.CriticalPath = (*I)->getDepth(); - } - DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); - - if (EnableCyclicPath) { - Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); - checkAcyclicLatency(); - } +/// Compute the next cycle at which the given processor resource can be +/// scheduled. +unsigned SchedBoundary:: +getNextResourceCycle(unsigned PIdx, unsigned Cycles) { + unsigned NextUnreserved = ReservedCycles[PIdx]; + // If this resource has never been used, always return cycle zero. + if (NextUnreserved == InvalidCycle) + return 0; + // For bottom-up scheduling add the cycles needed for the current operation. + if (!isTop()) + NextUnreserved += Cycles; + return NextUnreserved; } /// Does this SU have a hazard within the current instruction group. @@ -1882,23 +1670,41 @@ void GenericScheduler::registerRoots() { /// can dispatch per cycle. /// /// TODO: Also check whether the SU must start a new group. -bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { - if (HazardRec->isEnabled()) - return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; - +bool SchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled() + && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) { + return true; + } unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; } + if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles); + if (NRCycle > CurrCycle) { +#ifndef NDEBUG + MaxObservedStall = std::max(PI->Cycles, MaxObservedStall); +#endif + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(PI->ProcResourceIdx) + << "=" << NRCycle << "c\n"); + return true; + } + } + } return false; } // Find the unscheduled node in ReadySUs with the highest latency. 
-unsigned GenericScheduler::SchedBoundary:: +unsigned SchedBoundary:: findMaxLatency(ArrayRef<SUnit*> ReadySUs) { - SUnit *LateSU = 0; + SUnit *LateSU = nullptr; unsigned RemLatency = 0; for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); I != E; ++I) { @@ -1918,7 +1724,7 @@ findMaxLatency(ArrayRef<SUnit*> ReadySUs) { // Count resources in this zone and the remaining unscheduled // instruction. Return the max count, scaled. Set OtherCritIdx to the critical // resource index, or zero if the zone is issue limited. -unsigned GenericScheduler::SchedBoundary:: +unsigned SchedBoundary:: getOtherResourceCount(unsigned &OtherCritIdx) { OtherCritIdx = 0; if (!SchedModel->hasInstrSchedModel()) @@ -1939,74 +1745,22 @@ getOtherResourceCount(unsigned &OtherCritIdx) { if (OtherCritIdx) { DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) - << " " << getResourceName(OtherCritIdx) << "\n"); + << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); } return OtherCritCount; } -/// Set the CandPolicy for this zone given the current resources and latencies -/// inside and outside the zone. -void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, - SchedBoundary &OtherZone) { - // Now that potential stalls have been considered, apply preemptive heuristics - // based on the the total latency and resources inside and outside this - // zone. - - // Compute remaining latency. We need this both to determine whether the - // overall schedule has become latency-limited and whether the instructions - // outside this zone are resource or latency limited. - // - // The "dependent" latency is updated incrementally during scheduling as the - // max height/depth of scheduled nodes minus the cycles since it was - // scheduled: - // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone - // - // The "independent" latency is the max ready queue depth: - // ILat = max N.depth for N in Available|Pending - // - // RemainingLatency is the greater of independent and dependent latency. - unsigned RemLatency = DependentLatency; - RemLatency = std::max(RemLatency, findMaxLatency(Available.elements())); - RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements())); - - // Compute the critical resource outside the zone. - unsigned OtherCritIdx; - unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); - - bool OtherResLimited = false; - if (SchedModel->hasInstrSchedModel()) { - unsigned LFactor = SchedModel->getLatencyFactor(); - OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; - } - if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { - Policy.ReduceLatency |= true; - DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " - << RemLatency << " + " << CurrCycle << "c > CritPath " - << Rem->CriticalPath << "\n"); - } - // If the same resource is limiting inside and outside the zone, do nothing. 
-  if (ZoneCritResIdx == OtherCritIdx)
-    return;
-
-  DEBUG(
-    if (IsResourceLimited) {
-      dbgs() << "  " << Available.getName() << " ResourceLimited: "
-             << getResourceName(ZoneCritResIdx) << "\n";
-    }
-    if (OtherResLimited)
-      dbgs() << "  RemainingLimit: " << getResourceName(OtherCritIdx) << "\n";
-    if (!IsResourceLimited && !OtherResLimited)
-      dbgs() << "  Latency limited both directions.\n");
-
-  if (IsResourceLimited && !Policy.ReduceResIdx)
-    Policy.ReduceResIdx = ZoneCritResIdx;
+void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
+  assert(SU->getInstr() && "Scheduled SUnit must have instr");
 
-  if (OtherResLimited)
-    Policy.DemandResIdx = OtherCritIdx;
-}
+#ifndef NDEBUG
+  // ReadyCycle was bumped up to the CurrCycle when this node was
+  // scheduled, but CurrCycle may have been eagerly advanced immediately after
+  // scheduling, so may now be greater than ReadyCycle.
+  if (ReadyCycle > CurrCycle)
+    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
+#endif
 
-void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU,
-                                                  unsigned ReadyCycle) {
   if (ReadyCycle < MinReadyCycle)
     MinReadyCycle = ReadyCycle;
 
@@ -2022,8 +1776,22 @@ void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU,
   NextSUs.insert(SU);
 }
 
+void SchedBoundary::releaseTopNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
+
+  releaseNode(SU, SU->TopReadyCycle);
+}
+
+void SchedBoundary::releaseBottomNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
+
+  releaseNode(SU, SU->BotReadyCycle);
+}
+
 /// Move the boundary of scheduled code by one cycle.
-void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
+void SchedBoundary::bumpCycle(unsigned NextCycle) {
   if (SchedModel->getMicroOpBufferSize() == 0) {
     assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
     if (MinReadyCycle > NextCycle)
@@ -2061,8 +1829,7 @@ void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
 }
 
-void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
-                                                           unsigned Count) {
+void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
   ExecutedResCounts[PIdx] += Count;
   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
     MaxExecutedResCount = ExecutedResCounts[PIdx];
@@ -2075,11 +1842,11 @@ void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
 ///
 /// \return the next cycle at which the instruction may execute without
 /// oversubscribing resources.
-unsigned GenericScheduler::SchedBoundary::
-countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
+unsigned SchedBoundary::
+countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
   unsigned Factor = SchedModel->getResourceFactor(PIdx);
   unsigned Count = Factor * Cycles;
-  DEBUG(dbgs() << "  " << getResourceName(PIdx)
+  DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
                << " +" << Cycles << "x" << Factor << "u\n");
 
   // Update Executed resources counts.
@@ -2092,15 +1859,21 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
     ZoneCritResIdx = PIdx;
     DEBUG(dbgs() << "  *** Critical resource "
-          << getResourceName(PIdx) << ": "
+          << SchedModel->getResourceName(PIdx) << ": "
           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
   }
-  // TODO: We don't yet model reserved resources. It's not hard though.
-  return CurrCycle;
+  // For reserved resources, record the highest cycle using the resource.
+  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+  if (NextAvailable > CurrCycle) {
+    DEBUG(dbgs() << "  Resource conflict: "
+          << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
+          << NextAvailable << "\n");
+  }
+  return NextAvailable;
 }
 
 /// Move the boundary of scheduled code by one SUnit.
-void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+void SchedBoundary::bumpNode(SUnit *SU) {
   // Update the reservation table.
   if (HazardRec->isEnabled()) {
     if (!isTop() && SU->isCall) {
@@ -2110,25 +1883,18 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
     }
     HazardRec->EmitInstruction(SU);
   }
+  // checkHazard should prevent scheduling multiple instructions per cycle that
+  // exceed the issue width.
   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
-  CurrMOps += IncMOps;
-  // checkHazard prevents scheduling multiple instructions per cycle that exceed
-  // issue width. However, we commonly reach the maximum. In this case
-  // opportunistically bump the cycle to avoid uselessly checking everything in
-  // the readyQ. Furthermore, a single instruction may produce more than one
-  // cycle's worth of micro-ops.
-  //
-  // TODO: Also check if this SU must end a dispatch group.
-  unsigned NextCycle = CurrCycle;
-  if (CurrMOps >= SchedModel->getIssueWidth()) {
-    ++NextCycle;
-    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
-          << " at cycle " << CurrCycle << '\n');
-  }
+  assert(
+      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
+      "Cannot schedule this instruction's MicroOps in the current cycle.");
+
   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
 
+  unsigned NextCycle = CurrCycle;
   switch (SchedModel->getMicroOpBufferSize()) {
   case 0:
     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
@@ -2141,7 +1907,11 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
     break;
   default:
     // We don't currently model the OOO reorder buffer, so consider all
-    // scheduled MOps to be "retired".
+    // scheduled MOps to be "retired". We do loosely model in-order resource
+    // latency. If this instruction uses an in-order resource, account for any
+    // likely stall cycles.
+    if (SU->isUnbuffered && ReadyCycle > NextCycle)
+      NextCycle = ReadyCycle;
     break;
   }
   RetiredMOps += IncMOps;
@@ -2169,10 +1939,29 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
          PI = SchedModel->getWriteProcResBegin(SC),
          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
       unsigned RCycle =
-        countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
+        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
       if (RCycle > NextCycle)
         NextCycle = RCycle;
     }
+    if (SU->hasReservedResource) {
+      // For reserved resources, record the highest cycle using the resource.
+      // For top-down scheduling, this is the cycle in which we schedule this
+      // instruction plus the number of cycles the operation reserves the
+      // resource. For bottom-up, it is simply the instruction's cycle.
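+      // As a sketch with assumed numbers: scheduling top-down at
+      // NextCycle == 4, an operation that reserves a resource for
+      // PI->Cycles == 2 records
+      // ReservedCycles[PIdx] = max(getNextResourceCycle(PIdx, 0), 4 + 2) = 6,
+      // so checkHazard() stalls later users of that resource until cycle 6.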
+ for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { + if (isTop()) { + ReservedCycles[PIdx] = + std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); + } + else + ReservedCycles[PIdx] = NextCycle; + } + } + } } // Update ExpectedLatency and DependentLatency. unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; @@ -2193,18 +1982,28 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { } else { // After updating ZoneCritResIdx and ExpectedLatency, check if we're - // resource limited. If a stall occured, bumpCycle does this. + // resource limited. If a stall occurred, bumpCycle does this. unsigned LFactor = SchedModel->getLatencyFactor(); IsResourceLimited = (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) > (int)LFactor; } + // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle + // resets CurrMOps. Loop to handle instructions with more MOps than issue in + // one cycle. Since we commonly reach the max MOps here, opportunistically + // bump the cycle to avoid uselessly checking everything in the readyQ. + CurrMOps += IncMOps; + while (CurrMOps >= SchedModel->getIssueWidth()) { + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + bumpCycle(++NextCycle); + } DEBUG(dumpScheduledState()); } /// Release pending ready nodes in to the available queue. This makes them /// visible to heuristics. -void GenericScheduler::SchedBoundary::releasePending() { +void SchedBoundary::releasePending() { // If the available queue is empty, it is safe to reset MinReadyCycle. if (Available.empty()) MinReadyCycle = UINT_MAX; @@ -2234,7 +2033,7 @@ void GenericScheduler::SchedBoundary::releasePending() { } /// Remove SU from the ready set for this boundary. -void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) { +void SchedBoundary::removeReady(SUnit *SU) { if (Available.isInQueue(SU)) Available.remove(Available.find(SU)); else { @@ -2246,7 +2045,7 @@ void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) { /// If this queue only has one ready candidate, return it. As a side effect, /// defer any nodes that now hit a hazard, and advance the cycle until at least /// one node is ready. If multiple instructions are ready, return NULL. -SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() { +SUnit *SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); @@ -2262,20 +2061,22 @@ SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() { } } for (unsigned i = 0; Available.empty(); ++i) { - assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && - "permanent hazard"); (void)i; +// FIXME: Re-enable assert once PR20057 is resolved. +// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && +// "permanent hazard"); + (void)i; bumpCycle(CurrCycle + 1); releasePending(); } if (Available.size() == 1) return *Available.begin(); - return NULL; + return nullptr; } #ifndef NDEBUG // This is useful information to dump after bumpNode. // Note that the Queue contents are more useful before pickNodeFromQueue. 
-void GenericScheduler::SchedBoundary::dumpScheduledState() {
+void SchedBoundary::dumpScheduledState() {
   unsigned ResFactor;
   unsigned ResCount;
   if (ZoneCritResIdx) {
@@ -2291,14 +2092,19 @@
          << "  Retired: " << RetiredMOps;
   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
-         << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
+         << ResCount / ResFactor << " "
+         << SchedModel->getResourceName(ZoneCritResIdx)
          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
          << (IsResourceLimited ? "  - Resource" : "  - Latency")
          << " limited.\n";
 }
 #endif
 
-void GenericScheduler::SchedCandidate::
+//===----------------------------------------------------------------------===//
+// GenericScheduler - Generic implementation of MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+void GenericSchedulerBase::SchedCandidate::
 initResourceDelta(const ScheduleDAGMI *DAG,
                   const TargetSchedModel *SchedModel) {
   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
@@ -2315,12 +2121,162 @@ initResourceDelta(const ScheduleDAGMI *DAG,
   }
 }
 
+/// Set the CandPolicy for a scheduling zone given the current resources and
+/// latencies inside and outside the zone.
+void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
+                                     bool IsPostRA,
+                                     SchedBoundary &CurrZone,
+                                     SchedBoundary *OtherZone) {
+  // Apply preemptive heuristics based on the total latency and resources
+  // inside and outside this zone. Potential stalls should be considered before
+  // following this policy.
+
+  // Compute remaining latency. We need this both to determine whether the
+  // overall schedule has become latency-limited and whether the instructions
+  // outside this zone are resource or latency limited.
+  //
+  // The "dependent" latency is updated incrementally during scheduling as the
+  // max height/depth of scheduled nodes minus the cycles since it was
+  // scheduled:
+  //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
+  //
+  // The "independent" latency is the max ready queue depth:
+  //   ILat = max N.depth for N in Available|Pending
+  //
+  // RemainingLatency is the greater of independent and dependent latency.
+  unsigned RemLatency = CurrZone.getDependentLatency();
+  RemLatency = std::max(RemLatency,
+                        CurrZone.findMaxLatency(CurrZone.Available.elements()));
+  RemLatency = std::max(RemLatency,
+                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));
+
+  // Compute the critical resource outside the zone.
+  unsigned OtherCritIdx = 0;
+  unsigned OtherCount =
+    OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
+
+  bool OtherResLimited = false;
+  if (SchedModel->hasInstrSchedModel()) {
+    unsigned LFactor = SchedModel->getLatencyFactor();
+    OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
+  }
+  // Schedule aggressively for latency in PostRA mode. We don't check for
+  // acyclic latency during PostRA, and highly out-of-order processors will
+  // skip PostRA scheduling.
+  if (!OtherResLimited) {
+    if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
+      Policy.ReduceLatency |= true;
+      DEBUG(dbgs() << "  " << CurrZone.Available.getName()
+            << " RemainingLatency " << RemLatency << " + "
+            << CurrZone.getCurrCycle() << "c > CritPath "
+            << Rem.CriticalPath << "\n");
+    }
+  }
+  // If the same resource is limiting inside and outside the zone, do nothing.
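+  // For example, if a single (hypothetical) divide unit is the critical
+  // resource both inside this zone and in the remainder, reducing its use
+  // here would only increase demand for it there, so no policy flag is set.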
+ if (CurrZone.getZoneCritResIdx() == OtherCritIdx) + return; + + DEBUG( + if (CurrZone.isResourceLimited()) { + dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " + << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) + << "\n"; + } + if (OtherResLimited) + dbgs() << " RemainingLimit: " + << SchedModel->getResourceName(OtherCritIdx) << "\n"; + if (!CurrZone.isResourceLimited() && !OtherResLimited) + dbgs() << " Latency limited both directions.\n"); + + if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) + Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); + + if (OtherResLimited) + Policy.DemandResIdx = OtherCritIdx; +} + +#ifndef NDEBUG +const char *GenericSchedulerBase::getReasonStr( + GenericSchedulerBase::CandReason Reason) { + switch (Reason) { + case NoCand: return "NOCAND "; + case PhysRegCopy: return "PREG-COPY"; + case RegExcess: return "REG-EXCESS"; + case RegCritical: return "REG-CRIT "; + case Stall: return "STALL "; + case Cluster: return "CLUSTER "; + case Weak: return "WEAK "; + case RegMax: return "REG-MAX "; + case ResourceReduce: return "RES-REDUCE"; + case ResourceDemand: return "RES-DEMAND"; + case TopDepthReduce: return "TOP-DEPTH "; + case TopPathReduce: return "TOP-PATH "; + case BotHeightReduce:return "BOT-HEIGHT"; + case BotPathReduce: return "BOT-PATH "; + case NextDefUse: return "DEF-USE "; + case NodeOrder: return "ORDER "; + }; + llvm_unreachable("Unknown reason!"); +} + +void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; + unsigned ResIdx = 0; + unsigned Latency = 0; + switch (Cand.Reason) { + default: + break; + case RegExcess: + P = Cand.RPDelta.Excess; + break; + case RegCritical: + P = Cand.RPDelta.CriticalMax; + break; + case RegMax: + P = Cand.RPDelta.CurrentMax; + break; + case ResourceReduce: + ResIdx = Cand.Policy.ReduceResIdx; + break; + case ResourceDemand: + ResIdx = Cand.Policy.DemandResIdx; + break; + case TopDepthReduce: + Latency = Cand.SU->getDepth(); + break; + case TopPathReduce: + Latency = Cand.SU->getHeight(); + break; + case BotHeightReduce: + Latency = Cand.SU->getHeight(); + break; + case BotPathReduce: + Latency = Cand.SU->getDepth(); + break; + } + dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + if (P.isValid()) + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) + << ":" << P.getUnitInc() << " "; + else + dbgs() << " "; + if (ResIdx) + dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; + else + dbgs() << " "; + if (Latency) + dbgs() << " " << Latency << " cycles "; + else + dbgs() << " "; + dbgs() << '\n'; +} +#endif /// Return true if this heuristic determines order. 
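 ///
 /// For instance, tryLess(2, 5, TryCand, Cand, Stall) prefers TryCand (fewer
 /// stall cycles), records Stall as the deciding reason, and returns true;
 /// equal values return false so the next heuristic can break the tie. The
 /// values here are illustrative only.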
static bool tryLess(int TryVal, int CandVal, - GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::CandReason Reason) { + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; @@ -2335,9 +2291,9 @@ static bool tryLess(int TryVal, int CandVal, } static bool tryGreater(int TryVal, int CandVal, - GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::CandReason Reason) { + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; @@ -2351,11 +2307,172 @@ static bool tryGreater(int TryVal, int CandVal, return false; } +static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) + return true; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericSchedulerBase::TopPathReduce)) + return true; + } + else { + if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) + return true; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericSchedulerBase::BotPathReduce)) + return true; + } + return false; +} + +static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, + bool IsTop) { + DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") + << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n'); +} + +void GenericScheduler::initialize(ScheduleDAGMI *dag) { + assert(dag->hasVRegLiveness() && + "(PreRA)GenericScheduler needs vreg liveness"); + DAG = static_cast<ScheduleDAGMILive*>(dag); + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; + + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + Bot.init(DAG, SchedModel, &Rem); + + // Initialize resource counts. + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); + const TargetMachine &TM = DAG->MF.getTarget(); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } + if (!Bot.HazardRec) { + Bot.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } +} + +/// Initialize the per-region scheduling policy. +void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + const TargetMachine &TM = Context->MF->getTarget(); + const TargetLowering *TLI = TM.getTargetLowering(); + + // Avoid setting up the register pressure tracker for small regions to save + // compile time. As a rough heuristic, only track pressure when the number of + // schedulable instructions exceeds half the integer register file. 
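+  // E.g., on a hypothetical target with 24 allocatable i32 registers, a
+  // region of 10 instructions is not tracked while a region of 13 is,
+  // since 13 > 24 / 2.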
+  RegionPolicy.ShouldTrackPressure = true;
+  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
+    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
+    if (TLI->isTypeLegal(LegalIntVT)) {
+      unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
+        TLI->getRegClassFor(LegalIntVT));
+      RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
+    }
+  }
+
+  // For generic targets, we default to bottom-up, because it's simpler and more
+  // compile-time optimizations have been implemented in that direction.
+  RegionPolicy.OnlyBottomUp = true;
+
+  // Allow the subtarget to override default policy.
+  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+  ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
+
+  // After subtarget overrides, apply command line options.
+  if (!EnableRegPressure)
+    RegionPolicy.ShouldTrackPressure = false;
+
+  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
+  // e.g. -misched-bottomup=false allows scheduling in both directions.
+  assert((!ForceTopDown || !ForceBottomUp) &&
+         "-misched-topdown incompatible with -misched-bottomup");
+  if (ForceBottomUp.getNumOccurrences() > 0) {
+    RegionPolicy.OnlyBottomUp = ForceBottomUp;
+    if (RegionPolicy.OnlyBottomUp)
+      RegionPolicy.OnlyTopDown = false;
+  }
+  if (ForceTopDown.getNumOccurrences() > 0) {
+    RegionPolicy.OnlyTopDown = ForceTopDown;
+    if (RegionPolicy.OnlyTopDown)
+      RegionPolicy.OnlyBottomUp = false;
+  }
+}
+
+/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
+/// critical path by more cycles than it takes to drain the instruction buffer.
+/// We estimate an upper bound on in-flight instructions as:
+///
+/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
+/// InFlightIterations = AcyclicPath / CyclesPerIteration
+/// InFlightResources = InFlightIterations * LoopResources
+///
+/// TODO: Check execution resources in addition to IssueCount.
+void GenericScheduler::checkAcyclicLatency() {
+  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
+    return;
+
+  // Scaled number of cycles per loop iteration.
+  unsigned IterCount =
+    std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
+             Rem.RemIssueCount);
+  // Scaled acyclic critical path.
+  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
+  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
+  unsigned InFlightCount =
+    (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
+  unsigned BufferLimit =
+    SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
+
+  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
+
+  DEBUG(dbgs() << "IssueCycles="
+        << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
+        << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
+        << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
+        << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
+        << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
        if (Rem.IsAcyclicLatencyLimited)
          dbgs() << "  ACYCLIC LATENCY LIMIT\n");
+}
+
+void GenericScheduler::registerRoots() {
+  Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+  // Some roots may not feed into ExitSU. Check all of them in case.
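+  // E.g., if ExitSU sits at depth 10 but some bottom root reaches depth 12,
+  // the loop below raises Rem.CriticalPath to 12 (illustrative numbers).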
+ for (std::vector<SUnit*>::const_iterator + I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { + if ((*I)->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = (*I)->getDepth(); + } + DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + + if (EnableCyclicPath) { + Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); + checkAcyclicLatency(); + } +} + static bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, - GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::CandReason Reason) { + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { int TryRank = TryP.getPSetOrMax(); int CandRank = CandP.getPSetOrMax(); // If both candidates affect the same set, go with the smallest increase. @@ -2407,32 +2524,6 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { return 0; } -static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::SchedBoundary &Zone) { - if (Zone.isTop()) { - if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, GenericScheduler::TopDepthReduce)) - return true; - } - if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, GenericScheduler::TopPathReduce)) - return true; - } - else { - if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, GenericScheduler::BotHeightReduce)) - return true; - } - if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, GenericScheduler::BotPathReduce)) - return true; - } - return false; -} - /// Apply a set of heursitics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the @@ -2445,10 +2536,10 @@ static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, /// \param RPTracker describes reg pressure within the scheduled zone. /// \param TempTracker is a scratch pressure tracker to reuse in queries. void GenericScheduler::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - RegPressureTracker &TempTracker) { + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker) { if (DAG->isTrackingPressure()) { // Always initialize TryCand's RPDelta. @@ -2510,10 +2601,15 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // For loops that are acyclic path limited, aggressively schedule for latency. // This can result in very long dependence chains scheduled in sequence, so // once every cycle (when CurrMOps == 0), switch to normal heuristics. - if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps + if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps() && tryLatency(TryCand, Cand, Zone)) return; + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Zone.getLatencyStallCycles(TryCand.SU), + Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + // Keep clustered nodes together to encourage downstream peephole // optimizations which may reduce resource requirements. 
// @@ -2558,7 +2654,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Prefer immediate defs/users of the last scheduled instruction. This is a // local pressure avoidance strategy that also makes the machine code // readable. - if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), + if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU), TryCand, Cand, NextDefUse)) return; @@ -2569,90 +2665,14 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, } } -#ifndef NDEBUG -const char *GenericScheduler::getReasonStr( - GenericScheduler::CandReason Reason) { - switch (Reason) { - case NoCand: return "NOCAND "; - case PhysRegCopy: return "PREG-COPY"; - case RegExcess: return "REG-EXCESS"; - case RegCritical: return "REG-CRIT "; - case Cluster: return "CLUSTER "; - case Weak: return "WEAK "; - case RegMax: return "REG-MAX "; - case ResourceReduce: return "RES-REDUCE"; - case ResourceDemand: return "RES-DEMAND"; - case TopDepthReduce: return "TOP-DEPTH "; - case TopPathReduce: return "TOP-PATH "; - case BotHeightReduce:return "BOT-HEIGHT"; - case BotPathReduce: return "BOT-PATH "; - case NextDefUse: return "DEF-USE "; - case NodeOrder: return "ORDER "; - }; - llvm_unreachable("Unknown reason!"); -} - -void GenericScheduler::traceCandidate(const SchedCandidate &Cand) { - PressureChange P; - unsigned ResIdx = 0; - unsigned Latency = 0; - switch (Cand.Reason) { - default: - break; - case RegExcess: - P = Cand.RPDelta.Excess; - break; - case RegCritical: - P = Cand.RPDelta.CriticalMax; - break; - case RegMax: - P = Cand.RPDelta.CurrentMax; - break; - case ResourceReduce: - ResIdx = Cand.Policy.ReduceResIdx; - break; - case ResourceDemand: - ResIdx = Cand.Policy.DemandResIdx; - break; - case TopDepthReduce: - Latency = Cand.SU->getDepth(); - break; - case TopPathReduce: - Latency = Cand.SU->getHeight(); - break; - case BotHeightReduce: - Latency = Cand.SU->getHeight(); - break; - case BotPathReduce: - Latency = Cand.SU->getDepth(); - break; - } - dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); - if (P.isValid()) - dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) - << ":" << P.getUnitInc() << " "; - else - dbgs() << " "; - if (ResIdx) - dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; - else - dbgs() << " "; - if (Latency) - dbgs() << " " << Latency << " cycles "; - else - dbgs() << " "; - dbgs() << '\n'; -} -#endif - /// Pick the best candidate from the queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during /// DAG building. To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - SchedCandidate &Cand) { + const RegPressureTracker &RPTracker, + SchedCandidate &Cand) { ReadyQueue &Q = Zone.Available; DEBUG(Q.dump()); @@ -2675,12 +2695,6 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, } } -static void tracePick(const GenericScheduler::SchedCandidate &Cand, - bool IsTop) { - DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") - << GenericScheduler::getReasonStr(Cand.Reason) << '\n'); -} - /// Pick the best candidate node from either the top or bottom queue. SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. 
This is most
@@ -2698,8 +2712,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
   CandPolicy NoPolicy;
   SchedCandidate BotCand(NoPolicy);
   SchedCandidate TopCand(NoPolicy);
-  Bot.setPolicy(BotCand.Policy, Top);
-  Top.setPolicy(TopCand.Policy, Bot);
+  // Set the bottom-up policy based on the state of the current bottom zone and
+  // the instructions outside the zone, including the top zone.
+  setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
+  // Set the top-down policy based on the state of the current top zone and
+  // the instructions outside the zone, including the bottom zone.
+  setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
 
   // Prefer bottom scheduling when heuristics are silent.
   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
@@ -2741,7 +2759,7 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
   if (DAG->top() == DAG->bottom()) {
     assert(Top.Available.empty() && Top.Pending.empty() &&
            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
-    return NULL;
+    return nullptr;
   }
   SUnit *SU;
   do {
@@ -2809,20 +2827,21 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
 }
 
 /// Update the scheduler's state after scheduling a node. This is the same node
-/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
-/// it's state based on the current cycle before MachineSchedStrategy does.
+/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
+/// update its state based on the current cycle before MachineSchedStrategy
+/// does.
 ///
 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
 /// them here. See comments in biasPhysRegCopy.
 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
-    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
+    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
     Top.bumpNode(SU);
     if (SU->hasPhysRegUses)
      reschedulePhysRegCopies(SU, true);
   }
   else {
-    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
+    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
    Bot.bumpNode(SU);
     if (SU->hasPhysRegDefs)
       reschedulePhysRegCopies(SU, false);
@@ -2831,23 +2850,155 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
 
 /// Create the standard converging machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) {
-  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C));
+static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
+  ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
   // Register DAG post-processors.
   //
   // FIXME: extend the mutation API to allow earlier mutations to instantiate
   // data and pass it to later mutations. Have a single mutation that gathers
   // the interesting nodes in one pass.
-  DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
+  DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
   if (EnableLoadCluster && DAG->TII->enableClusterLoads())
-    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+    DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
   if (EnableMacroFusion)
-    DAG->addMutation(new MacroFusion(DAG->TII));
+    DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
   return DAG;
 }
+
 static MachineSchedRegistry
 GenericSchedRegistry("converge", "Standard converging scheduler.",
-                     createGenericSched);
+                     createGenericSchedLive);
+
+//===----------------------------------------------------------------------===//
+// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
+  DAG = Dag;
+  SchedModel = DAG->getSchedModel();
+  TRI = DAG->TRI;
+
+  Rem.init(DAG, SchedModel);
+  Top.init(DAG, SchedModel, &Rem);
+  BotRoots.clear();
+
+  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
+  // or are disabled, then these HazardRecs will be disabled.
+  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+  const TargetMachine &TM = DAG->MF.getTarget();
+  if (!Top.HazardRec) {
+    Top.HazardRec =
+      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+  }
+}
+
+
+void PostGenericScheduler::registerRoots() {
+  Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+  // Some roots may not feed into ExitSU. Check all of them in case.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
+    if ((*I)->getDepth() > Rem.CriticalPath)
+      Rem.CriticalPath = (*I)->getDepth();
+  }
+  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+}
+
+/// Apply a set of heuristics to a new candidate for PostRA scheduling.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
+                                        SchedCandidate &TryCand) {
+
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+
+  // Prioritize instructions that read unbuffered resources by stall cycles.
+  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
+              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+    return;
+
+  // Avoid critical resource consumption and balance the schedule.
+  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+              TryCand, Cand, ResourceReduce))
+    return;
+  if (tryGreater(TryCand.ResDelta.DemandedResources,
+                 Cand.ResDelta.DemandedResources,
+                 TryCand, Cand, ResourceDemand))
+    return;
+
+  // Avoid serializing long latency dependence chains.
+  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
+    return;
+  }
+
+  // Fall through to original instruction order.
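+  // SUnit::NodeNum follows the original instruction order within the region,
+  // so preferring the smaller NodeNum on an otherwise undecided candidate
+  // keeps the input order stable.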
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum) + TryCand.Reason = NodeOrder; +} + +void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { + ReadyQueue &Q = Top.Available; + + DEBUG(Q.dump()); + + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + SchedCandidate TryCand(Cand.Policy); + TryCand.SU = *I; + TryCand.initResourceDelta(DAG, SchedModel); + tryCandidate(Cand, TryCand); + if (TryCand.Reason != NoCand) { + Cand.setBest(TryCand); + DEBUG(traceCandidate(Cand)); + } + } +} + +/// Pick the next node to schedule. +SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage"); + return nullptr; + } + SUnit *SU; + do { + SU = Top.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + SchedCandidate TopCand(NoPolicy); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); + pickNodeFromQueue(TopCand); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand, true); + SU = TopCand.SU; + } + } while (SU->isScheduled); + + IsTopNode = true; + Top.removeReady(SU); + + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + return SU; +} + +/// Called after ScheduleDAGMI has scheduled an instruction and updated +/// scheduled/remaining flags in the DAG nodes. +void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); + Top.bumpNode(SU); +} + +/// Create a generic scheduler with no vreg liveness or DAG mutation passes. +static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { + return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true); +} //===----------------------------------------------------------------------===// // ILP Scheduler. Currently for experimental analysis of heuristics. @@ -2860,7 +3011,8 @@ struct ILPOrder { const BitVector *ScheduledTrees; bool MaximizeILP; - ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {} + ILPOrder(bool MaxILP) + : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {} /// \brief Apply a less-than relation on node priority. /// @@ -2889,22 +3041,23 @@ struct ILPOrder { /// \brief Schedule based on the ILP metric. class ILPScheduler : public MachineSchedStrategy { - ScheduleDAGMI *DAG; + ScheduleDAGMILive *DAG; ILPOrder Cmp; std::vector<SUnit*> ReadyQ; public: - ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} + ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {} - virtual void initialize(ScheduleDAGMI *dag) { - DAG = dag; + void initialize(ScheduleDAGMI *dag) override { + assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); + DAG = static_cast<ScheduleDAGMILive*>(dag); DAG->computeDFSResult(); Cmp.DFSResult = DAG->getDFSResult(); Cmp.ScheduledTrees = &DAG->getScheduledTrees(); ReadyQ.clear(); } - virtual void registerRoots() { + void registerRoots() override { // Restore the heap in ReadyQ with the updated DFS results. std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } @@ -2913,8 +3066,8 @@ public: /// ----------------------------------------- /// Callback to select the highest priority node from the ready Q. 
- virtual SUnit *pickNode(bool &IsTopNode) { - if (ReadyQ.empty()) return NULL; + SUnit *pickNode(bool &IsTopNode) override { + if (ReadyQ.empty()) return nullptr; std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); @@ -2929,19 +3082,19 @@ public: } /// \brief Scheduler callback to notify that a new subtree is scheduled. - virtual void scheduleTree(unsigned SubtreeID) { + void scheduleTree(unsigned SubtreeID) override { std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } /// Callback after a node is scheduled. Mark a newly scheduled tree, notify /// DFSResults, and resort the priority Q. - virtual void schedNode(SUnit *SU, bool IsTopNode) { + void schedNode(SUnit *SU, bool IsTopNode) override { assert(!IsTopNode && "SchedDFSResult needs bottom-up"); } - virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } + void releaseTopNode(SUnit *) override { /*only called for top roots*/ } - virtual void releaseBottomNode(SUnit *SU) { + void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } @@ -2949,10 +3102,10 @@ public: } // namespace static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { - return new ScheduleDAGMI(C, new ILPScheduler(true)); + return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true)); } static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { - return new ScheduleDAGMI(C, new ILPScheduler(false)); + return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false)); } static MachineSchedRegistry ILPMaxRegistry( "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); @@ -2994,7 +3147,7 @@ public: InstructionShuffler(bool alternate, bool topdown) : IsAlternating(alternate), IsTopDown(topdown) {} - virtual void initialize(ScheduleDAGMI *) { + void initialize(ScheduleDAGMI*) override { TopQ.clear(); BottomQ.clear(); } @@ -3002,11 +3155,11 @@ public: /// Implement MachineSchedStrategy interface. 
/// ----------------------------------------- - virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *pickNode(bool &IsTopNode) override { SUnit *SU; if (IsTopDown) { do { - if (TopQ.empty()) return NULL; + if (TopQ.empty()) return nullptr; SU = TopQ.top(); TopQ.pop(); } while (SU->isScheduled); @@ -3014,7 +3167,7 @@ public: } else { do { - if (BottomQ.empty()) return NULL; + if (BottomQ.empty()) return nullptr; SU = BottomQ.top(); BottomQ.pop(); } while (SU->isScheduled); @@ -3025,12 +3178,12 @@ public: return SU; } - virtual void schedNode(SUnit *SU, bool IsTopNode) {} + void schedNode(SUnit *SU, bool IsTopNode) override {} - virtual void releaseTopNode(SUnit *SU) { + void releaseTopNode(SUnit *SU) override { TopQ.push(SU); } - virtual void releaseBottomNode(SUnit *SU) { + void releaseBottomNode(SUnit *SU) override { BottomQ.push(SU); } }; @@ -3041,7 +3194,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { bool TopDown = !ForceBottomUp; assert((TopDown || !ForceTopDown) && "-misched-topdown incompatible with -misched-bottomup"); - return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); + return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown)); } static MachineSchedRegistry ShufflerRegistry( "shuffle", "Shuffle machine instructions alternating directions", @@ -3049,7 +3202,7 @@ static MachineSchedRegistry ShufflerRegistry( #endif // !NDEBUG //===----------------------------------------------------------------------===// -// GraphWriter support for ScheduleDAGMI. +// GraphWriter support for ScheduleDAGMILive. //===----------------------------------------------------------------------===// #ifndef NDEBUG @@ -3095,8 +3248,9 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { std::string Str; raw_string_ostream SS(Str); - const SchedDFSResult *DFS = - static_cast<const ScheduleDAGMI*>(G)->getDFSResult(); + const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); + const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? + static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; SS << "SU:" << SU->NodeNum; if (DFS) SS << " I:" << DFS->getNumInstrs(SU); @@ -3106,11 +3260,11 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { return G->getGraphNodeLabel(SU); } - static std::string getNodeAttributes(const SUnit *N, - const ScheduleDAG *Graph) { + static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { std::string Str("shape=Mrecord"); - const SchedDFSResult *DFS = - static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult(); + const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); + const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; if (DFS) { Str += ",style=filled,fillcolor=\"#"; Str += DOT::getColorString(DFS->getSubtreeID(N)); diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 105d7c2..f44e4d1 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-sink" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -32,6 +31,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-sink" + static cl::opt<bool> SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), @@ -60,9 +61,9 @@ namespace { initializeMachineSinkingPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); @@ -72,7 +73,7 @@ namespace { AU.addPreserved<MachineLoopInfo>(); } - virtual void releaseMemory() { + void releaseMemory() override { CEBCandidates.clear(); } @@ -98,16 +99,6 @@ namespace { bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; - - // SuccessorSorter - Sort Successors according to their loop depth. - struct SuccessorSorter { - SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {} - bool operator()(const MachineBasicBlock *LHS, - const MachineBasicBlock *RHS) const { - return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); - } - MachineLoopInfo *LI; - }; } // end anonymous namespace char MachineSinking::ID = 0; @@ -181,13 +172,12 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // Predecessors according to CFG: BB#0 BB#1 // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1> BreakPHIEdge = true; - for (MachineRegisterInfo::use_nodbg_iterator - I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); - I != E; ++I) { - MachineInstr *UseInst = &*I; + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + MachineInstr *UseInst = MO.getParent(); + unsigned OpNo = &MO - &UseInst->getOperand(0); MachineBasicBlock *UseBlock = UseInst->getParent(); if (!(UseBlock == MBB && UseInst->isPHI() && - UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) { + UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) { BreakPHIEdge = false; break; } @@ -195,16 +185,15 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, if (BreakPHIEdge) return true; - for (MachineRegisterInfo::use_nodbg_iterator - I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); - I != E; ++I) { + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { // Determine the block of the use. - MachineInstr *UseInst = &*I; + MachineInstr *UseInst = MO.getParent(); + unsigned OpNo = &MO - &UseInst->getOperand(0); MachineBasicBlock *UseBlock = UseInst->getParent(); if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. 
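       // A PHI's operands come in (value, predecessor-block) pairs following
       // the def, e.g. %reg1<def> = PHI %reg2, <BB#0>, %reg3, <BB#1>, so the
       // use at operand OpNo takes its block from operand OpNo+1.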
- UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); + UseBlock = UseInst->getOperand(OpNo+1).getMBB(); } else if (UseBlock == DefMBB) { LocalUse = true; return false; @@ -219,6 +208,9 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + DEBUG(dbgs() << "******** Machine Sinking ********\n"); const TargetMachine &TM = MF.getTarget(); @@ -341,16 +333,16 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, MachineBasicBlock *ToBB, bool BreakPHIEdge) { if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) - return 0; + return nullptr; // Avoid breaking back edge. From == To means backedge for single BB loop. if (!SplitEdges || FromBB == ToBB) - return 0; + return nullptr; // Check for backedges of more "complex" loops. if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && LI->isLoopHeader(ToBB)) - return 0; + return nullptr; // It's not always legal to break critical edges and sink the computation // to the edge. @@ -397,7 +389,7 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, if (*PI == FromBB) continue; if (!DT->dominates(ToBB, *PI)) - return 0; + return nullptr; } } @@ -460,12 +452,9 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, // Check if only use in post dominated block is PHI instruction. bool NonPHIUse = false; - for (MachineRegisterInfo::use_nodbg_iterator - I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); - I != E; ++I) { - MachineInstr *UseInst = &*I; - MachineBasicBlock *UseBlock = UseInst->getParent(); - if (UseBlock == SuccToSinkTo && !UseInst->isPHI()) + for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) { + MachineBasicBlock *UseBlock = UseInst.getParent(); + if (UseBlock == SuccToSinkTo && !UseInst.isPHI()) NonPHIUse = true; } if (!NonPHIUse) @@ -496,7 +485,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. - MachineBasicBlock *SuccToSinkTo = 0; + MachineBasicBlock *SuccToSinkTo = nullptr; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. @@ -510,10 +499,10 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) - return NULL; + return nullptr; } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. - return NULL; + return nullptr; } } else { // Virtual register uses are always safe to sink. @@ -521,7 +510,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) - return NULL; + return nullptr; // FIXME: This picks a successor to sink into based on having one // successor that dominates all the uses. 
However, there are cases where @@ -544,7 +533,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, bool LocalUse = false; if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, LocalUse)) - return NULL; + return nullptr; continue; } @@ -553,7 +542,12 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // we should sink to. // We give successors with smaller loop depth higher priority. SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); - std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); + // Sort Successors according to their loop depth. + std::stable_sort( + Succs.begin(), Succs.end(), + [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) { + return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + }); for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(), E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; @@ -565,26 +559,26 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, } if (LocalUse) // Def is used locally, it's never safe to move this def. - return NULL; + return nullptr; } // If we couldn't find a block to sink to, ignore this instruction. - if (SuccToSinkTo == 0) - return NULL; - else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) - return NULL; + if (!SuccToSinkTo) + return nullptr; + if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) + return nullptr; } } // It is not possible to sink an instruction into its own block. This can // happen with loops. if (MBB == SuccToSinkTo) - return NULL; + return nullptr; // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) - return NULL; + return nullptr; return SuccToSinkTo; } @@ -614,7 +608,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); // If there are no outputs, it must have side-effects. 
- if (SuccToSinkTo == 0) + if (!SuccToSinkTo) return false; diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 6aa3f67..1bbf0ad 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-trace-metrics" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SparseSet.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "machine-trace-metrics" + char MachineTraceMetrics::ID = 0; char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; @@ -37,8 +38,9 @@ INITIALIZE_PASS_END(MachineTraceMetrics, "machine-trace-metrics", "Machine Trace Metrics", false, true) MachineTraceMetrics::MachineTraceMetrics() - : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { - std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); + : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr), + MRI(nullptr), Loops(nullptr) { + std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr); } void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { @@ -64,11 +66,11 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { } void MachineTraceMetrics::releaseMemory() { - MF = 0; + MF = nullptr; BlockInfo.clear(); for (unsigned i = 0; i != TS_NumStrategies; ++i) { delete Ensembles[i]; - Ensembles[i] = 0; + Ensembles[i] = nullptr; } } @@ -95,19 +97,17 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { unsigned PRKinds = SchedModel.getNumProcResourceKinds(); SmallVector<unsigned, 32> PRCycles(PRKinds); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *MI = I; - if (MI->isTransient()) + for (const auto &MI : *MBB) { + if (MI.isTransient()) continue; ++InstrCount; - if (MI->isCall()) + if (MI.isCall()) FBI->HasCalls = true; // Count processor resources used. if (!SchedModel.hasInstrSchedModel()) continue; - const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI); + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI); if (!SC->isValid()) continue; @@ -233,7 +233,7 @@ const MachineTraceMetrics::TraceBlockInfo* MachineTraceMetrics::Ensemble:: getDepthResources(const MachineBasicBlock *MBB) const { const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; - return TBI->hasValidDepth() ? TBI : 0; + return TBI->hasValidDepth() ? TBI : nullptr; } // Check if height resources for MBB are valid and return the TBI. @@ -242,7 +242,7 @@ const MachineTraceMetrics::TraceBlockInfo* MachineTraceMetrics::Ensemble:: getHeightResources(const MachineBasicBlock *MBB) const { const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; - return TBI->hasValidHeight() ? TBI : 0; + return TBI->hasValidHeight() ? TBI : nullptr; } /// Get an array of processor resource depths for MBB. Indexed by processor @@ -302,9 +302,9 @@ static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { // instructions. 
namespace { class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { - const char *getName() const { return "MinInstr"; } - const MachineBasicBlock *pickTracePred(const MachineBasicBlock*); - const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*); + const char *getName() const override { return "MinInstr"; } + const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override; + const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override; public: MinInstrCountEnsemble(MachineTraceMetrics *mtm) @@ -316,13 +316,13 @@ public: const MachineBasicBlock* MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { if (MBB->pred_empty()) - return 0; + return nullptr; const MachineLoop *CurLoop = getLoopFor(MBB); // Don't leave loops, and never follow back-edges. if (CurLoop && MBB == CurLoop->getHeader()) - return 0; + return nullptr; unsigned CurCount = MTM.getResources(MBB)->InstrCount; - const MachineBasicBlock *Best = 0; + const MachineBasicBlock *Best = nullptr; unsigned BestDepth = 0; for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { @@ -344,9 +344,9 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { const MachineBasicBlock* MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { if (MBB->pred_empty()) - return 0; + return nullptr; const MachineLoop *CurLoop = getLoopFor(MBB); - const MachineBasicBlock *Best = 0; + const MachineBasicBlock *Best = nullptr; unsigned BestHeight = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { @@ -568,9 +568,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { // invalidated, but their instructions will stay the same, so there is no // need to erase the Cycle entries. They will be overwritten when we // recompute. - for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); - I != E; ++I) - Cycles.erase(I); + for (const auto &I : *BadMBB) + Cycles.erase(&I); } void MachineTraceMetrics::Ensemble::verify() const { @@ -627,7 +626,7 @@ struct DataDep { assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); assert(!DefI.atEnd() && "Register has no defs"); - DefMI = &*DefI; + DefMI = DefI->getParent(); DefOp = DefI.getOperandNo(); assert((++DefI).atEnd() && "Register has multiple defs"); } @@ -690,7 +689,7 @@ struct LiveRegUnit { unsigned getSparseSetIndex() const { return RegUnit; } - LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {} }; } @@ -828,16 +827,13 @@ computeInstrDepths(const MachineBasicBlock *MBB) { if (TBI.HasValidInstrHeights) TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *UseMI = I; - + for (const auto &UseMI : *MBB) { // Collect all data dependencies. Deps.clear(); - if (UseMI->isPHI()) - getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); - else if (getDataDeps(UseMI, Deps, MTM.MRI)) - updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + if (UseMI.isPHI()) + getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(&UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI); // Filter and process dependencies, computing the earliest issue cycle. 
unsigned Cycle = 0; @@ -853,20 +849,20 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) DepCycle += MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. - InstrCycles &MICycles = Cycles[UseMI]; + InstrCycles &MICycles = Cycles[&UseMI]; MICycles.Depth = Cycle; if (!TBI.HasValidInstrHeights) { - DEBUG(dbgs() << Cycle << '\t' << *UseMI); + DEBUG(dbgs() << Cycle << '\t' << UseMI); continue; } // Update critical path length. TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); - DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); } } } @@ -944,7 +940,7 @@ static bool pushDepHeight(const DataDep &Dep, // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; bool New; - tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); if (New) return true; @@ -1055,16 +1051,16 @@ computeInstrHeights(const MachineBasicBlock *MBB) { Succ = Loop->getHeader(); if (Succ) { - for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end(); - I != E && I->isPHI(); ++I) { - const MachineInstr *PHI = I; + for (const auto &PHI : *Succ) { + if (!PHI.isPHI()) + break; Deps.clear(); - getPHIDeps(PHI, Deps, MBB, MTM.MRI); + getPHIDeps(&PHI, Deps, MBB, MTM.MRI); if (!Deps.empty()) { // Loop header PHI heights are all 0. - unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0; - DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); - if (pushDepHeight(Deps.front(), PHI, Height, + unsigned Height = TBI.Succ ? 
Cycles.lookup(&PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI); + if (pushDepHeight(Deps.front(), &PHI, Height, Heights, MTM.SchedModel, MTM.TII)) addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); } diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index d61470c..8515b0f 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -33,7 +33,6 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/BasicBlock.h" @@ -42,6 +41,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -241,17 +241,17 @@ namespace { static char ID; // Pass ID, replacement for typeid const char *const Banner; - MachineVerifierPass(const char *b = 0) + MachineVerifierPass(const char *b = nullptr) : MachineFunctionPass(ID), Banner(b) { initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { MF.verify(this, Banner); return false; } @@ -273,10 +273,11 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { - raw_ostream *OutFile = 0; + raw_ostream *OutFile = nullptr; if (OutFileName) { std::string ErrorInfo; - OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, sys::fs::F_Append); + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, + sys::fs::F_Append | sys::fs::F_Text); if (!ErrorInfo.empty()) { errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; exit(1); @@ -295,10 +296,10 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { TRI = TM->getRegisterInfo(); MRI = &MF.getRegInfo(); - LiveVars = NULL; - LiveInts = NULL; - LiveStks = NULL; - Indexes = NULL; + LiveVars = nullptr; + LiveInts = nullptr; + LiveStks = nullptr; + Indexes = nullptr; if (PASS) { LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>(); // We don't want to verify LiveVariables if LiveIntervals is available. @@ -313,7 +314,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { MFI!=MFE; ++MFI) { visitMachineBasicBlockBefore(MFI); // Keep track of the current bundle header. - const MachineInstr *CurBundle = 0; + const MachineInstr *CurBundle = nullptr; // Do we expect the next instruction to be part of the same bundle? bool InBundle = false; @@ -468,18 +469,17 @@ void MachineVerifier::visitMachineFunctionBefore() { // Build a set of the basic blocks in the function. 
FunctionBlocks.clear(); - for (MachineFunction::const_iterator - I = MF->begin(), E = MF->end(); I != E; ++I) { - FunctionBlocks.insert(I); - BBInfo &MInfo = MBBInfoMap[I]; - - MInfo.Preds.insert(I->pred_begin(), I->pred_end()); - if (MInfo.Preds.size() != I->pred_size()) - report("MBB has duplicate entries in its predecessor list.", I); - - MInfo.Succs.insert(I->succ_begin(), I->succ_end()); - if (MInfo.Succs.size() != I->succ_size()) - report("MBB has duplicate entries in its successor list.", I); + for (const auto &MBB : *MF) { + FunctionBlocks.insert(&MBB); + BBInfo &MInfo = MBBInfoMap[&MBB]; + + MInfo.Preds.insert(MBB.pred_begin(), MBB.pred_end()); + if (MInfo.Preds.size() != MBB.pred_size()) + report("MBB has duplicate entries in its predecessor list.", &MBB); + + MInfo.Succs.insert(MBB.succ_begin(), MBB.succ_end()); + if (MInfo.Succs.size() != MBB.succ_size()) + report("MBB has duplicate entries in its successor list.", &MBB); } // Check that the register use lists are sane. @@ -500,7 +500,7 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i, void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { - FirstTerminator = 0; + FirstTerminator = nullptr; if (MRI->isSSA()) { // If this block has allocatable physical registers live-in, check that @@ -552,7 +552,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB, Cond)) { @@ -577,8 +577,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() && - !TII->isPredicated(getBundleStart(&MBB->back()))) { + if (!MBB->empty() && MBB->back().isBarrier() && + !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } @@ -598,10 +598,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!getBundleStart(&MBB->back())->isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!getBundleStart(&MBB->back())->isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -629,10 +629,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if (getBundleStart(&MBB->back())->isBarrier()) { + } else if (MBB->back().isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!getBundleStart(&MBB->back())->isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -657,10 
+657,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!getBundleStart(&MBB->back())->isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!getBundleStart(&MBB->back())->isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } @@ -1075,7 +1075,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Verify SSA form. if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && - llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) + std::next(MRI->def_begin(Reg)) != MRI->def_end()) report("Multiple virtual register defs in SSA form", MO, MONum); // Check LiveInts for a live segment, but only for virtual registers. @@ -1157,9 +1157,7 @@ void MachineVerifier::calcRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. SmallPtrSet<const MachineBasicBlock*, 8> todo; - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - const MachineBasicBlock &MBB(*MFI); + for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; if (!MInfo.reachable) continue; @@ -1194,9 +1192,7 @@ void MachineVerifier::calcRegsPassed() { void MachineVerifier::calcRegsRequired() { // First push live-in regs to predecessors' vregsRequired. SmallPtrSet<const MachineBasicBlock*, 8> todo; - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - const MachineBasicBlock &MBB(*MFI); + for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(), PrE = MBB.pred_end(); PrI != PrE; ++PrI) { @@ -1227,27 +1223,28 @@ void MachineVerifier::calcRegsRequired() { // calcRegsPassed has been run so BBInfo::isLiveOut is valid. void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { SmallPtrSet<const MachineBasicBlock*, 8> seen; - for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { + for (const auto &BBI : *MBB) { + if (!BBI.isPHI()) + break; seen.clear(); - for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { - unsigned Reg = BBI->getOperand(i).getReg(); - const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB(); + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI.getOperand(i).getReg(); + const MachineBasicBlock *Pre = BBI.getOperand(i + 1).getMBB(); if (!Pre->isSuccessor(MBB)) continue; seen.insert(Pre); BBInfo &PrInfo = MBBInfoMap[Pre]; if (PrInfo.reachable && !PrInfo.isLiveOut(Reg)) report("PHI operand is not live-out from predecessor", - &BBI->getOperand(i), i); + &BBI.getOperand(i), i); } // Did we see all predecessors? 
for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), PrE = MBB->pred_end(); PrI != PrE; ++PrI) { if (!seen.count(*PrI)) { - report("Missing PHI operand", BBI); + report("Missing PHI operand", &BBI); *OS << "BB#" << (*PrI)->getNumber() << " is a predecessor according to the CFG.\n"; } @@ -1258,29 +1255,27 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { void MachineVerifier::visitMachineFunctionAfter() { calcRegsPassed(); - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - BBInfo &MInfo = MBBInfoMap[MFI]; + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; // Skip unreachable MBBs. if (!MInfo.reachable) continue; - checkPHIOps(MFI); + checkPHIOps(&MBB); } // Now check liveness info if available calcRegsRequired(); // Check for killed virtual registers that should be live out. - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - BBInfo &MInfo = MBBInfoMap[MFI]; + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; for (RegSet::iterator I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; ++I) if (MInfo.regsKilled.count(*I)) { - report("Virtual register killed in block, but needed live out.", MFI); + report("Virtual register killed in block, but needed live out.", &MBB); *OS << "Virtual register " << PrintReg(*I) << " is used after the block.\n"; } @@ -1306,20 +1301,19 @@ void MachineVerifier::verifyLiveVariables() { for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - BBInfo &MInfo = MBBInfoMap[MFI]; + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; // Our vregsRequired should be identical to LiveVariables' AliveBlocks if (MInfo.vregsRequired.count(Reg)) { - if (!VI.AliveBlocks.test(MFI->getNumber())) { - report("LiveVariables: Block missing from AliveBlocks", MFI); + if (!VI.AliveBlocks.test(MBB.getNumber())) { + report("LiveVariables: Block missing from AliveBlocks", &MBB); *OS << "Virtual register " << PrintReg(Reg) << " must be live through the block.\n"; } } else { - if (VI.AliveBlocks.test(MFI->getNumber())) { - report("LiveVariables: Block should not be in AliveBlocks", MFI); + if (VI.AliveBlocks.test(MBB.getNumber())) { + report("LiveVariables: Block should not be in AliveBlocks", &MBB); *OS << "Virtual register " << PrintReg(Reg) << " is not needed live through the block.\n"; } @@ -1674,32 +1668,31 @@ void MachineVerifier::verifyStackFrame() { } // Update stack state by checking contents of MBB. - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - if (I->getOpcode() == FrameSetupOpcode) { + for (const auto &I : *MBB) { + if (I.getOpcode() == FrameSetupOpcode) { // The first operand of a FrameOpcode should be i32. - int Size = I->getOperand(0).getImm(); + int Size = I.getOperand(0).getImm(); assert(Size >= 0 && "Value should be non-negative in FrameSetup and FrameDestroy.\n"); if (BBState.ExitIsSetup) - report("FrameSetup is after another FrameSetup", I); + report("FrameSetup is after another FrameSetup", &I); BBState.ExitValue -= Size; BBState.ExitIsSetup = true; } - if (I->getOpcode() == FrameDestroyOpcode) { + if (I.getOpcode() == FrameDestroyOpcode) { // The first operand of a FrameOpcode should be i32. 
- int Size = I->getOperand(0).getImm(); + int Size = I.getOperand(0).getImm(); assert(Size >= 0 && "Value should be non-negative in FrameSetup and FrameDestroy.\n"); if (!BBState.ExitIsSetup) - report("FrameDestroy is not after a FrameSetup", I); + report("FrameDestroy is not after a FrameSetup", &I); int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue : BBState.ExitValue; if (BBState.ExitIsSetup && AbsSPAdj != Size) { - report("FrameDestroy <n> is after FrameSetup <m>", I); + report("FrameDestroy <n> is after FrameSetup <m>", &I); *OS << "FrameDestroy <" << Size << "> is after FrameSetup <" << AbsSPAdj << ">.\n"; } diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp index 3982612..95a2934 100644 --- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "phi-opt" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -23,6 +22,8 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; +#define DEBUG_TYPE "phi-opt" + STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); @@ -37,9 +38,9 @@ namespace { initializeOptimizePHIsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -61,6 +62,9 @@ INITIALIZE_PASS(OptimizePHIs, "opt-phis", "Optimize machine instruction PHIs", false, false) bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + if (skipOptnoneFunction(*Fn.getFunction())) + return false; + MRI = &Fn.getRegInfo(); TII = Fn.getTarget().getInstrInfo(); @@ -139,10 +143,8 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { if (PHIsInCycle.size() == 16) return false; - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg), - E = MRI->use_end(); I != E; ++I) { - MachineInstr *UseMI = &*I; - if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle)) + for (MachineInstr &UseMI : MRI->use_instructions(DstReg)) { + if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle)) return false; } diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index dcd9072..c8d0819 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "phielim" #include "llvm/CodeGen/Passes.h" #include "PHIEliminationUtils.h" #include "llvm/ADT/STLExtras.h" @@ -35,6 +34,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "phielim" + static cl::opt<bool> DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), cl::Hidden, cl::desc("Disable critical edge splitting " @@ -57,8 +58,8 @@ namespace { initializePHIEliminationPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &Fn); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &Fn) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; private: /// 
EliminatePHINodes - Eliminate phi nodes by inserting copy instructions @@ -186,7 +187,7 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). MachineBasicBlock::iterator LastPHIIt = - prior(MBB.SkipPHIsAndLabels(MBB.begin())); + std::prev(MBB.SkipPHIsAndLabels(MBB.begin())); while (MBB.front().isPHI()) LowerPHINode(MBB, LastPHIIt); @@ -198,9 +199,8 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, /// This includes registers with no defs. static bool isImplicitlyDefined(unsigned VirtReg, const MachineRegisterInfo *MRI) { - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg), - DE = MRI->def_end(); DI != DE; ++DI) - if (!DI->isImplicitDef()) + for (MachineInstr &DI : MRI->def_instructions(VirtReg)) + if (!DI.isImplicitDef()) return false; return true; } @@ -222,7 +222,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt) { ++NumLowered; - MachineBasicBlock::iterator AfterPHIsIt = llvm::next(LastPHIIt); + MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt); // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); @@ -267,7 +267,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Update live variable information if there is any. if (LV) { - MachineInstr *PHICopy = prior(AfterPHIsIt); + MachineInstr *PHICopy = std::prev(AfterPHIsIt); if (IncomingReg) { LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); @@ -306,7 +306,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Update LiveIntervals for the new copy or implicit def. if (LIS) { - MachineInstr *NewInstr = prior(AfterPHIsIt); + MachineInstr *NewInstr = std::prev(AfterPHIsIt); SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr); SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); @@ -378,7 +378,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. - MachineInstr *NewSrcInstr = 0; + MachineInstr *NewSrcInstr = nullptr; if (!reusedIncoming && IncomingReg) { if (SrcUndef) { // The source register is undefined, so there is no need for a real @@ -444,7 +444,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } } else { // We just inserted this copy. - KillInst = prior(InsertPos); + KillInst = std::prev(InsertPos); } } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); @@ -504,7 +504,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } } else { // We just inserted this copy. - KillInst = prior(InsertPos); + KillInst = std::prev(InsertPos); } } assert(KillInst->readsRegister(SrcReg) && @@ -532,13 +532,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, /// used later to determine when the vreg is killed in the BB. 
/// void PHIElimination::analyzePHINodes(const MachineFunction& MF) { - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) - for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) - for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) - ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(), - BBI->getOperand(i).getReg())]; + for (const auto &MBB : MF) + for (const auto &BBI : MBB) { + if (!BBI.isPHI()) + break; + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) + ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(), + BBI.getOperand(i).getReg())]; + } } bool PHIElimination::SplitPHIEdges(MachineFunction &MF, @@ -547,7 +548,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. - const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0; + const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr; bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader(); bool Changed = false; @@ -564,7 +565,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // out-of-line blocks into the loop which is very bad for code placement. if (PreMBB == &MBB && !SplitAllCriticalEdges) continue; - const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0; + const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : nullptr; if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges) continue; @@ -607,7 +608,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (!ShouldSplit) continue; if (!PreMBB->SplitCriticalEdge(&MBB, this)) { - DEBUG(dbgs() << "Failed to split ciritcal edge.\n"); + DEBUG(dbgs() << "Failed to split critical edge.\n"); continue; } Changed = true; diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp index e1b56e9..99bbad1 100644 --- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -34,11 +34,9 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, // Discover any defs/uses in this basic block. 
SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg), - RE = MRI.reg_end(); RI != RE; ++RI) { - MachineInstr* DefUseMI = &*RI; - if (DefUseMI->getParent() == MBB) - DefUsesInMBB.insert(DefUseMI); + for (MachineInstr &RI : MRI.reg_instructions(SrcReg)) { + if (RI.getParent() == MBB) + DefUsesInMBB.insert(&RI); } MachineBasicBlock::iterator InsertPoint; diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index f4ffd03..249b2d0 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -14,11 +14,11 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -56,7 +56,7 @@ static cl::opt<cl::boolOrDefault> OptimizeRegAlloc("optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); static cl::opt<cl::boolOrDefault> -EnableMachineSched("enable-misched", cl::Hidden, +EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass.")); static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, @@ -65,6 +65,8 @@ static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, cl::desc("Disable Machine Sinking")); static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, cl::desc("Disable Loop Strength Reduction Pass")); +static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting", + cl::Hidden, cl::desc("Disable ConstantHoisting")); static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden, cl::desc("Disable Codegen Prepare")); static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden, @@ -77,12 +79,20 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), - cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); + cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=nullptr)); static cl::opt<std::string> PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); +// Temporary option to allow experimenting with MachineScheduler as a post-RA +// scheduler. Targets can "properly" enable this with +// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it +// wouldn't be part of the standard pass pipeline, and the target would just add +// a PostRA scheduling pass wherever it wants. +static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, + cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); + // Experimental option to run live interval analysis early. 
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); @@ -111,7 +121,7 @@ static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID, case cl::BOU_TRUE: if (TargetID.isValid()) return TargetID; - if (StandardID == 0) + if (StandardID == nullptr) report_fatal_error("Target cannot enable pass"); return StandardID; case cl::BOU_FALSE: @@ -217,8 +227,8 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0), - Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false), + : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), + Started(true), Stopped(false), TM(tm), Impl(nullptr), Initialized(false), DisableVerify(false), EnableTailMerge(true) { @@ -259,7 +269,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetPassConfig::TargetPassConfig() - : ImmutablePass(ID), PM(0) { + : ImmutablePass(ID), PM(nullptr) { llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); } @@ -317,7 +327,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { IdentifyingPassPtr TargetID = getPassSubstitution(PassID); IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); if (!FinalPtr.isValid()) - return 0; + return nullptr; Pass *P; if (FinalPtr.isInstance()) @@ -369,20 +379,26 @@ void TargetPassConfig::addIRPasses() { // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) + if (!DisableVerify) { addPass(createVerifierPass()); + addPass(createDebugInfoVerifierPass()); + } // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { addPass(createLoopStrengthReducePass()); if (PrintLSR) - addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } addPass(createGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); + + // Prepare expensive constants for SelectionDAG. + if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting) + addPass(createConstantHoistingPass()); } /// Turn exception handling constructs into something the code generators can @@ -400,11 +416,11 @@ void TargetPassConfig::addPassesToHandleExceptions() { // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - case ExceptionHandling::Win64: + case ExceptionHandling::WinEH: addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: - addPass(createLowerInvokePass(TM)); + addPass(createLowerInvokePass()); // The lower invoke pass may create unreachable code. Remove it. addPass(createUnreachableBlockEliminationPass()); @@ -422,14 +438,19 @@ void TargetPassConfig::addCodeGenPrepare() { /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. void TargetPassConfig::addISelPrepare() { - addPass(createStackProtectorPass(TM)); - addPreISel(); + // Need to verify DebugInfo *before* creating the stack protector analysis. + // It's a function pass, and verifying between it and its users causes a + // crash. 
+ if (!DisableVerify) + addPass(createDebugInfoVerifierPass()); + + addPass(createStackProtectorPass(TM)); + if (PrintISelInput) - addPass(createPrintFunctionPass("\n\n" - "*** Final LLVM Code input to ISel ***\n", - &dbgs())); + addPass(createPrintFunctionPass( + dbgs(), "\n\n*** Final LLVM Code input to ISel ***\n")); // All passes which modify the LLVM IR are now complete; run the verifier // to ensure that the IR is valid. @@ -520,7 +541,10 @@ void TargetPassConfig::addMachinePasses() { // Second pass scheduler. if (getOptLevel() != CodeGenOpt::None) { - addPass(&PostRASchedulerID); + if (MISchedPostRA) + addPass(&PostMachineSchedulerID); + else + addPass(&PostRASchedulerID); printAndVerify("After PostRAScheduler"); } @@ -536,6 +560,8 @@ void TargetPassConfig::addMachinePasses() { if (addPreEmitPass()) printAndVerify("After PreEmit passes"); + + addPass(&StackMapLivenessID); } /// Add passes that optimize machine instructions in SSA form. @@ -596,7 +622,7 @@ MachinePassRegistry RegisterRegAlloc::Registry; /// A dummy default pass factory indicates whether the register allocator is /// overridden on the command line. -static FunctionPass *useDefaultRegisterAllocator() { return 0; } +static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } static RegisterRegAlloc defaultRegAlloc("default", "pick register allocator based on -O option", @@ -725,7 +751,10 @@ void TargetPassConfig::addMachineLateOptimization() { printAndVerify("After BranchFolding"); // Tail duplication. - if (addPass(&TailDuplicateID)) + // Note that duplicating tail just increases code size and degrades + // performance for targets that require Structured Control Flow. + // In addition it can also make CFG irreducible. Thus we disable it. + if (!TM->requiresStructuredCFG() && addPass(&TailDuplicateID)) printAndVerify("After TailDuplicate"); // Copy propagation. 
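The MISchedPostRA option added in the Passes.cpp hunks above is only an experiment flag; its own comment names substitutePass(&PostRASchedulerID, &PostMachineSchedulerID) as the proper way for a target to opt in. The following is a minimal sketch of that hook, not part of this patch; "MyTargetPassConfig" and its constructor wiring are hypothetical:

// Sketch only: make the MachineScheduler-based post-RA pass the target's
// default, instead of relying on the temporary -misched-postra flag.
// Assumes the usual llvm/CodeGen/Passes.h context of this patch;
// "MyTargetPassConfig" is a hypothetical subclass, not upstream code.
#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"

using namespace llvm;

class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    // Wherever the standard pipeline would add PostRAScheduler, run
    // PostMachineScheduler instead (cf. addMachinePasses() above).
    substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
  }
};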
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 28f2d2f..716cb1f 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -66,7 +66,6 @@ // C = copy A <-- same-bank copy //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "peephole-opt" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -81,6 +80,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "peephole-opt" + // Optimize Extensions static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden, @@ -90,6 +91,10 @@ static cl::opt<bool> DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); +static cl::opt<bool> +DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true), + cl::desc("Disable advanced copy optimization")); + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); @@ -110,9 +115,9 @@ namespace { initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); if (Aggressive) { @@ -133,7 +138,107 @@ namespace { bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); - bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg); + bool isLoadFoldable(MachineInstr *MI, + SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); + }; + + /// \brief Helper class to track the possible sources of a value defined by + /// a (chain of) copy related instructions. + /// Given a definition (instruction and definition index), this class + /// follows the use-def chain to find successive suitable sources. + /// The given source can be used to rewrite the definition into + /// def = COPY src. + /// + /// For instance, let us consider the following snippet: + /// v0 = + /// v2 = INSERT_SUBREG v1, v0, sub0 + /// def = COPY v2.sub0 + /// + /// Using a ValueTracker for def = COPY v2.sub0 will give the following + /// suitable sources: + /// v2.sub0 and v0. + /// Then, def can be rewritten into def = COPY v0. + class ValueTracker { + private: + /// The current point into the use-def chain. + const MachineInstr *Def; + /// The index of the definition in Def. + unsigned DefIdx; + /// The sub register index of the definition. + unsigned DefSubReg; + /// The register where the value can be found. + unsigned Reg; + /// Specifiy whether or not the value tracking looks through + /// complex instructions. When this is false, the value tracker + /// bails on everything that is not a copy or a bitcast. + /// + /// Note: This could have been implemented as a specialized version of + /// the ValueTracker class but that would have complicated the code of + /// the users of this class. + bool UseAdvancedTracking; + /// Optional MachineRegisterInfo used to perform some complex + /// tracking. + const MachineRegisterInfo *MRI; + + /// \brief Dispatcher to the right underlying implementation of + /// getNextSource. 
+ bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for Copy instructions. + bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for Bitcast instructions. + bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for RegSequence + /// instructions. + bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for InsertSubreg + /// instructions. + bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for ExtractSubreg + /// instructions. + bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for SubregToReg + /// instructions. + bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg); + + public: + /// \brief Create a ValueTracker instance for the value defines by \p MI + /// at the operand index \p DefIdx. + /// \p DefSubReg represents the sub register index the value tracker will + /// track. It does not need to match the sub register index used in \p MI. + /// \p UseAdvancedTracking specifies whether or not the value tracker looks + /// through complex instructions. By default (false), it handles only copy + /// and bitcast instructions. + /// \p MRI useful to perform some complex checks. + ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, + bool UseAdvancedTracking = false, + const MachineRegisterInfo *MRI = nullptr) + : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), + UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) { + assert(Def->getOperand(DefIdx).isDef() && + Def->getOperand(DefIdx).isReg() && + "Definition does not match machine instruction"); + // Initially the value is in the defined register. + Reg = Def->getOperand(DefIdx).getReg(); + } + + /// \brief Following the use-def chain, get the next available source + /// for the tracked value. + /// When the returned value is not nullptr, getReg() gives the register + /// that contain the tracked value. + /// \note The sub register index returned in \p SrcSubReg must be used + /// on that getReg() to access the actual value. + /// \return Unless the returned value is nullptr (i.e., no source found), + /// \p SrcIdx gives the index of the next source in the returned + /// instruction and \p SrcSubReg the index to be used on that source to + /// get the tracked value. When nullptr is returned, no alternative source + /// has been found. + const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg); + + /// \brief Get the last register where the initial value can be found. + /// Initially this is the register of the definition. + /// Then, after each successful call to getNextSource, this is the + /// register of the last source. + unsigned getReg() const { return Reg; } }; } @@ -182,15 +287,13 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of // SrcReg:SubIdx should be replaced. bool UseSrcSubIdx = TM->getRegisterInfo()-> - getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0; + getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; // The source has other uses. See if we can replace the other uses with use of // the result of the extension. 
SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); - UI != UE; ++UI) - ReachedBBs.insert(UI->getParent()); + for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) + ReachedBBs.insert(UI.getParent()); // Uses that are in the same BB of uses of the result of the instruction. SmallVector<MachineOperand*, 8> Uses; @@ -199,11 +302,8 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallVector<MachineOperand*, 8> ExtendedUses; bool ExtendLife = true; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); - UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; + for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { + MachineInstr *UseMI = UseMO.getParent(); if (UseMI == MI) continue; @@ -270,11 +370,9 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Look for PHI uses of the extended result, we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions down // stream. A PHI use is expected to be the kill of its source values. - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); - UI != UE; ++UI) - if (UI->isPHI()) - PHIBBs.insert(UI->getParent()); + for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) + if (UI.isPHI()) + PHIBBs.insert(UI.getParent()); const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); for (unsigned i = 0, e = Uses.size(); i != e; ++i) { @@ -364,7 +462,7 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, unsigned SrcIdx, DefIdx; if (SrcSubReg && DefSubReg) return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, - SrcIdx, DefIdx) != NULL; + SrcIdx, DefIdx) != nullptr; // At most one of the register is a sub register, make it Src to avoid // duplicating the test. if (!SrcSubReg) { @@ -374,9 +472,9 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, // One of the register is a sub register, check if we can get a superclass. if (SrcSubReg) - return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != NULL; + return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; // Plain copy. - return TRI.getCommonSubClass(DefRC, SrcRC) != NULL; + return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; } /// \brief Get the index of the definition and source for \p Copy @@ -448,31 +546,32 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { unsigned Src; unsigned SrcSubReg; bool ShouldRewrite = false; - MachineInstr *Copy = MI; const TargetRegisterInfo &TRI = *TM->getRegisterInfo(); - // Follow the chain of copies until we reach the top or find a - // more suitable source. + // Follow the chain of copies until we reach the top of the use-def chain + // or find a more suitable source. + ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI); do { - unsigned CopyDefIdx, CopySrcIdx; - if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx)) + unsigned CopySrcIdx, CopySrcSubReg; + if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg)) break; - const MachineOperand &MO = Copy->getOperand(CopySrcIdx); - assert(MO.isReg() && "Copies must be between registers."); - Src = MO.getReg(); - + Src = ValTracker.getReg(); + SrcSubReg = CopySrcSubReg; + + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. 
+ // Moreover, if we want to extend the live-range of a physical register, + // unlike SSA virtual register, we will have to check that they are not + // redefine before the related use. if (TargetRegisterInfo::isPhysicalRegister(Src)) break; const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); - SrcSubReg = MO.getSubReg(); // If this source does not incur a cross register bank copy, use it. ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC, SrcSubReg); - // Follow the chain of copies: get the definition of Src. - Copy = MRI->getVRegDef(Src); - } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast())); + } while (!ShouldRewrite); // If we did not find a more suitable source, there is nothing to optimize. if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg()) @@ -488,6 +587,9 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { MRI->replaceRegWith(Def, NewVR); MRI->clearKillFlags(NewVR); + // We extended the lifetime of Src. + // Clear the kill flags to account for that. + MRI->clearKillFlags(Src); MI->eraseFromParent(); ++NumCopiesBitcasts; return true; @@ -496,8 +598,9 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { /// isLoadFoldable - Check whether MI is a candidate for folding into a later /// instruction. We only fold loads to virtual registers and the virtual /// register defined has a single use. -bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI, - unsigned &FoldAsLoadDefReg) { +bool PeepholeOptimizer::isLoadFoldable( + MachineInstr *MI, + SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { if (!MI->canFoldAsLoad() || !MI->mayLoad()) return false; const MCInstrDesc &MCID = MI->getDesc(); @@ -505,13 +608,13 @@ bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI, return false; unsigned Reg = MI->getOperand(0).getReg(); - // To reduce compilation time, we check MRI->hasOneUse when inserting + // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting // loads. It should be checked when processing uses of the load, since // uses can be removed during peephole. if (!MI->getOperand(0).getSubReg() && TargetRegisterInfo::isVirtualRegister(Reg) && - MRI->hasOneUse(Reg)) { - FoldAsLoadDefReg = Reg; + MRI->hasOneNonDBGUse(Reg)) { + FoldAsLoadDefCandidates.insert(Reg); return true; } return false; @@ -561,6 +664,9 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, } bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); @@ -570,22 +676,18 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); - DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0; + DT = Aggressive ? 
&getAnalysis<MachineDominatorTree>() : nullptr; bool Changed = false; - SmallPtrSet<MachineInstr*, 8> LocalMIs; - SmallSet<unsigned, 4> ImmDefRegs; - DenseMap<unsigned, MachineInstr*> ImmDefMIs; - unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; bool SeenMoveImm = false; - LocalMIs.clear(); - ImmDefRegs.clear(); - ImmDefMIs.clear(); - FoldAsLoadDefReg = 0; + SmallPtrSet<MachineInstr*, 8> LocalMIs; + SmallSet<unsigned, 4> ImmDefRegs; + DenseMap<unsigned, MachineInstr*> ImmDefMIs; + SmallSet<unsigned, 16> FoldAsLoadDefCandidates; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { @@ -594,16 +696,20 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { ++MII; LocalMIs.insert(MI); + // Skip debug values. They should not affect this peephole optimization. + if (MI->isDebugValue()) + continue; + // If there exists an instruction which belongs to the following - // categories, we will discard the load candidate. - if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || + // categories, we will discard the load candidates. + if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || + MI->isKill() || MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) { - FoldAsLoadDefReg = 0; + FoldAsLoadDefCandidates.clear(); continue; } if (MI->mayStore() || MI->isCall()) - FoldAsLoadDefReg = 0; + FoldAsLoadDefCandidates.clear(); if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || @@ -630,26 +736,43 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // Check whether MI is a load candidate for folding into a later // instruction. If MI is not a candidate, check whether we can fold an // earlier load into MI. - if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { - // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr - // can enable folding by converting SUB to CMP. - MachineInstr *DefMI = 0; - MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, - FoldAsLoadDefReg, DefMI); - if (FoldMI) { - // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. - DEBUG(dbgs() << "Replacing: " << *MI); - DEBUG(dbgs() << " With: " << *FoldMI); - LocalMIs.erase(MI); - LocalMIs.erase(DefMI); - LocalMIs.insert(FoldMI); - MI->eraseFromParent(); - DefMI->eraseFromParent(); - ++NumLoadFold; - - // MI is replaced with FoldMI. - Changed = true; - continue; + if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && + !FoldAsLoadDefCandidates.empty()) { + const MCInstrDesc &MIDesc = MI->getDesc(); + for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); + ++i) { + const MachineOperand &MOp = MI->getOperand(i); + if (!MOp.isReg()) + continue; + unsigned FoldAsLoadDefReg = MOp.getReg(); + if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { + // We need to fold load after optimizeCmpInstr, since + // optimizeCmpInstr can enable folding by converting SUB to CMP. + // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and + // we need it for markUsesInDebugValueAsUndef(). + unsigned FoldedReg = FoldAsLoadDefReg; + MachineInstr *DefMI = nullptr; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, + DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted + // DefMI. 
+          DEBUG(dbgs() << "Replacing: " << *MI);
+          DEBUG(dbgs() << "     With: " << *FoldMI);
+          LocalMIs.erase(MI);
+          LocalMIs.erase(DefMI);
+          LocalMIs.insert(FoldMI);
+          MI->eraseFromParent();
+          DefMI->eraseFromParent();
+          MRI->markUsesInDebugValueAsUndef(FoldedReg);
+          FoldAsLoadDefCandidates.erase(FoldedReg);
+          ++NumLoadFold;
+          // MI is replaced with FoldMI.
+          Changed = true;
+          break;
+        }
+      }
     }
   }

@@ -657,3 +780,251 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {

   return Changed;
 }
+
+bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx,
+                                         unsigned &SrcSubReg) {
+  assert(Def->isCopy() && "Invalid definition");
+  // Copy instructions are supposed to be: Def = Src.
+  // If someone breaks this assumption, bad things will happen everywhere.
+  assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands");
+
+  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+    // If we look for a different subreg, it means we want a subreg of the src.
+    // Bail, as we do not support composing subregs yet.
+    return false;
+  // Otherwise, we want the whole source.
+  SrcIdx = 1;
+  SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+  return true;
+}
+
+bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
+                                            unsigned &SrcSubReg) {
+  assert(Def->isBitcast() && "Invalid definition");
+
+  // Bail if there are effects that a plain copy will not expose.
+  if (Def->hasUnmodeledSideEffects())
+    return false;
+
+  // Bitcasts with more than one def are not supported.
+  if (Def->getDesc().getNumDefs() != 1)
+    return false;
+  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+    // If we look for a different subreg, it means we want a subreg of the src.
+    // Bail, as we do not support composing subregs yet.
+    return false;
+
+  SrcIdx = Def->getDesc().getNumOperands();
+  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
+       ++OpIdx) {
+    const MachineOperand &MO = Def->getOperand(OpIdx);
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+    assert(!MO.isDef() && "We should have skipped all the definitions by now");
+    if (SrcIdx != EndOpIdx)
+      // Multiple sources?
+      return false;
+    SrcIdx = OpIdx;
+  }
+  SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+  return true;
+}
+
+bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
+                                                unsigned &SrcSubReg) {
+  assert(Def->isRegSequence() && "Invalid definition");
+
+  if (Def->getOperand(DefIdx).getSubReg())
+    // If we are composing subregs, bail out.
+    // The case we are checking is Def.<subreg> = REG_SEQUENCE.
+    // This should almost never happen as the SSA property is tracked at
+    // the register level (as opposed to the subreg level).
+    // I.e.,
+    // Def.sub0 =
+    // Def.sub1 =
+    // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
+    // Def. Thus, it must not be generated.
+    // However, some code could theoretically generate a single
+    // Def.sub0 (i.e., not defining the other subregs) and we would
+    // have this case.
+    // If we can ascertain (or force) that this never happens, we could
+    // turn that into an assertion.
+    return false;
+
+  // We are looking at:
+  // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
+  // Check if one of the operands defines the subreg we are interested in.
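+  // (A hypothetical example: for Def = REG_SEQUENCE v0, sub0, v1, sub1 with
+  // DefSubReg == sub1, the loop below matches the pair (v1, sub1) and
+  // reports v1 as the next source.)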
+  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands();
+       OpIdx != EndOpIdx; OpIdx += 2) {
+    const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1);
+    assert(MOSubIdx.isImm() &&
+           "One of the subindex of the reg_sequence is not an immediate");
+    if (MOSubIdx.getImm() == DefSubReg) {
+      assert(Def->getOperand(OpIdx).isReg() &&
+             "One of the source of the reg_sequence is not a register");
+      SrcIdx = OpIdx;
+      SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+      return true;
+    }
+  }
+
+  // If the subreg we are tracking is super-defined by another subreg,
+  // we could follow this value. However, this would require composing
+  // the subregs, and we do not do that for now.
+  return false;
+}
+
+bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx,
+                                                 unsigned &SrcSubReg) {
+  assert(Def->isInsertSubreg() && "Invalid definition");
+  if (Def->getOperand(DefIdx).getSubReg())
+    // If we are composing subregs, bail out.
+    // Same remark as getNextSourceFromRegSequence.
+    // I.e., this may be turned into an assert.
+    return false;
+
+  // We are looking at:
+  // Def = INSERT_SUBREG v0, v1, sub1
+  // There are two cases:
+  // 1. DefSubReg == sub1, get v1.
+  // 2. DefSubReg != sub1, the value may be available through v0.
+
+  // #1 Check if the inserted register matches the required sub index.
+  unsigned InsertedSubReg = Def->getOperand(3).getImm();
+  if (InsertedSubReg == DefSubReg) {
+    SrcIdx = 2;
+    SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+    return true;
+  }
+  // #2 Otherwise, if the subregister we are looking for is not partially
+  // defined by the inserted element, we can look through the main
+  // register (v0).
+  // To check the overlap, we need an MRI and a TRI.
+  if (!MRI)
+    return false;
+
+  const MachineOperand &MODef = Def->getOperand(DefIdx);
+  const MachineOperand &MOBase = Def->getOperand(1);
+  // If the result register (Def) and the base register (v0) do not
+  // have the same register class or if we have to compose
+  // subregisters, bail out.
+  if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) ||
+      MOBase.getSubReg())
+    return false;
+
+  // Get the TRI and check if the inserted subregister overlaps with the
+  // subregister we are tracking.
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  if (!TRI ||
+      (TRI->getSubRegIndexLaneMask(DefSubReg) &
+       TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0)
+    return false;
+  // At this point, the value is available in v0 via the same subreg
+  // we used for Def.
+  SrcIdx = 1;
+  SrcSubReg = DefSubReg;
+  return true;
+}
+
+bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
+                                                  unsigned &SrcSubReg) {
+  assert(Def->isExtractSubreg() && "Invalid definition");
+  // We are looking at:
+  // Def = EXTRACT_SUBREG v0, sub0
+
+  // Bail if we have to compose subregisters.
+  // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
+  if (DefSubReg)
+    return false;
+
+  // Bail if we have to compose subregisters.
+  // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
+  if (Def->getOperand(1).getSubReg())
+    return false;
+  // Otherwise, the value is available in v0.sub0.
+  SrcIdx = 1;
+  SrcSubReg = Def->getOperand(2).getImm();
+  return true;
+}
+
+bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
+                                                unsigned &SrcSubReg) {
+  assert(Def->isSubregToReg() && "Invalid definition");
+  // We are looking at:
+  // Def = SUBREG_TO_REG Imm, v0, sub0
+
+  // Bail if we have to compose subregisters.
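+  // (Roughly speaking, SUBREG_TO_REG asserts that the bits of Def outside
+  // sub0 have the value given by Imm; only Def.sub0 comes from v0, so
+  // tracking any other subreg would mean composing subreg indices.)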
+  // If DefSubReg != sub0, we would have to check that all the bits
+  // we track are included in sub0 and, if so, we would have to
+  // determine the right subreg in v0.
+  if (DefSubReg != Def->getOperand(3).getImm())
+    return false;
+  // Bail if we have to compose subregisters.
+  // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
+  if (Def->getOperand(2).getSubReg())
+    return false;
+
+  SrcIdx = 2;
+  SrcSubReg = Def->getOperand(3).getImm();
+  return true;
+}
+
+bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) {
+  assert(Def && "This method needs a valid definition");
+
+  assert(
+      (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
+      Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
+  if (Def->isCopy())
+    return getNextSourceFromCopy(SrcIdx, SrcSubReg);
+  if (Def->isBitcast())
+    return getNextSourceFromBitcast(SrcIdx, SrcSubReg);
+  // All the remaining cases involve "complex" instructions.
+  // Bail if we did not ask for advanced tracking.
+  if (!UseAdvancedTracking)
+    return false;
+  if (Def->isRegSequence())
+    return getNextSourceFromRegSequence(SrcIdx, SrcSubReg);
+  if (Def->isInsertSubreg())
+    return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg);
+  if (Def->isExtractSubreg())
+    return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg);
+  if (Def->isSubregToReg())
+    return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg);
+  return false;
+}
+
+const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
+                                                unsigned &SrcSubReg) {
+  // If we reach a point where we cannot move up in the use-def chain,
+  // there is nothing we can get.
+  if (!Def)
+    return nullptr;
+
+  const MachineInstr *PrevDef = nullptr;
+  // Try to find the next source.
+  if (getNextSourceImpl(SrcIdx, SrcSubReg)) {
+    // Update definition, definition index, and subregister for the
+    // next call of getNextSource.
+    const MachineOperand &MO = Def->getOperand(SrcIdx);
+    assert(MO.isReg() && !MO.isDef() && "Source is invalid");
+    // Update the current register.
+    Reg = MO.getReg();
+    // Update the return value before moving up in the use-def chain.
+    PrevDef = Def;
+    // If we can still move up in the use-def chain, move to the next
+    // definition.
+    if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      Def = MRI->getVRegDef(Reg);
+      DefIdx = MRI->def_begin(Reg).getOperandNo();
+      DefSubReg = SrcSubReg;
+      return PrevDef;
+    }
+  }
+  // If we end up here, this means we will not be able to find another source
+  // for the next iteration.
+  // Make sure any new call to getNextSource bails out early by cutting the
+  // use-def chain.
+ Def = nullptr; + return PrevDef; +} diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 1afc1ec..a1ab344 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "post-RA-sched" #include "llvm/CodeGen/Passes.h" #include "AggressiveAntiDepBreaker.h" #include "AntiDepBreaker.h" @@ -30,7 +29,6 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -48,6 +46,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "post-RA-sched" + STATISTIC(NumNoops, "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies"); @@ -86,7 +86,7 @@ namespace { static char ID; PostRAScheduler() : MachineFunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addRequired<TargetPassConfig>(); @@ -97,7 +97,12 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; + + bool enablePostRAScheduler( + const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode &Mode, + TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const; }; char PostRAScheduler::ID = 0; @@ -121,9 +126,6 @@ namespace { /// AA - AliasAnalysis for making memory reference queries. AliasAnalysis *AA; - /// LiveRegs - true if the register is live. - BitVector LiveRegs; - /// The schedule. Null SUnit*'s represent noop instructions. std::vector<SUnit*> Sequence; @@ -145,23 +147,23 @@ namespace { /// startBlock - Initialize register live-range state for scheduling in /// this block. /// - void startBlock(MachineBasicBlock *BB); + void startBlock(MachineBasicBlock *BB) override; // Set the index of RegionEnd within the current BB. void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; } /// Initialize the scheduler state for the next scheduling region. - virtual void enterRegion(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned regioninstrs); + void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) override; /// Notify that the scheduler has finished scheduling the current region. - virtual void exitRegion(); + void exitRegion() override; /// Schedule - Schedule the instruction range using list scheduling. /// - void schedule(); + void schedule() override; void EmitSchedule(); @@ -172,26 +174,16 @@ namespace { /// finishBlock - Clean up register live-range state. 
/// - void finishBlock(); - - /// FixupKills - Fix register kill flags that have been made - /// invalid due to scheduling - /// - void FixupKills(MachineBasicBlock *MBB); + void finishBlock() override; private: void ReleaseSucc(SUnit *SU, SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); - void StartBlockForKills(MachineBasicBlock *BB); - - // ToggleKillFlag - Toggle a register operand kill flag. Other - // adjustments may be made to the instruction if necessary. Return - // true if the operand has been deleted, false if not. - bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); void dumpSchedule() const; + void emitNoop(unsigned CurCycle); }; } @@ -205,9 +197,8 @@ SchedulePostRATDList::SchedulePostRATDList( AliasAnalysis *AA, const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), - LiveRegs(TRI->getNumRegs()), EndIndex(0) -{ + : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), EndIndex(0) { + const TargetMachine &TM = MF.getTarget(); const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); HazardRec = @@ -220,7 +211,7 @@ SchedulePostRATDList::SchedulePostRATDList( ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL)); + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr)); } SchedulePostRATDList::~SchedulePostRATDList() { @@ -259,7 +250,21 @@ void SchedulePostRATDList::dumpSchedule() const { } #endif +bool PostRAScheduler::enablePostRAScheduler( + const TargetSubtargetInfo &ST, + CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode &Mode, + TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const { + Mode = ST.getAntiDepBreakMode(); + ST.getCriticalPathRCs(CriticalPathRCs); + return ST.enablePostMachineScheduler() && + OptLevel >= ST.getOptLevelToEnablePostRAScheduler(); +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { + if (skipOptnoneFunction(*Fn.getFunction())) + return false; + TII = Fn.getTarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); @@ -278,9 +283,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } else { // Check that post-RA scheduling is enabled for this target. // This may upgrade the AntiDepMode. 
- const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); - if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode, - CriticalPathRCs)) + const TargetSubtargetInfo &ST = + Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); + if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(), + AntiDepMode, CriticalPathRCs)) return false; } @@ -320,7 +326,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator Current = MBB->end(); unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { - MachineInstr *MI = llvm::prior(I); + MachineInstr *MI = std::prev(I); --Count; // Calls are not scheduling boundaries before register allocation, but // post-ra we don't gain anything by scheduling across calls since we @@ -352,7 +358,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.finishBlock(); // Update register kills - Scheduler.FixupKills(MBB); + Scheduler.fixupKills(MBB); } return true; @@ -367,7 +373,7 @@ void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { // Reset the hazard recognizer and anti-dep breaker. HazardRec->Reset(); - if (AntiDepBreak != NULL) + if (AntiDepBreak) AntiDepBreak->StartBlock(BB); } @@ -377,7 +383,7 @@ void SchedulePostRATDList::schedule() { // Build the scheduling graph. buildSchedGraph(AA); - if (AntiDepBreak != NULL) { + if (AntiDepBreak) { unsigned Broken = AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, EndIndex, DbgValues); @@ -409,162 +415,20 @@ void SchedulePostRATDList::schedule() { /// instruction, which will not be scheduled. /// void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { - if (AntiDepBreak != NULL) + if (AntiDepBreak) AntiDepBreak->Observe(MI, Count, EndIndex); } /// FinishBlock - Clean up register live-range state. /// void SchedulePostRATDList::finishBlock() { - if (AntiDepBreak != NULL) + if (AntiDepBreak) AntiDepBreak->FinishBlock(); // Call the superclass. ScheduleDAGInstrs::finishBlock(); } -/// StartBlockForKills - Initialize register live-range state for updating kills -/// -void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { - // Start with no live registers. - LiveRegs.reset(); - - // Examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - // Repeat, for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } -} - -bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, - MachineOperand &MO) { - // Setting kill flag... - if (!MO.isKill()) { - MO.setIsKill(true); - return false; - } - - // If MO itself is live, clear the kill flag... - if (LiveRegs.test(MO.getReg())) { - MO.setIsKill(false); - return false; - } - - // If any subreg of MO is live, then create an imp-def for that - // subreg and keep MO marked as killed. 
- MO.setIsKill(false); - bool AllDead = true; - const unsigned SuperReg = MO.getReg(); - MachineInstrBuilder MIB(MF, MI); - for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - MIB.addReg(*SubRegs, RegState::ImplicitDefine); - AllDead = false; - } - } - - if(AllDead) - MO.setIsKill(true); - return false; -} - -/// FixupKills - Fix the register kill flags, they may have been made -/// incorrect by instruction reordering. -/// -void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); - - BitVector killedRegs(TRI->getNumRegs()); - - StartBlockForKills(MBB); - - // Examine block from end to start... - unsigned Count = MBB->size(); - for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); - I != E; --Count) { - MachineInstr *MI = --I; - if (MI->isDebugValue()) - continue; - - // Update liveness. Registers that are defed but not used in this - // instruction are now dead. Mark register and all subregs as they - // are completely defined. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isRegMask()) - LiveRegs.clearBitsNotInMask(MO.getRegMask()); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI->isRegTiedToUseOperand(i)) continue; - - // Repeat for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.reset(*SubRegs); - } - - // Examine all used registers and set/clear kill flag. When a - // register is used multiple times we only set the kill flag on - // the first use. Don't set kill flags on undef operands. - killedRegs.reset(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - bool kill = false; - if (!killedRegs.test(Reg)) { - kill = true; - // A register is not killed if any subregs are live... - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - kill = false; - break; - } - } - - // If subreg is not live, then register is killed if it became - // live in this instruction - if (kill) - kill = !LiveRegs.test(Reg); - } - - if (MO.isKill() != kill) { - DEBUG(dbgs() << "Fixing " << MO << " in "); - // Warning: ToggleKillFlag may invalidate MO. - ToggleKillFlag(MI, MO); - DEBUG(MI->dump()); - } - - killedRegs.set(Reg); - } - - // Mark any used register (that is not using undef) and subregs as - // now live... - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } -} - //===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// @@ -583,7 +447,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { dbgs() << "*** Scheduling failed! 
***\n"; SuccSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --SuccSU->NumPredsLeft; @@ -630,6 +494,14 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { AvailableQueue.scheduledNode(SU); } +/// emitNoop - Add a noop to the current instruction sequence. +void SchedulePostRATDList::emitNoop(unsigned CurCycle) { + DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + HazardRec->EmitNoop(); + Sequence.push_back(nullptr); // NULL here means noop + ++NumNoops; +} + /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. void SchedulePostRATDList::ListScheduleTopDown() { @@ -678,7 +550,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); - SUnit *FoundSUnit = 0; + SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr; bool HasNoopHazards = false; while (!AvailableQueue.empty()) { SUnit *CurSUnit = AvailableQueue.pop(); @@ -686,8 +558,19 @@ void SchedulePostRATDList::ListScheduleTopDown() { ScheduleHazardRecognizer::HazardType HT = HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { - FoundSUnit = CurSUnit; - break; + if (HazardRec->ShouldPreferAnother(CurSUnit)) { + if (!NotPreferredSUnit) { + // If this is the first non-preferred node for this cycle, then + // record it and continue searching for a preferred node. If this + // is not the first non-preferred node, then treat it as though + // there had been a hazard. + NotPreferredSUnit = CurSUnit; + continue; + } + } else { + FoundSUnit = CurSUnit; + break; + } } // Remember if this is a noop hazard. @@ -696,6 +579,20 @@ void SchedulePostRATDList::ListScheduleTopDown() { NotReady.push_back(CurSUnit); } + // If we have a non-preferred node, push it back onto the available list. + // If we did not find a preferred node, then schedule this first + // non-preferred node. + if (NotPreferredSUnit) { + if (!FoundSUnit) { + DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n"); + FoundSUnit = NotPreferredSUnit; + } else { + AvailableQueue.push(NotPreferredSUnit); + } + + NotPreferredSUnit = nullptr; + } + // Add the nodes that aren't ready back onto the available list. if (!NotReady.empty()) { AvailableQueue.push_all(NotReady); @@ -704,6 +601,11 @@ void SchedulePostRATDList::ListScheduleTopDown() { // If we found a node to schedule... if (FoundSUnit) { + // If we need to emit noops prior to this instruction, then do so. + unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit); + for (unsigned i = 0; i != NumPreNoops; ++i) + emitNoop(CurCycle); + // ... schedule the node... ScheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); @@ -728,10 +630,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. - DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); - HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop - ++NumNoops; + emitNoop(CurCycle); } ++CurCycle; @@ -769,17 +668,17 @@ void SchedulePostRATDList::EmitSchedule() { // Update the Begin iterator, as the first instruction in the block // may have been scheduled later. 
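     // (Each scheduled instruction is spliced immediately before RegionEnd,
     // so right after the first splice, std::prev(RegionEnd) is the region's
     // new first instruction.)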
if (i == 0) - RegionBegin = prior(RegionEnd); + RegionBegin = std::prev(RegionEnd); } // Reinsert any remaining debug_values. for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); MachineInstr *DbgValue = P.first; MachineBasicBlock::iterator OrigPrivMI = P.second; BB->splice(++OrigPrivMI, BB, DbgValue); } DbgValues.clear(); - FirstDbgValue = NULL; + FirstDbgValue = nullptr; } diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 0c5173a..3129927 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "processimplicitdefs" - #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -21,6 +19,8 @@ using namespace llvm; +#define DEBUG_TYPE "processimplicitdefs" + namespace { /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def /// for each use. Add isUndef marker to implicit_def defs and their uses. @@ -41,9 +41,9 @@ public: initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &au) const; + void getAnalysisUsage(AnalysisUsage &au) const override; - virtual bool runOnMachineFunction(MachineFunction &fn); + bool runOnMachineFunction(MachineFunction &fn) override; }; } // end anonymous namespace @@ -80,10 +80,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { // For virtual registers, mark all uses as <undef>, and convert users to // implicit-def when possible. 
-    for (MachineRegisterInfo::use_nodbg_iterator UI =
-         MRI->use_nodbg_begin(Reg),
-         UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
-      MachineOperand &MO = UI.getOperand();
+    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
       MO.setIsUndef();
       MachineInstr *UserMI = MO.getParent();
       if (!canTurnIntoImplicitDef(UserMI))
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index b0e494f..b98d210 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -16,10 +16,10 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "pei"
 #include "PrologEpilogInserter.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -29,7 +29,10 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
@@ -42,6 +45,8 @@

 using namespace llvm;

+#define DEBUG_TYPE "pei"
+
 char PEI::ID = 0;
 char &llvm::PrologEpilogCodeInserterID = PEI::ID;

@@ -54,6 +59,7 @@
 INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
                 "Prologue/Epilogue Insertion", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_END(PEI, "prologepilog",
                     "Prologue/Epilogue Insertion & Frame Finalization",
                     false, false)
@@ -67,6 +73,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addPreserved<MachineLoopInfo>();
   AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<StackProtector>();
   AU.addRequired<TargetPassConfig>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -95,6 +102,9 @@ void PEI::calculateSets(MachineFunction &Fn) {
   return;
 }

+/// StackObjSet - A set of stack object indexes
+typedef SmallSetVector<int, 8> StackObjSet;
+
 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
 /// frame indexes with appropriate references.
 ///
@@ -105,7 +115,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");

-  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);

   // Calculate the MaxCallFrameSize and AdjustsStack variables for the
@@ -150,7 +160,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   replaceFrameIndices(Fn);

   // If register scavenging is needed, as we've enabled doing it as a
-  // post-pass, scavenge the virtual registers that frame index elimiation
+  // post-pass, scavenge the virtual registers that frame index elimination
   // inserted.
   if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
     scavengeFrameVirtualRegs(Fn);

@@ -160,10 +170,11 @@
   // Warn on stack size when it exceeds the given limit.
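   // (The replacement below routes the warning through the LLVMContext
   // diagnostic handler via DiagnosticInfoStackSize instead of printing
   // directly to errs(), so a front end can intercept or reformat it.)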
   MachineFrameInfo *MFI = Fn.getFrameInfo();
-  if (WarnStackSize.getNumOccurrences() > 0 &&
-      WarnStackSize < MFI->getStackSize())
-    errs() << "warning: Stack size limit exceeded (" << MFI->getStackSize()
-           << ") in " << Fn.getName()  << ".\n";
+  uint64_t StackSize = MFI->getStackSize();
+  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
+    DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
+    F->getContext().diagnose(DiagStackSize);
+  }

   delete RS;
   ReturnBlocks.clear();
@@ -233,14 +244,14 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
   MachineFrameInfo *MFI = F.getFrameInfo();

   // Get the callee saved register list...
-  const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);

   // These are used to keep track of the callee-save area. Initialize them.
   MinCSFrameIndex = INT_MAX;
   MaxCSFrameIndex = 0;

   // Early exit for targets which have no callee saved registers.
-  if (CSRegs == 0 || CSRegs[0] == 0)
+  if (!CSRegs || CSRegs[0] == 0)
     return;

   // In Naked functions we aren't going to save any registers.
@@ -257,51 +268,56 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
     }
   }

-  if (CSI.empty())
-    return;   // Early exit if no callee saved registers are modified!
+  if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+    // If the target doesn't implement this, use generic code.

-  unsigned NumFixedSpillSlots;
-  const TargetFrameLowering::SpillSlot *FixedSpillSlots =
-    TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+    if (CSI.empty())
+      return; // Early exit if no callee saved registers are modified!

-  // Now that we know which registers need to be saved and restored, allocate
-  // stack slots for them.
-  for (std::vector<CalleeSavedInfo>::iterator
-         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
-    unsigned Reg = I->getReg();
-    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+    unsigned NumFixedSpillSlots;
+    const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+        TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);

-    int FrameIdx;
-    if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
-      I->setFrameIdx(FrameIdx);
-      continue;
-    }
+    // Now that we know which registers need to be saved and restored, allocate
+    // stack slots for them.
+    for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
+         I != E; ++I) {
+      unsigned Reg = I->getReg();
+      const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);

-    // Check to see if this physreg must be spilled to a particular stack slot
-    // on this target.
-    const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
-    while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
-           FixedSlot->Reg != Reg)
-      ++FixedSlot;
-
-    if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
-      // Nope, just spill it anywhere convenient.
-      unsigned Align = RC->getAlignment();
-      unsigned StackAlign = TFI->getStackAlignment();
-
-      // We may not be able to satisfy the desired alignment specification of
-      // the TargetRegisterClass if the stack alignment is smaller. Use the
-      // min.
-      Align = std::min(Align, StackAlign);
-      FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
-      if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
-      if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
-    } else {
-      // Spill it to the stack where we must.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); - } + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } + + // Check to see if this physreg must be spilled to a particular stack slot + // on this target. + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; + while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots && + FixedSlot->Reg != Reg) + ++FixedSlot; + + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { + // Nope, just spill it anywhere convenient. + unsigned Align = RC->getAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Align = std::min(Align, StackAlign); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; + if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; + } else { + // Spill it to the stack where we must. + FrameIdx = + MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + } - I->setFrameIdx(FrameIdx); + I->setFrameIdx(FrameIdx); + } } MFI->setCalleeSavedInfo(CSI); @@ -409,11 +425,28 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, } } +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. +static void +AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + ProtectedObjs.insert(i); + } +} + /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + StackProtector *SP = &getAnalysis<StackProtector>(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; @@ -523,8 +556,12 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure that the stack protector comes before the local variables on the // stack. 
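   // (Layout sketch, as implemented by the switch below: large arrays are
   // placed closest to the stack protector, then small arrays, then objects
   // whose address is taken, so the likeliest overflow sources sit next to
   // the guard.)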
- SmallSet<int, 16> LargeStackObjs; + SmallSet<int, 16> ProtectedObjs; if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; + StackObjSet SmallArrayObjs; + StackObjSet AddrOfObjs; + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); @@ -541,12 +578,29 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (!MFI->MayNeedStackProtector(i)) - continue; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); - LargeStackObjs.insert(i); + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + continue; + case StackProtector::SSPLK_SmallArray: + SmallArrayObjs.insert(i); + continue; + case StackProtector::SSPLK_AddrOf: + AddrOfObjs.insert(i); + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); } // Then assign frame offsets to stack objects that are not used to spill @@ -563,7 +617,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (LargeStackObjs.count(i)) + if (ProtectedObjs.count(i)) continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); @@ -632,7 +686,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (Fn.getTarget().Options.EnableSegmentedStacks) + if (Fn.shouldSplitStack()) TFI.adjustForSegmentedStacks(Fn); // Emit additional code that is required to explicitly handle the stack in @@ -711,14 +765,14 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, SPAdj += Size; MachineBasicBlock::iterator PrevI = BB->end(); - if (I != BB->begin()) PrevI = prior(I); + if (I != BB->begin()) PrevI = std::prev(I); TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else - I = llvm::next(PrevI); + I = std::next(PrevI); continue; } @@ -757,7 +811,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, // use that target machine register info object to eliminate // it. TRI.eliminateFrameIndex(MI, SPAdj, i, - FrameIndexVirtualScavenging ? NULL : RS); + FrameIndexVirtualScavenging ? nullptr : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -765,7 +819,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, DoIncr = false; } - MI = 0; + MI = nullptr; break; } @@ -797,13 +851,14 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // We might end up here again with a NULL iterator if we scavenged a // register for which we inserted spill code for definition by what was // originally the first instruction in BB. 
- if (I == MachineBasicBlock::iterator(NULL)) + if (I == MachineBasicBlock::iterator(nullptr)) I = BB->begin(); MachineInstr *MI = I; - MachineBasicBlock::iterator J = llvm::next(I); - MachineBasicBlock::iterator P = I == BB->begin() ? - MachineBasicBlock::iterator(NULL) : llvm::prior(I); + MachineBasicBlock::iterator J = std::next(I); + MachineBasicBlock::iterator P = + I == BB->begin() ? MachineBasicBlock::iterator(nullptr) + : std::prev(I); // RS should process this instruction before we might scavenge at this // location. This is because we might be replacing a virtual register @@ -846,7 +901,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // spill code will have been inserted in between I and J. This is a // problem because we need the spill code before I: Move I to just // prior to J. - if (I != llvm::prior(J)) { + if (I != std::prev(J)) { BB->splice(J, BB, I); // Before we move I, we need to prepare the RS to visit I again. diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h index 77cfa2b..5a6d39a 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h @@ -37,12 +37,12 @@ namespace llvm { initializePEIPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; private: RegScavenger *RS; diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp index 8564911..12b2c90 100644 --- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp +++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp @@ -58,13 +58,9 @@ static const char *const PSVNames[] = { "ConstantPool" }; -// FIXME: THIS IS A HACK!!!! -// Eventually these should be uniqued on LLVMContext rather than in a managed -// static. For now, we can safely use the global context for the time being to -// squeak by. -PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) : - Value(Type::getInt8PtrTy(getGlobalContext()), - Subclass) {} +PseudoSourceValue::PseudoSourceValue(bool isFixed) : isFixed(isFixed) {} + +PseudoSourceValue::~PseudoSourceValue() {} void PseudoSourceValue::printCustom(raw_ostream &O) const { O << PSVNames[this - PSVGlobals->PSVs]; diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index 293e306..894aee7 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -7,12 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This file defines the RegAllocBase class which provides comon functionality +// This file defines the RegAllocBase class which provides common functionality // for LiveIntervalUnion-based register allocators. 
// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" #include "Spiller.h" #include "llvm/ADT/Statistic.h" @@ -35,6 +34,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumNewQueued , "Number of new live ranges queued"); // Temporary verification option until we can put verification inside @@ -101,8 +102,8 @@ void RegAllocBase::allocatePhysRegs() { // register if possible and populate a list of new live intervals that // result from splitting. DEBUG(dbgs() << "\nselectOrSplit " - << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << *VirtReg << '\n'); + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); typedef SmallVector<unsigned, 4> VirtRegVec; VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); @@ -110,11 +111,16 @@ void RegAllocBase::allocatePhysRegs() { if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! // Probably caused by an inline asm. - MachineInstr *MI; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); - (MI = I.skipInstruction());) - if (MI->isInlineAsm()) + MachineInstr *MI = nullptr; + for (MachineRegisterInfo::reg_instr_iterator + I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end(); + I != E; ) { + MachineInstr *TmpMI = &*(I++); + if (TmpMI->isInlineAsm()) { + MI = TmpMI; break; + } + } if (MI) MI->emitError("inline assembly requires more registers than available"); else diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h index c17a8d9..b333c36 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.h +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h @@ -37,7 +37,6 @@ #ifndef LLVM_CODEGEN_REGALLOCBASE #define LLVM_CODEGEN_REGALLOCBASE -#include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -66,7 +65,8 @@ protected: LiveRegMatrix *Matrix; RegisterClassInfo RegClassInfo; - RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {} + RegAllocBase() + : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {} virtual ~RegAllocBase() {} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 6768e45..6bc678e8 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "LiveDebugVariables.h" @@ -41,6 +40,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", createBasicRegisterAllocator); @@ -64,7 +65,7 @@ class RABasic : public MachineFunctionPass, public RegAllocBase MachineFunction *MF; // state - OwningPtr<Spiller> SpillerInstance; + std::unique_ptr<Spiller> SpillerInstance; std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, CompSpillWeight> Queue; @@ -76,36 +77,34 @@ public: RABasic(); /// Return the pass name. - virtual const char* getPassName() const { + const char* getPassName() const override { return "Basic Register Allocator"; } /// RABasic analysis usage. 
- virtual void getAnalysisUsage(AnalysisUsage &AU) const; - - virtual void releaseMemory(); + void getAnalysisUsage(AnalysisUsage &AU) const override; - virtual Spiller &spiller() { return *SpillerInstance; } + void releaseMemory() override; - virtual float getPriority(LiveInterval *LI) { return LI->weight; } + Spiller &spiller() override { return *SpillerInstance; } - virtual void enqueue(LiveInterval *LI) { + void enqueue(LiveInterval *LI) override { Queue.push(LI); } - virtual LiveInterval *dequeue() { + LiveInterval *dequeue() override { if (Queue.empty()) - return 0; + return nullptr; LiveInterval *LI = Queue.top(); Queue.pop(); return LI; } - virtual unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<unsigned> &SplitVRegs); + unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<unsigned> &SplitVRegs) override; /// Perform register allocation. - virtual bool runOnMachineFunction(MachineFunction &mf); + bool runOnMachineFunction(MachineFunction &mf) override; // Helper for spilling all live virtual registers currently unified under preg // that interfere with the most recently queried lvr. Return true if spilling @@ -158,7 +157,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { } void RABasic::releaseMemory() { - SpillerInstance.reset(0); + SpillerInstance.reset(); } diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index e92dbd2..97b9f76 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" @@ -38,6 +37,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); STATISTIC(NumCopies, "Number of copies coalesced"); @@ -75,7 +76,7 @@ namespace { bool Dirty; // Register needs spill. explicit LiveReg(unsigned v) - : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {} + : LastUse(nullptr), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false){} unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); @@ -144,23 +145,23 @@ namespace { // not be erased. bool isBulkSpilling; - enum LLVM_ENUM_INT_TYPE(unsigned) { + enum : unsigned { spillClean = 1, spillDirty = 100, spillImpossible = ~0u }; public: - virtual const char *getPassName() const { + const char *getPassName() const override { return "Fast Register Allocator"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } private: - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; void AllocateBasicBlock(); void handleThroughOperands(MachineInstr *MI, SmallVectorImpl<unsigned> &VirtDead); @@ -224,7 +225,7 @@ bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { // Check that the use/def chain has exactly one operand - MO. MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); - if (&I.getOperand() != &MO) + if (&*I != &MO) return false; return ++I == MRI->reg_nodbg_end(); } @@ -319,7 +320,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // now. 
LRIDbgValues.clear(); if (SpillKill) - LR.LastUse = 0; // Don't kill register again + LR.LastUse = nullptr; // Don't kill register again } killVirtReg(LRI); } @@ -585,12 +586,12 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); + std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (New) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { - const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg); + const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); // It's a copy, use the destination register as a hint. if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); @@ -618,7 +619,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); + std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); MachineOperand &MO = MI->getOperand(OpNum); if (New) { LRI = allocVirtReg(MI, LRI, Hint); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index c08d955..dee990c 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "InterferenceCache.h" @@ -35,17 +34,23 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/PassAnalysisSupport.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <queue> using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); STATISTIC(NumEvicted, "Number of interferences evicted"); @@ -59,6 +64,34 @@ SplitSpillMode("split-spill-mode", cl::Hidden, clEnumValEnd), cl::init(SplitEditor::SM_Partition)); +static cl::opt<unsigned> +LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, + cl::desc("Last chance recoloring max depth"), + cl::init(5)); + +static cl::opt<unsigned> LastChanceRecoloringMaxInterference( + "lcr-max-interf", cl::Hidden, + cl::desc("Last chance recoloring maximum number of considered" + " interference at a time"), + cl::init(8)); + +static cl::opt<bool> +ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, + cl::desc("Exhaustive Search for registers bypassing the depth " + "and interference cutoffs of last chance recoloring")); + +static cl::opt<bool> EnableLocalReassignment( + "enable-local-reassign", cl::Hidden, + cl::desc("Local reassignment can yield better allocation decisions, but " + "may be compile time intensive"), + cl::init(false)); + +// FIXME: Find a good default for this flag and remove the flag. 
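+// (Sketch of intended use, assuming the default of 0 preserves the existing
+// behavior: -regalloc-csr-first-time-cost=N charges roughly N units of extra
+// spill cost the first time a callee-saved register is used, nudging cheap
+// live ranges toward splitting or eviction instead of claiming a fresh CSR.)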
+static cl::opt<unsigned> +CSRFirstTimeCost("regalloc-csr-first-time-cost", + cl::desc("Cost for first time use of callee-saved register."), + cl::init(0), cl::Hidden); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -66,10 +99,19 @@ namespace { class RAGreedy : public MachineFunctionPass, public RegAllocBase, private LiveRangeEdit::Delegate { + // Convenient shortcuts. + typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue; + typedef SmallPtrSet<LiveInterval *, 4> SmallLISet; + typedef SmallSet<unsigned, 16> SmallVirtRegSet; // context MachineFunction *MF; + // Shortcuts to some useful interfaces. + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RCI; + // analyses SlotIndexes *Indexes; MachineBlockFrequencyInfo *MBFI; @@ -80,8 +122,8 @@ class RAGreedy : public MachineFunctionPass, LiveDebugVariables *DebugVars; // state - OwningPtr<Spiller> SpillerInstance; - std::priority_queue<std::pair<unsigned, unsigned> > Queue; + std::unique_ptr<Spiller> SpillerInstance; + PQueue Queue; unsigned NextCascade; // Live ranges pass through a number of stages as we try to allocate them. @@ -120,6 +162,22 @@ class RAGreedy : public MachineFunctionPass, RS_Done }; + // Enum CutOffStage to keep track of whether the register allocation failed + // because of the cutoffs encountered in last chance recoloring. + // Note: This is used as a bitmask. New values should be the next power of 2. + enum CutOffStage { + // No cutoffs encountered + CO_None = 0, + + // lcr-max-depth cutoff encountered + CO_Depth = 1, + + // lcr-max-interf cutoff encountered + CO_Interf = 2 + }; + + uint8_t CutOffInfo; + #ifndef NDEBUG static const char *const StageName[]; #endif @@ -160,20 +218,23 @@ class RAGreedy : public MachineFunctionPass, unsigned BrokenHints; ///< Total number of broken hints. float MaxWeight; ///< Maximum spill weight evicted. - EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + EvictionCost(): BrokenHints(0), MaxWeight(0) {} bool isMax() const { return BrokenHints == ~0u; } + void setMax() { BrokenHints = ~0u; } + + void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } + bool operator<(const EvictionCost &O) const { - if (BrokenHints != O.BrokenHints) - return BrokenHints < O.BrokenHints; - return MaxWeight < O.MaxWeight; + return std::tie(BrokenHints, MaxWeight) < + std::tie(O.BrokenHints, O.MaxWeight); } }; // splitting state. - OwningPtr<SplitAnalysis> SA; - OwningPtr<SplitEditor> SE; + std::unique_ptr<SplitAnalysis> SA; + std::unique_ptr<SplitEditor> SE; /// Cached per-block interference maps InterferenceCache IntfCache; @@ -217,43 +278,54 @@ class RAGreedy : public MachineFunctionPass, } }; - /// Candidate info for for each PhysReg in AllocationOrder. + /// Candidate info for each PhysReg in AllocationOrder. /// This vector never shrinks, but grows to the size of the largest register /// class. SmallVector<GlobalSplitCandidate, 32> GlobalCand; - enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u }; + enum : unsigned { NoCand = ~0u }; /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to /// NoCand which indicates the stack interval. SmallVector<unsigned, 32> BundleCand; + /// Callee-save register cost, calculated once per machine function. + BlockFrequency CSRCost; + + /// Whether to run the local reassignment heuristic. This information is + /// obtained from the TargetSubtargetInfo. + bool EnableLocalReassign; + public: RAGreedy(); /// Return the pass name.
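// The std::tie rewrite of EvictionCost::operator< above is the usual idiom
// for lexicographic comparison: BrokenHints is compared first and MaxWeight
// only breaks ties. A minimal standalone check of that ordering (Cost is an
// illustrative stand-in for EvictionCost):
#include <cassert>
#include <tuple>

struct Cost {
  unsigned BrokenHints;
  float MaxWeight;
  bool operator<(const Cost &O) const {
    return std::tie(BrokenHints, MaxWeight) <
           std::tie(O.BrokenHints, O.MaxWeight);
  }
};

int main() {
  // Hint preservation dominates: a large weight with no broken hints still
  // compares below a tiny weight that breaks a hint.
  assert((Cost{0, 100.0f} < Cost{1, 0.1f}));
  assert((Cost{1, 0.1f} < Cost{1, 0.2f})); // equal hints: weight decides
  return 0;
}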
- virtual const char* getPassName() const { + const char* getPassName() const override { return "Greedy Register Allocator"; } /// RAGreedy analysis usage. - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory(); - virtual Spiller &spiller() { return *SpillerInstance; } - virtual void enqueue(LiveInterval *LI); - virtual LiveInterval *dequeue(); - virtual unsigned selectOrSplit(LiveInterval&, - SmallVectorImpl<unsigned>&); + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + Spiller &spiller() override { return *SpillerInstance; } + void enqueue(LiveInterval *LI) override; + LiveInterval *dequeue() override; + unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; /// Perform register allocation. - virtual bool runOnMachineFunction(MachineFunction &mf); + bool runOnMachineFunction(MachineFunction &mf) override; static char ID; private: - bool LRE_CanEraseVirtReg(unsigned); - void LRE_WillShrinkVirtReg(unsigned); - void LRE_DidCloneVirtReg(unsigned, unsigned); + unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &, + SmallVirtRegSet &, unsigned = 0); + + bool LRE_CanEraseVirtReg(unsigned) override; + void LRE_WillShrinkVirtReg(unsigned) override; + void LRE_DidCloneVirtReg(unsigned, unsigned) override; + void enqueue(PQueue &CurQueue, LiveInterval *LI); + LiveInterval *dequeue(PQueue &CurQueue); BlockFrequency calcSpillCost(); bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); @@ -268,6 +340,9 @@ private: bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); void evictInterference(LiveInterval&, unsigned, SmallVectorImpl<unsigned>&); + bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters); unsigned tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); @@ -275,6 +350,21 @@ private: SmallVectorImpl<unsigned>&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); + /// Calculate cost of region splitting. + unsigned calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, bool IgnoreCSR); + /// Perform region splitting. + unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl<unsigned> &NewVRegs); + /// Check other options before using a callee-saved register for the first + /// time. + unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, + unsigned PhysReg, unsigned &CostPerUseLimit, + SmallVectorImpl<unsigned> &NewVRegs); + void initializeCSRCost(); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, @@ -283,6 +373,11 @@ private: SmallVectorImpl<unsigned>&); unsigned trySplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); + unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, + SmallVectorImpl<unsigned> &, + SmallVirtRegSet &, unsigned); + bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, + SmallVirtRegSet &, unsigned); }; } // end anonymous namespace @@ -301,7 +396,7 @@ const char *const RAGreedy::StageName[] = { // Hysteresis to use when comparing floats. // This helps stabilize decisions based on float comparisons. 
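// Sketch of how a hysteresis factor like the constant below is applied
// (hypothetical helper, not from the patch): a new candidate must beat the
// incumbent by a clear margin before a decision flips, so tiny floating-point
// differences cannot toggle it back and forth. The replacement value
// 2007/2048 is exactly representable in a float, unlike 0.98, which
// presumably makes the comparison bit-for-bit reproducible.
static const float Hyst = 2007 / 2048.0f; // == 0.97998046875 exactly

static bool definitelyCheaper(float NewCost, float OldCost) {
  return NewCost < Hyst * OldCost; // require roughly a 2% improvement
}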
-const float Hysteresis = 0.98f; +const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 FunctionPass* llvm::createGreedyRegisterAllocator() { @@ -391,12 +486,14 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { } void RAGreedy::releaseMemory() { - SpillerInstance.reset(0); + SpillerInstance.reset(); ExtraRegInfo.clear(); GlobalCand.clear(); } -void RAGreedy::enqueue(LiveInterval *LI) { +void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } + +void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); @@ -414,12 +511,25 @@ void RAGreedy::enqueue(LiveInterval *LI) { // everything else has been allocated. Prio = Size; } else { - if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() && + // Giant live ranges fall back to the global assignment heuristic, which + // prevents excessive spilling in pathological cases. + bool ReverseLocal = TRI->reverseLocalAssignment(); + bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() && + (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); + + if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { // Allocate original local ranges in linear instruction order. Since they // are singly defined, this produces optimal coloring in the absence of // global interference and other constraints. - Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + if (!ReverseLocal) + Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + else { + // Allocating bottom up may allow many short LRGs to be assigned first + // to one of the cheap registers. This could be much faster for very + // large blocks on targets with many physical registers. + Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); + } } else { // Allocate global and split ranges in long->short order. Long ranges that @@ -436,14 +546,16 @@ void RAGreedy::enqueue(LiveInterval *LI) { } // The virtual register number is a tie breaker for same-sized ranges. // Give lower vreg numbers higher priority to assign them first. - Queue.push(std::make_pair(Prio, ~Reg)); + CurQueue.push(std::make_pair(Prio, ~Reg)); } -LiveInterval *RAGreedy::dequeue() { - if (Queue.empty()) - return 0; - LiveInterval *LI = &LIS->getInterval(~Queue.top().second); - Queue.pop(); +LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } + +LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { + if (CurQueue.empty()) + return nullptr; + LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); + CurQueue.pop(); return LI; } @@ -471,7 +583,8 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) if (Order.isHint(Hint)) { DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); - EvictionCost MaxCost(1); + EvictionCost MaxCost; + MaxCost.setBrokenHints(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { evictInterference(VirtReg, Hint, NewVRegs); return Hint; @@ -543,11 +656,15 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, if (CanSplit && IsHint && !BreaksHint) return true; - return A.weight > B.weight; + if (A.weight > B.weight) { + DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); + return true; + } + return false; } /// canEvictInterference - Return true if all interferences between VirtReg and -/// PhysReg can be evicted. 
When OnlyCheap is set, don't do anything +/// PhysReg can be evicted. /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. @@ -618,16 +735,16 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, return false; if (Urgent) continue; + // Apply the eviction policy for non-urgent evictions. + if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + return false; // If !MaxCost.isMax(), then we're just looking for a cheap register. // Evicting another local live range in this case could lead to suboptimal // coloring. if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && - !canReassign(*Intf, PhysReg)) { + (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) { return false; } - // Finally, apply the eviction policy for non-urgent evictions. - if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) - return false; } } MaxCost = Cost; @@ -685,7 +802,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); // Keep track of the cheapest interference seen so far. - EvictionCost BestCost(~0u); + EvictionCost BestCost; + BestCost.setMax(); unsigned BestPhys = 0; unsigned OrderLimit = Order.getOrder().size(); @@ -713,7 +831,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, } Order.rewind(); - while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) { + while (unsigned PhysReg = Order.next(OrderLimit)) { if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1172,9 +1290,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs) { unsigned NumCands = 0; - unsigned BestCand = NoCand; BlockFrequency BestCost; - SmallVector<unsigned, 8> UsedCands; // Check if we can split this live range around a compact region. bool HasCompact = calcCompactRegion(GlobalCand.front()); @@ -1186,11 +1302,33 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. BestCost = calcSpillCost(); - DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + DEBUG(dbgs() << "Cost of isolating all blocks = "; + MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } + unsigned BestCand = + calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, + false/*IgnoreCSR*/); + + // No solutions found, fall back to single block splitting. + if (!HasCompact && BestCand == NoCand) + return 0; + + return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); +} + +unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, + bool IgnoreCSR) { + unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) + continue; + // Discard bad candidates before we run out of interference cache cursors. // This will only affect register classes with a lot of registers (>32). 
if (NumCands == IntfCache.getMaxCursors()) { @@ -1220,7 +1358,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = "; + MBFI->printBlockFreq(dbgs(), Cost)); if (Cost >= BestCost) { DEBUG({ if (BestCand == NoCand) @@ -1243,7 +1382,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, Cost += calcGlobalSplitCost(Cand); DEBUG({ - dbgs() << ", total = " << Cost << " with bundles"; + dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) + << " with bundles"; for (int i = Cand.LiveBundles.find_first(); i>=0; i = Cand.LiveBundles.find_next(i)) dbgs() << " EB#" << i; @@ -1255,11 +1395,13 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } ++NumCands; } + return BestCand; +} - // No solutions found, fall back to single block splitting. - if (!HasCompact && BestCand == NoCand) - return 0; - +unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl<unsigned> &NewVRegs) { + SmallVector<unsigned, 8> UsedCands; // Prepare split editor. LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); @@ -1348,6 +1490,22 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Per-Instruction Splitting //===----------------------------------------------------------------------===// +/// Get the number of allocatable registers that match the constraints of \p Reg +/// on \p MI and that are also in \p SuperRC. +static unsigned getNumAllocatableRegsForConstraints( + const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, + const RegisterClassInfo &RCI) { + assert(SuperRC && "Invalid register class"); + + const TargetRegisterClass *ConstrainedRC = + MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, + /* ExploreBundle */ true); + if (!ConstrainedRC) + return 0; + return RCI.getNumAllocatableRegs(ConstrainedRC); +} + /// tryInstructionSplit - Split a live range around individual instructions. /// This is normally not worthwhile since the spiller is doing essentially the /// same thing. However, when the live range is in a constrained register @@ -1358,8 +1516,9 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); // There is no point to this if there are no larger sub-classes. - if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) + if (!RegClassInfo.isProperSubClass(CurRC)) return 0; // Always enable split spill mode, since we're effectively spilling to a @@ -1373,10 +1532,18 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - // Split around every non-copy instruction. + const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); + // Split around every non-copy instruction if this split will relax + // the constraints on the virtual register. 
+ // Otherwise, splitting just inserts uncoalescable copies that do not help + // the allocation. for (unsigned i = 0; i != Uses.size(); ++i) { if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) - if (MI->isFullCopy()) { + if (MI->isFullCopy() || + SuperRCNumAllocatableRegs == + getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, + TRI, RCI)) { DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); continue; } @@ -1571,7 +1738,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * - (1.0f / BlockFrequency::getEntryFrequency()); + (1.0f / MBFI->getEntryFreq()); SmallVector<float, 8> GapWeight; Order.rewind(); @@ -1759,6 +1926,222 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, return tryBlockSplit(VirtReg, Order, NewVRegs); } +//===----------------------------------------------------------------------===// +// Last Chance Recoloring +//===----------------------------------------------------------------------===// + +/// mayRecolorAllInterferences - Check if the virtual registers that +/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be +/// recolored to free \p PhysReg. +/// When true is returned, \p RecoloringCandidates has been augmented with all +/// the live intervals that need to be recolored in order to free \p PhysReg +/// for \p VirtReg. +/// \p FixedRegisters contains all the virtual registers that cannot be +/// recolored. +bool +RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); + + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + // If there are LastChanceRecoloringMaxInterference or more interferences, + // chances are one would not be recolorable. + if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= + LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { + DEBUG(dbgs() << "Early abort: too many interferences.\n"); + CutOffInfo |= CO_Interf; + return false; + } + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + // If Intf is done and sits on the same register class as VirtReg, + // it would not be recolorable as it is in the same state as VirtReg. + if ((getStage(*Intf) == RS_Done && + MRI->getRegClass(Intf->reg) == CurRC) || + FixedRegisters.count(Intf->reg)) { + DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n"); + return false; + } + RecoloringCandidates.insert(Intf); + } + } + return true; +} + +/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring +/// its interferences. +/// Last chance recoloring chooses a color for \p VirtReg and recolors every +/// virtual register that was using it. The recoloring process may recursively +/// use the last chance recoloring. Therefore, when a virtual register has been +/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot +/// be last-chance-recolored again during this recoloring "session". +/// E.g., +/// Let +/// vA can use {R1, R2 } +/// vB can use { R2, R3} +/// vC can use {R1 } +/// Where vA, vB, and vC cannot be split anymore (they are reloads for +/// instance) and they all interfere.
+/// +/// vA is assigned R1 +/// vB is assigned R2 +/// vC tries to evict vA but vA is already done. +/// Regular register allocation fails. +/// +/// Last chance recoloring kicks in: +/// vC proceeds as if vA was evicted => vC uses R1. +/// vC is marked as fixed. +/// vA needs to find a color. +/// None are available. +/// vA cannot evict vC: vC is a fixed virtual register now. +/// vA proceeds as if vB was evicted => vA uses R2. +/// vB needs to find a color. +/// R3 is available. +/// Recoloring => vC = R1, vA = R2, vB = R3 +/// +/// \p Order defines the preferred allocation order for \p VirtReg. +/// \p NewRegs will contain any new virtual registers that have been created +/// (split, spill) during the process and that must be assigned. +/// \p FixedRegisters contains all the virtual registers that cannot be +/// recolored. +/// \p Depth gives the current depth of the last chance recoloring. +/// \return a physical register that can be used for VirtReg or ~0u if none +/// exists. +unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<unsigned> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); + // Ranges must be Done. + assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && + "Last chance recoloring should really be last chance"); + // Set the max depth to LastChanceRecoloringMaxDepth. + // We may want to reconsider that if we end up with too large a search space + // for targets with hundreds of registers. + // Indeed, in that case we may want to cut the search space earlier. + if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { + DEBUG(dbgs() << "Abort because max depth has been reached.\n"); + CutOffInfo |= CO_Depth; + return ~0u; + } + + // Set of live intervals that will need to be recolored. + SmallLISet RecoloringCandidates; + // Record the original mapping from virtual register to physical register in + // case the recoloring fails. + DenseMap<unsigned, unsigned> VirtRegToPhysReg; + // Mark VirtReg as fixed, i.e., it will not be recolored past this point in + // this recoloring "session". + FixedRegisters.insert(VirtReg.reg); + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " + << PrintReg(PhysReg, TRI) << '\n'); + RecoloringCandidates.clear(); + VirtRegToPhysReg.clear(); + + // It is only possible to recolor virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > + LiveRegMatrix::IK_VirtReg) { + DEBUG(dbgs() << "Some interferences are not with virtual registers.\n"); + + continue; + } + + // Give up early on this PhysReg if it is obvious we cannot recolor all + // the interferences. + if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, + FixedRegisters)) { + DEBUG(dbgs() << "Some interferences cannot be recolored.\n"); + continue; + } + + // RecoloringCandidates contains all the virtual registers that interfere + // with VirtReg on PhysReg (or one of its aliases). + // Enqueue them for recoloring and perform the actual recoloring. + PQueue RecoloringQueue; + for (SmallLISet::iterator It = RecoloringCandidates.begin(), + EndIt = RecoloringCandidates.end(); + It != EndIt; ++It) { + unsigned ItVirtReg = (*It)->reg; + enqueue(RecoloringQueue, *It); + assert(VRM->hasPhys(ItVirtReg) && + "Interferences are supposed to be with allocated variables"); + + // Record the current allocation.
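// The loop here follows a transactional pattern: snapshot each candidate's
// current physical register, apply a tentative coloring, and restore the
// snapshot if the recursion fails. A simplified standalone model of that
// save/restore bookkeeping (std::map stands in for the VirtRegMap state):
#include <map>

typedef std::map<unsigned, unsigned> AssignMap; // vreg -> phys reg

bool tryTentative(AssignMap &Current, const AssignMap &Tentative,
                  bool (*allocatesOk)(const AssignMap &)) {
  AssignMap Saved = Current;        // snapshot before mutating
  for (AssignMap::const_iterator I = Tentative.begin(), E = Tentative.end();
       I != E; ++I)
    Current[I->first] = I->second;  // apply the tentative coloring
  if (allocatesOk(Current))
    return true;                    // success: keep the new assignment
  Current = Saved;                  // failure: roll back to the old state
  return false;
}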
+ VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); + // Unset the related struct. + Matrix->unassign(**It); + } + + // Act as if VirtReg was assigned to PhysReg so that the underlying + // recoloring has the right information about the interferences and + // available colors. + Matrix->assign(VirtReg, PhysReg); + + // Save the current recoloring state. + // If we cannot recolor all the interferences, we will have to start again + // at this point for the next physical register. + SmallVirtRegSet SaveFixedRegisters(FixedRegisters); + if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, + Depth)) { + // Do not mess up the global assignment process. + // I.e., VirtReg must be unassigned. + Matrix->unassign(VirtReg); + return PhysReg; + } + + DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " + << PrintReg(PhysReg, TRI) << '\n'); + + // The recoloring attempt failed; undo the changes. + FixedRegisters = SaveFixedRegisters; + Matrix->unassign(VirtReg); + + for (SmallLISet::iterator It = RecoloringCandidates.begin(), + EndIt = RecoloringCandidates.end(); + It != EndIt; ++It) { + unsigned ItVirtReg = (*It)->reg; + if (VRM->hasPhys(ItVirtReg)) + Matrix->unassign(**It); + Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]); + } + } + + // Last chance recoloring did not work either; give up. + return ~0u; +} + +/// tryRecoloringCandidates - Try to assign a new color to every register +/// in \p RecoloringQueue. +/// \p NewRegs will contain any new virtual registers created during the +/// recoloring process. +/// \p FixedRegisters[in/out] contains all the registers that have been +/// recolored. +/// \return true if all virtual registers in RecoloringQueue were successfully +/// recolored, false otherwise. +bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, + SmallVectorImpl<unsigned> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + while (!RecoloringQueue.empty()) { + LiveInterval *LI = dequeue(RecoloringQueue); + DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); + unsigned PhysReg; + PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); + if (PhysReg == ~0u || !PhysReg) + return false; + DEBUG(dbgs() << "Recoloring of " << *LI + << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); + Matrix->assign(*LI, PhysReg); + FixedRegisters.insert(LI->reg); + } + return true; +} //===----------------------------------------------------------------------===// // Main Entry Point //===----------------------------------------------------------------------===// unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<unsigned> &NewVRegs) { + CutOffInfo = CO_None; + LLVMContext &Ctx = MF->getFunction()->getContext(); + SmallVirtRegSet FixedRegisters; + unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + if (Reg == ~0U && (CutOffInfo != CO_None)) { + uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); + if (CutOffEncountered == CO_Depth) + Ctx.emitError("register allocation failed: maximum depth for recoloring " + "reached. Use -fexhaustive-register-search to skip " + "cutoffs"); + else if (CutOffEncountered == CO_Interf) + Ctx.emitError("register allocation failed: maximum interference for " + "recoloring reached. Use -fexhaustive-register-search " + "to skip cutoffs"); + else if (CutOffEncountered == (CO_Depth | CO_Interf)) + Ctx.emitError("register allocation failed: maximum interference and " + "depth for recoloring reached. 
Use " + "-fexhaustive-register-search to skip cutoffs"); + } + return Reg; +} + +/// Using a CSR for the first time has a cost because it causes push|pop +/// to be added to prologue|epilogue. Splitting a cold section of the live +/// range can have lower cost than using the CSR for the first time; +/// Spilling a live range in the cold path can have lower cost than using +/// the CSR for the first time. Returns the physical register if we decide +/// to use the CSR; otherwise return 0. +unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, + AllocationOrder &Order, + unsigned PhysReg, + unsigned &CostPerUseLimit, + SmallVectorImpl<unsigned> &NewVRegs) { + if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { + // We choose spill over using the CSR for the first time if the spill cost + // is lower than CSRCost. + SA->analyze(&VirtReg); + if (calcSpillCost() >= CSRCost) + return PhysReg; + + // We are going to spill, set CostPerUseLimit to 1 to make sure that + // we will not use a callee-saved register in tryEvict. + CostPerUseLimit = 1; + return 0; + } + if (getStage(VirtReg) < RS_Split) { + // We choose pre-splitting over using the CSR for the first time if + // the cost of splitting is lower than CSRCost. + SA->analyze(&VirtReg); + unsigned NumCands = 0; + BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. + unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, + NumCands, true /*IgnoreCSR*/); + if (BestCand == NoCand) + // Use the CSR if we can't find a region split below CSRCost. + return PhysReg; + + // Perform the actual pre-splitting. + doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); + return 0; + } + return PhysReg; +} + +void RAGreedy::initializeCSRCost() { + // We use the larger one out of the command-line option and the value report + // by TRI. + CSRCost = BlockFrequency( + std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); + if (!CSRCost.getFrequency()) + return; + + // Raw cost is relative to Entry == 2^14; scale it appropriately. + uint64_t ActualEntry = MBFI->getEntryFreq(); + if (!ActualEntry) { + CSRCost = 0; + return; + } + uint64_t FixedEntry = 1 << 14; + if (ActualEntry < FixedEntry) + CSRCost *= BranchProbability(ActualEntry, FixedEntry); + else if (ActualEntry <= UINT32_MAX) + // Invert the fraction and divide. + CSRCost /= BranchProbability(FixedEntry, ActualEntry); + else + // Can't use BranchProbability in general, since it takes 32-bit numbers. + CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); +} + +unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, + SmallVectorImpl<unsigned> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + unsigned CostPerUseLimit = ~0u; // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); - if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) - return PhysReg; + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { + // We check other options if we are using a CSR for the first time. + bool CSRFirstUse = false; + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (!MRI->isPhysRegUsed(CSR)) + CSRFirstUse = true; + + // When NewVRegs is not empty, we may have made decisions such as evicting + // a virtual register, go with the earlier decisions and use the physical + // register. 
+ if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) { + unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, + CostPerUseLimit, NewVRegs); + if (CSRReg || !NewVRegs.empty()) + // Return now if we decide to use a CSR or create new vregs due to + // pre-splitting. + return CSRReg; + } else + return PhysReg; + } LiveRangeStage Stage = getStage(VirtReg); DEBUG(dbgs() << StageName[Stage] @@ -1779,7 +2274,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // queue. The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. if (Stage != RS_Split) - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) return PhysReg; assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); @@ -1797,7 +2292,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class will report it. if (Stage >= RS_Done || !VirtReg.isSpillable()) - return ~0u; + return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, + Depth); // Try splitting VirtReg or interferences. unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); @@ -1823,6 +2319,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; + const TargetMachine &TM = MF->getTarget(); + TRI = TM.getRegisterInfo(); + TII = TM.getInstrInfo(); + RCI.runOnMachineFunction(mf); + + EnableLocalReassign = EnableLocalReassignment || + TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel()); + if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); @@ -1838,6 +2342,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); + initializeCSRCost(); + calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); DEBUG(LIS->dump()); diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index 88c8201..8a3b53f 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -29,12 +29,9 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" - #include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -45,13 +42,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PBQP/Graph.h" -#include "llvm/CodeGen/PBQP/HeuristicSolver.h" -#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -63,6 +58,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + static RegisterRegAlloc registerPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator); @@ -91,8 +88,8 @@ public: static char ID; /// Construct a PBQP register allocator. 
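// The constructor change just below swaps OwningPtr for a std::unique_ptr
// taken by value, so callers must std::move the builder in and the transfer
// of ownership is visible at every call site. A minimal sketch of the idiom
// (Builder and Allocator are illustrative names, not the LLVM classes):
#include <memory>
#include <utility>

struct Builder { virtual ~Builder() {} };

class Allocator {
  std::unique_ptr<Builder> TheBuilder;
public:
  explicit Allocator(std::unique_ptr<Builder> B) : TheBuilder(std::move(B)) {}
};

int main() {
  std::unique_ptr<Builder> B(new Builder());
  Allocator A(std::move(B)); // B is now null; A owns the Builder
  return B ? 1 : 0;
}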
- RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0) - : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) { + RegAllocPBQP(std::unique_ptr<PBQPBuilder> b, char *cPassID = nullptr) + : MachineFunctionPass(ID), builder(std::move(b)), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); @@ -100,15 +97,15 @@ public: } /// Return the pass name. - virtual const char* getPassName() const { + const char* getPassName() const override { return "PBQP Register Allocator"; } /// PBQP analysis usage. - virtual void getAnalysisUsage(AnalysisUsage &au) const; + void getAnalysisUsage(AnalysisUsage &au) const override; /// Perform register allocation - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -120,8 +117,7 @@ private: typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; typedef std::set<unsigned> RegSet; - - OwningPtr<PBQPBuilder> builder; + std::unique_ptr<PBQPBuilder> builder; char *customPassID; @@ -132,7 +128,7 @@ private: MachineRegisterInfo *mri; const MachineBlockFrequencyInfo *mbfi; - OwningPtr<Spiller> spiller; + std::unique_ptr<Spiller> spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -157,13 +153,13 @@ char RegAllocPBQP::ID = 0; } // End anonymous namespace. -unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::NodeId node) const { +unsigned PBQPRAProblem::getVRegForNode(PBQPRAGraph::NodeId node) const { Node2VReg::const_iterator vregItr = node2VReg.find(node); assert(vregItr != node2VReg.end() && "No vreg for node."); return vregItr->second; } -PBQP::Graph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const { +PBQPRAGraph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const { VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); assert(nodeItr != vreg2Node.end() && "No node for vreg."); return nodeItr->second; @@ -194,8 +190,8 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); - OwningPtr<PBQPRAProblem> p(new PBQPRAProblem()); - PBQP::Graph &g = p->getGraph(); + std::unique_ptr<PBQPRAProblem> p(new PBQPRAProblem()); + PBQPRAGraph &g = p->getGraph(); RegSet pregs; // Collect the set of preg intervals, record that they're used in the MF. @@ -220,7 +216,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, // Compute an initial allowed set for the current vreg. typedef std::vector<unsigned> VRAllowed; VRAllowed vrAllowed; - ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf); + ArrayRef<MCPhysReg> rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; if (mri->isReserved(preg)) @@ -245,17 +241,19 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, vrAllowed.push_back(preg); } - // Construct the node. - PBQP::Graph::NodeId node = - g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); - - // Record the mapping and allowed set in the problem. - p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end()); + PBQP::Vector nodeCosts(vrAllowed.size() + 1, 0); PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ? 
vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min(); - addSpillCosts(g.getNodeCosts(node), spillCost); + addSpillCosts(nodeCosts, spillCost); + + // Construct the node. + PBQPRAGraph::NodeId nId = g.addNode(std::move(nodeCosts)); + + // Record the mapping and allowed set in the problem. + p->recordVReg(vreg, nId, vrAllowed.begin(), vrAllowed.end()); + } for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end(); @@ -264,24 +262,24 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, const LiveInterval &l1 = lis->getInterval(vr1); const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1); - for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr); - vr2Itr != vrEnd; ++vr2Itr) { + for (RegSet::const_iterator vr2Itr = std::next(vr1Itr); vr2Itr != vrEnd; + ++vr2Itr) { unsigned vr2 = *vr2Itr; const LiveInterval &l2 = lis->getInterval(vr2); const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2); assert(!l2.empty() && "Empty interval in vreg set?"); if (l1.overlaps(l2)) { - PBQP::Graph::EdgeId edge = - g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), - PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0)); + PBQP::Matrix edgeCosts(vr1Allowed.size()+1, vr2Allowed.size()+1, 0); + addInterferenceCosts(edgeCosts, vr1Allowed, vr2Allowed, tri); - addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri); + g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), + std::move(edgeCosts)); } } } - return p.take(); + return p.release(); } void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, @@ -315,25 +313,17 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { - OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs)); - PBQP::Graph &g = p->getGraph(); + std::unique_ptr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs)); + PBQPRAGraph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); CoalescerPair cp(*tm.getRegisterInfo()); // Scan the machine function and add a coalescing cost whenever CoalescerPair // gives the Ok. - for (MachineFunction::const_iterator mbbItr = mf->begin(), - mbbEnd = mf->end(); - mbbItr != mbbEnd; ++mbbItr) { - const MachineBasicBlock *mbb = &*mbbItr; - - for (MachineBasicBlock::const_iterator miItr = mbb->begin(), - miEnd = mbb->end(); - miItr != miEnd; ++miItr) { - const MachineInstr *mi = &*miItr; - - if (!cp.setRegisters(mi)) { + for (const auto &mbb : *mf) { + for (const auto &mi : mbb) { + if (!cp.setRegisters(&mi)) { continue; // Not coalescable. } @@ -348,8 +338,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, // value plucked randomly out of the air. PBQP::PBQPNum cBenefit = - copyFactor * LiveIntervals::getSpillWeight(false, true, - mbfi->getBlockFreq(mbb)); + copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, &mi); if (cp.isPhys()) { if (!mf->getRegInfo().isAllocatable(dst)) { @@ -363,33 +352,37 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, } if (pregOpt < allowed.size()) { ++pregOpt; // +1 to account for spill option. 
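// The "+1 to account for spill option" convention above: a PBQP node's cost
// vector has one leading spill entry plus one entry per allowed physical
// register. A sketch of that layout with plain std::vector standing in for
// PBQP::Vector (the index-0 spill slot is an assumption consistent with the
// offsets used in this builder):
#include <vector>

std::vector<float> makeNodeCosts(unsigned NumAllowedPRegs, float SpillCost) {
  std::vector<float> Costs(NumAllowedPRegs + 1, 0.0f);
  Costs[0] = SpillCost; // option 0: spill the virtual register
  return Costs;         // options 1..N: the allowed physical registers
}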
- PBQP::Graph::NodeId node = p->getNodeForVReg(src); - addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit); + PBQPRAGraph::NodeId node = p->getNodeForVReg(src); + llvm::dbgs() << "Reading node costs for node " << node << "\n"; + llvm::dbgs() << "Source node: " << &g.getNodeCosts(node) << "\n"; + PBQP::Vector newCosts(g.getNodeCosts(node)); + addPhysRegCoalesce(newCosts, pregOpt, cBenefit); + g.setNodeCosts(node, newCosts); } } else { const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); - PBQP::Graph::NodeId node1 = p->getNodeForVReg(dst); - PBQP::Graph::NodeId node2 = p->getNodeForVReg(src); - PBQP::Graph::EdgeId edge = g.findEdge(node1, node2); + PBQPRAGraph::NodeId node1 = p->getNodeForVReg(dst); + PBQPRAGraph::NodeId node2 = p->getNodeForVReg(src); + PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2); if (edge == g.invalidEdgeId()) { - edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1, - allowed2->size() + 1, - 0)); + PBQP::Matrix costs(allowed1->size() + 1, allowed2->size() + 1, 0); + addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit); + g.addEdge(node1, node2, costs); } else { - if (g.getEdgeNode1(edge) == node2) { + if (g.getEdgeNode1Id(edge) == node2) { std::swap(node1, node2); std::swap(allowed1, allowed2); } + PBQP::Matrix costs(g.getEdgeCosts(edge)); + addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit); + g.setEdgeCosts(edge, costs); } - - addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2, - cBenefit); } } } - return p.take(); + return p.release(); } void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, @@ -472,14 +465,12 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, // Clear the existing allocation. vrm->clearAllVirt(); - const PBQP::Graph &g = problem.getGraph(); + const PBQPRAGraph &g = problem.getGraph(); // Iterate over the nodes mapping the PBQP solution to a register // assignment. - for (PBQP::Graph::NodeItr nodeItr = g.nodesBegin(), - nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - unsigned vreg = problem.getVRegForNode(*nodeItr); - unsigned alloc = solution.getSelection(*nodeItr); + for (auto NId : g.nodeIds()) { + unsigned vreg = problem.getVRegForNode(NId); + unsigned alloc = solution.getSelection(NId); if (problem.isPRegOption(vreg, alloc)) { unsigned preg = problem.getPRegForOption(vreg, alloc); @@ -587,8 +578,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { while (!pbqpAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); - OwningPtr<PBQPRAProblem> problem( - builder->build(mf, lis, mbfi, vregsToAlloc)); + std::unique_ptr<PBQPRAProblem> problem( + builder->build(mf, lis, mbfi, vregsToAlloc)); #ifndef NDEBUG if (pbqpDumpGraphs) { @@ -596,7 +587,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { rs << round; std::string graphFileName(fqn + "." 
+ rs.str() + ".pbqpgraph"); std::string tmp; - raw_fd_ostream os(graphFileName.c_str(), tmp); + raw_fd_ostream os(graphFileName.c_str(), tmp, sys::fs::F_Text); DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" << graphFileName << "\"\n"); problem->getGraph().dump(os); @@ -604,8 +595,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { #endif PBQP::Solution solution = - PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve( - problem->getGraph()); + PBQP::RegAlloc::solve(problem->getGraph()); pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution); @@ -623,19 +613,19 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } -FunctionPass* llvm::createPBQPRegisterAllocator( - OwningPtr<PBQPBuilder> &builder, - char *customPassID) { - return new RegAllocPBQP(builder, customPassID); +FunctionPass * +llvm::createPBQPRegisterAllocator(std::unique_ptr<PBQPBuilder> builder, + char *customPassID) { + return new RegAllocPBQP(std::move(builder), customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { - OwningPtr<PBQPBuilder> Builder; + std::unique_ptr<PBQPBuilder> Builder; if (pbqpCoalescing) - Builder.reset(new PBQPBuilderWithCoalescing()); + Builder = llvm::make_unique<PBQPBuilderWithCoalescing>(); else - Builder.reset(new PBQPBuilder()); - return createPBQPRegisterAllocator(Builder); + Builder = llvm::make_unique<PBQPBuilder>(); + return createPBQPRegisterAllocator(std::move(Builder)); } #undef DEBUG_TYPE diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index cacd7de..8b5445c 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -8,13 +8,12 @@ //===----------------------------------------------------------------------===// // // This file implements the RegisterClassInfo class which provides dynamic -// information about target register classes. Callee saved and reserved -// registers depends on calling conventions and other dynamic information, so -// some things cannot be determined statically. +// information about target register classes. Callee-saved vs. caller-saved and +// reserved registers depend on calling conventions and other dynamic +// information, so some things cannot be determined statically. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -25,12 +24,14 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + static cl::opt<unsigned> StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"), cl::desc("Limit all regclasses to N registers")); -RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0) -{} +RegisterClassInfo::RegisterClassInfo() + : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {} void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { bool Update = false; @@ -151,7 +152,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { /// nonoverlapping reserved registers. However, computing the allocation order /// for all register classes would be too expensive. 
unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { - const TargetRegisterClass *RC = 0; + const TargetRegisterClass *RC = nullptr; unsigned NumRCUnits = 0; for (TargetRegisterInfo::regclass_iterator RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) { diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index dd86c1f..e04a3cf 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -13,9 +13,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -43,6 +41,8 @@ #include <cmath> using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(numJoins , "Number of interval joins performed"); STATISTIC(numCrossRCs , "Number of cross class joins performed"); STATISTIC(numCommutes , "Number of instructions commuted"); @@ -112,7 +112,7 @@ namespace { void eliminateDeadDefs(); /// LiveRangeEdit callback. - void LRE_WillEraseInstruction(MachineInstr *MI); + void LRE_WillEraseInstruction(MachineInstr *MI) override; /// coalesceLocals - coalesce the LocalWorkList. void coalesceLocals(); @@ -188,15 +188,15 @@ namespace { initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; - virtual void releaseMemory(); + void releaseMemory() override; /// runOnMachineFunction - pass entry point - virtual bool runOnMachineFunction(MachineFunction&); + bool runOnMachineFunction(MachineFunction&) override; /// print - Implement the dump method. - virtual void print(raw_ostream &O, const Module* = 0) const; + void print(raw_ostream &O, const Module* = nullptr) const override; }; } /// end anonymous namespace @@ -241,9 +241,8 @@ static bool isSplitEdge(const MachineBasicBlock *MBB) { if (MBB->pred_size() != 1 || MBB->succ_size() != 1) return false; - for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ++MII) { - if (!MII->isCopyLike() && !MII->isUnconditionalBranch()) + for (const auto &MI : *MBB) { + if (!MI.isCopyLike() && !MI.isUnconditionalBranch()) return false; } return true; @@ -252,7 +251,7 @@ static bool isSplitEdge(const MachineBasicBlock *MBB) { bool CoalescerPair::setRegisters(const MachineInstr *MI) { SrcReg = DstReg = 0; SrcIdx = DstIdx = 0; - NewRC = 0; + NewRC = nullptr; Flipped = CrossClass = false; unsigned Src, Dst, SrcSub, DstSub; @@ -283,7 +282,6 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { if (SrcSub) { Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); if (!Dst) return false; - SrcSub = 0; } else if (!MRI.getRegClass(Src)->contains(Dst)) { return false; } @@ -399,7 +397,8 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { void RegisterCoalescer::eliminateDeadDefs() { SmallVector<unsigned, 8> NewRegs; - LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs); + LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, + nullptr, this).eliminateDeadDefs(DeadDefs); } // Callback from eliminateDeadDefs(). @@ -622,16 +621,15 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // If some of the uses of IntA.reg are already coalesced away, return false. 
// It's not possible to determine whether it's safe to perform the coalescing. - for (MachineRegisterInfo::use_nodbg_iterator UI = - MRI->use_nodbg_begin(IntA.reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; + for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) { + MachineInstr *UseMI = MO.getParent(); + unsigned OpNo = &MO - &UseMI->getOperand(0); SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); if (US == IntA.end() || US->valno != AValNo) continue; // If this use is tied to a def, we can't rewrite the register. - if (UseMI->isRegTiedToDefOperand(UI.getOperandNo())) + if (UseMI->isRegTiedToDefOperand(OpNo)) return false; } @@ -669,8 +667,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // Update uses of IntA of the specific Val# with IntB. for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), UE = MRI->use_end(); UI != UE;) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; + MachineOperand &UseMO = *UI; + MachineInstr *UseMI = UseMO.getParent(); ++UI; if (UseMI->isDebugValue()) { // FIXME These don't have an instruction index. Not clear we have enough @@ -769,6 +767,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, if (DstOperand.getSubReg() && !DstOperand.isUndef()) return false; + // If both SrcIdx and DstIdx are set, correct rematerialization would widen + // the register substantially (beyond both source and dest size). This is bad + // for performance since it can cascade through a function, introducing many + // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers + // around after a few subreg copies). + if (SrcIdx && DstIdx) + return false; + const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { @@ -793,9 +799,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = - llvm::next(MachineBasicBlock::iterator(CopyMI)); + std::next(MachineBasicBlock::iterator(CopyMI)); TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); - MachineInstr *NewMI = prior(MII); + MachineInstr *NewMI = std::prev(MII); LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); @@ -816,31 +822,19 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, } if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + const TargetRegisterClass *NewRC = CP.getNewRC(); unsigned NewIdx = NewMI->getOperand(0).getSubReg(); - const TargetRegisterClass *RCForInst; + if (NewIdx) - RCForInst = TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), DefRC, - NewIdx); - - if (MRI->constrainRegClass(DstReg, DefRC)) { - // The materialized instruction is quite capable of setting DstReg - // directly, but it may still have a now-trivial subregister index which - // we should clear. - NewMI->getOperand(0).setSubReg(0); - } else if (NewIdx && RCForInst) { - // The subreg index on NewMI is essential; we still have to make sure - // DstReg:idx is in a class that NewMI can use. - MRI->constrainRegClass(DstReg, RCForInst); - } else { - // DstReg is actually incompatible with NewMI, we have to move to a - // super-reg's class. 
This could come from a sequence like: - // GR32 = MOV32r0 - // GR8 = COPY GR32:sub_8 - MRI->setRegClass(DstReg, CP.getNewRC()); - updateRegDefsUses(DstReg, DstReg, DstIdx); - NewMI->getOperand(0).setSubReg( - TRI->composeSubRegIndices(SrcIdx, DefMI->getOperand(0).getSubReg())); - } + NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); + else + NewRC = TRI->getCommonSubClass(NewRC, DefRC); + + assert(NewRC && "subreg chosen for remat incompatible with instruction"); + MRI->setRegClass(DstReg, NewRC); + + updateRegDefsUses(DstReg, DstReg, DstIdx); + NewMI->getOperand(0).setSubReg(NewIdx); } else if (NewMI->getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. @@ -851,6 +845,27 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/)); + // Record small dead def live-ranges for all the subregisters + // of the destination register. + // Otherwise, variables that live through may miss some + // interferences, thus creating an invalid allocation. + // E.g., i386 code: + // vreg1 = somedef ; vreg1 GR8 + // vreg2 = remat ; vreg2 GR32 + // CL = COPY vreg2.sub_8bit + // = somedef vreg1 ; vreg1 GR8 + // => + // vreg1 = somedef ; vreg1 GR8 + // ECX<def, dead> = remat ; CL<imp-def> + // = somedef vreg1 ; vreg1 GR8 + // vreg1 will see the interferences with CL but not with CH since + // no live-ranges would have been created for ECX. + // Fix that! + SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); + for (MCRegUnitIterator Units(NewMI->getOperand(0).getReg(), TRI); + Units.isValid(); ++Units) + if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) + LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } if (NewMI->getOperand(0).getSubReg()) @@ -909,17 +924,14 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, // No intervals are live-in to CopyMI - it is undef. if (CP.isFlipped()) DstInt = SrcInt; - SrcInt = 0; + SrcInt = nullptr; VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); assert(DeadVNI && "No value defined in DstInt"); DstInt->removeValNo(DeadVNI); // Find new undef uses. - for (MachineRegisterInfo::reg_nodbg_iterator - I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end(); - I != E; ++I) { - MachineOperand &MO = I.getOperand(); + for (MachineOperand &MO : MRI->reg_nodbg_operands(DstInt->reg)) { if (MO.isDef() || MO.isUndef()) continue; MachineInstr *MI = MO.getParent(); @@ -941,11 +953,14 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); + LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); SmallPtrSet<MachineInstr*, 8> Visited; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); - MachineInstr *UseMI = I.skipInstruction();) { + for (MachineRegisterInfo::reg_instr_iterator + I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); + I != E; ) { + MachineInstr *UseMI = &*(I++); + // Each instruction can only be rewritten once because sub-register // composition is not always idempotent. 
When SrcReg != DstReg, rewriting // the UseMI operands removes them from the SrcReg use-def chain, but when @@ -956,7 +971,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, SmallVector<unsigned,8> Ops; bool Reads, Writes; - tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); // If SrcReg wasn't read, it may still be the case that DstReg is live-in // because SrcReg is a sub-register. @@ -1022,6 +1037,22 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { return false; } + if (CP.getNewRC()) { + auto SrcRC = MRI->getRegClass(CP.getSrcReg()); + auto DstRC = MRI->getRegClass(CP.getDstReg()); + unsigned SrcIdx = CP.getSrcIdx(); + unsigned DstIdx = CP.getDstIdx(); + if (CP.isFlipped()) { + std::swap(SrcIdx, DstIdx); + std::swap(SrcRC, DstRC); + } + if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, + CP.getNewRC())) { + DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); + return false; + } + } + // Dead code elimination. This really should be handled by MachineDCE, but // sometimes dead copies slip through, and we can't generate invalid live // ranges. @@ -1362,7 +1393,7 @@ class JoinVals { bool PrunedComputed; Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), - RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false), + RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false), Pruned(false), PrunedComputed(false) {} bool isAnalyzed() const { return WriteLanes != 0; } @@ -1468,7 +1499,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { } // Get the instruction defining this value, compute the lanes written. - const MachineInstr *DefMI = 0; + const MachineInstr *DefMI = nullptr; if (VNI->isPHIDef()) { // Conservatively assume that all lanes in a PHI are valid. V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx); @@ -2092,14 +2123,14 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { // Skip instruction pointers that have already been erased, for example by // dead code elimination. if (ErasedInstrs.erase(CurrList[i])) { - CurrList[i] = 0; + CurrList[i] = nullptr; continue; } bool Again = false; bool Success = joinCopy(CurrList[i], Again); Progress |= Success; if (Success || !Again) - CurrList[i] = 0; + CurrList[i] = nullptr; } return Progress; } @@ -2139,7 +2170,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { CurrList(WorkList.begin() + PrevSize, WorkList.end()); if (copyCoalesceWorkList(CurrList)) WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), - (MachineInstr*)0), WorkList.end()); + (MachineInstr*)nullptr), WorkList.end()); } void RegisterCoalescer::coalesceLocals() { diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h index 47c3df1..e57ceab 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h @@ -61,14 +61,14 @@ namespace llvm { public: CoalescerPair(const TargetRegisterInfo &tri) : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), - Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} /// Create a CoalescerPair representing a virtreg-to-physreg copy. /// No need to call setRegisters(). 
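// Editor's sketch (not part of the patch): the TRI->shouldCoalesce hook used
// in joinCopy above lets a subtarget veto a merge. All names below are
// invented for illustration; a plausible policy rejects merges that force a
// class wider than either input and wider than a profitable copy width.
struct RegClassModel { unsigned SizeInBits; };
static bool shouldCoalesceModel(const RegClassModel &SrcRC,
                                const RegClassModel &DstRC,
                                const RegClassModel &NewRC,
                                unsigned ProfitableWidth) {
  if (NewRC.SizeInBits <= SrcRC.SizeInBits ||
      NewRC.SizeInBits <= DstRC.SizeInBits)
    return true;                              // no widening involved
  return NewRC.SizeInBits <= ProfitableWidth; // allow modest widening only
}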
CoalescerPair(unsigned VirtReg, unsigned PhysReg, const TargetRegisterInfo &tri) : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), - Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} /// setRegisters - set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 092ecdd..617e459 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -41,7 +41,7 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, const TargetRegisterInfo *TRI) { bool Empty = true; @@ -55,6 +55,7 @@ void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, dbgs() << "\n"; } +LLVM_DUMP_METHOD void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; dumpRegSetPressure(MaxSetPressure, TRI); @@ -68,6 +69,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << '\n'; } +LLVM_DUMP_METHOD void RegPressureTracker::dump() const { if (!isTopClosed() || !isBottomClosed()) { dbgs() << "Curr Pressure: "; @@ -75,7 +77,6 @@ void RegPressureTracker::dump() const { } P.dump(TRI); } -#endif /// Increase the current pressure as impacted by these registers and bump /// the high water mark if needed. @@ -154,8 +155,8 @@ const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const { } void RegPressureTracker::reset() { - MBB = 0; - LIS = 0; + MBB = nullptr; + LIS = nullptr; CurrSetPressure.clear(); LiveThruPressure.clear(); @@ -506,7 +507,13 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, DeadDef = LRQ.isDeadDef(); } } - if (!DeadDef) { + if (DeadDef) { + // LiveIntervals knows this is a dead def even though its MachineOperand is + // not flagged as such. Since this register will not be recorded as + // live-out, increase its PDiff value to avoid underflowing pressure.
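// Editor's sketch (assumptions mine; plain containers instead of LLVM types):
// why the dead-def case above compensates through PDiff instead of
// decrementing. Scanning bottom-up, a dead def never entered the live set,
// so decrementing pressure for it would underflow the running count.
#include <set>
struct PressureModel {
  std::set<unsigned> LiveRegs;
  unsigned CurrPressure = 0;
  void recedeDef(unsigned Reg, bool DeadDef) {
    if (DeadDef)
      return;               // record a compensating pressure change instead
    if (LiveRegs.erase(Reg) && CurrPressure > 0)
      --CurrPressure;       // the def ends a live range seen below it
  }
};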
+ if (PDiff) + PDiff->addPressureChange(Reg, false, MRI); + } else { if (LiveRegs.erase(Reg)) decreaseRegPressure(Reg); else @@ -876,9 +883,9 @@ static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, SlotIndex NextUseIdx, const MachineRegisterInfo *MRI, const LiveIntervals *LIS) { - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end(); - UI != UE; UI.skipInstruction()) { + for (MachineRegisterInfo::use_instr_nodbg_iterator + UI = MRI->use_instr_nodbg_begin(Reg), + UE = MRI->use_instr_nodbg_end(); UI != UE; ++UI) { const MachineInstr* MI = &*UI; if (MI->isDebugValue()) continue; diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 75ebdaa..72b6285 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reg-scavenging" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -29,6 +28,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "reg-scavenging" + /// setUsed - Set the register and its sub-registers as being used. void RegScavenger::setUsed(unsigned Reg) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); @@ -47,7 +48,7 @@ void RegScavenger::initRegState() { for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), IE = Scavenged.end(); I != IE; ++I) { I->Reg = 0; - I->Restore = NULL; + I->Restore = nullptr; } // All registers started out unused. @@ -91,8 +92,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { // Create callee-saved registers bitvector. 
CalleeSavedRegs.resize(NumPhysRegs); - const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); - if (CSRegs != NULL) + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + if (CSRegs != nullptr) for (unsigned i = 0; CSRegs[i]; ++i) CalleeSavedRegs.set(CSRegs[i]); } @@ -162,7 +163,7 @@ void RegScavenger::unprocess() { } if (MBBI == MBB->begin()) { - MBBI = MachineBasicBlock::iterator(NULL); + MBBI = MachineBasicBlock::iterator(nullptr); Tracking = false; } else --MBBI; @@ -175,7 +176,7 @@ void RegScavenger::forward() { Tracking = true; } else { assert(MBBI != MBB->end() && "Already past the end of the basic block!"); - MBBI = llvm::next(MBBI); + MBBI = std::next(MBBI); } assert(MBBI != MBB->end() && "Already at the end of the basic block!"); @@ -187,7 +188,7 @@ void RegScavenger::forward() { continue; I->Reg = 0; - I->Restore = NULL; + I->Restore = nullptr; } if (MI->isDebugValue()) @@ -223,7 +224,7 @@ void RegScavenger::forward() { break; } if (!SubUsed) { - MBB->getParent()->verify(NULL, "In Register Scavenger"); + MBB->getParent()->verify(nullptr, "In Register Scavenger"); llvm_unreachable("Using an undefined register!"); } (void)SubUsed; @@ -415,7 +416,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, "Cannot scavenge register without an emergency spill slot!"); TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, RC, TRI); - MachineBasicBlock::iterator II = prior(I); + MachineBasicBlock::iterator II = std::prev(I); unsigned FIOperandNum = getFrameIndexOperandNum(II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); @@ -423,13 +424,13 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Restore the scavenged register before its use (or first terminator). TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, RC, TRI); - II = prior(UseMI); + II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } - Scavenged[SI].Restore = prior(UseMI); + Scavenged[SI].Restore = std::prev(UseMI); // Doing this here leads to infinite regress. // Scavenged[SI].Reg = SReg; diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index 75e3790..6a2a080 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -25,6 +24,8 @@ #include <climits> using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + #ifndef NDEBUG static cl::opt<bool> StressSchedOpt( "stress-sched", cl::Hidden, cl::init(false), @@ -55,7 +56,7 @@ void ScheduleDAG::clearDAG() { /// getInstrDesc helper to handle SDNodes. const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { - if (!Node || !Node->isMachineOpcode()) return NULL; + if (!Node || !Node->isMachineOpcode()) return nullptr; return &TII->get(Node->getMachineOpcode()); } @@ -63,7 +64,7 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { /// not already. It also adds the current node as a successor of the /// specified node. bool SUnit::addPred(const SDep &D, bool Required) { - // If this node already has this depenence, don't add a redundant one. + // If this node already has this dependence, don't add a redundant one. 
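// Editor's sketch: the redundancy check described above, reduced to plain
// containers (illustration only; a real SDep carries more state than a pair).
#include <utility>
#include <vector>
static bool addPredModel(std::vector<std::pair<int, int>> &Preds,
                         std::pair<int, int> Edge) {
  for (const auto &P : Preds)
    if (P == Edge)
      return false;    // dependence already recorded; skip the redundant one
  Preds.push_back(Edge);
  return true;
}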
for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { // Zero-latency weak edges may be added purely for heuristic ordering. Don't @@ -301,8 +302,8 @@ void SUnit::biasCriticalPath() { SUnit::pred_iterator BestI = Preds.begin(); unsigned MaxDepth = BestI->getSUnit()->getDepth(); - for (SUnit::pred_iterator - I = llvm::next(BestI), E = Preds.end(); I != E; ++I) { + for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E; + ++I) { if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) BestI = I; } diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 7f1f9c4..0f8b21c 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "misched" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -21,6 +20,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -40,17 +40,24 @@ using namespace llvm; +#define DEBUG_TYPE "misched" + static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI DAG construction")); +static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, + cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction")); + ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt, bool IsPostRAFlag, + bool RemoveKillFlags, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis), - IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) { + IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), + CanHandleTerminators(false), FirstDbgValue(nullptr) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -92,7 +99,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
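// Editor's sketch: the helper defined next walks pointer arithmetic with the
// classic worklist-plus-visited-set pattern. A minimal standalone version
// over an invented node type:
#include <set>
#include <vector>
struct NodeModel { std::vector<const NodeModel *> Operands; };
static void collectLeaves(const NodeModel *V,
                          std::vector<const NodeModel *> &Objects) {
  std::set<const NodeModel *> Visited;
  std::vector<const NodeModel *> Working(1, V);
  do {
    const NodeModel *N = Working.back();
    Working.pop_back();
    if (!Visited.insert(N).second)
      continue;               // already handled; also breaks cycles
    if (N->Operands.empty())
      Objects.push_back(N);   // an "underlying object"
    else
      Working.insert(Working.end(), N->Operands.begin(), N->Operands.end());
  } while (!Working.empty());
}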
static void getUnderlyingObjects(const Value *V, SmallVectorImpl<Value *> &Objects) { - SmallPtrSet<const Value*, 16> Visited; + SmallPtrSet<const Value *, 16> Visited; SmallVector<const Value *, 4> Working(1, V); do { V = Working.pop_back_val(); @@ -118,7 +125,8 @@ static void getUnderlyingObjects(const Value *V, } while (!Working.empty()); } -typedef SmallVector<PointerIntPair<const Value *, 1, bool>, 4> +typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType; +typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4> UnderlyingObjectsVector; /// getUnderlyingObjectsForInstr - If this machine instr has memory reference @@ -128,10 +136,23 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo *MFI, UnderlyingObjectsVector &Objects) { if (!MI->hasOneMemOperand() || - !(*MI->memoperands_begin())->getValue() || + (!(*MI->memoperands_begin())->getValue() && + !(*MI->memoperands_begin())->getPseudoValue()) || (*MI->memoperands_begin())->isVolatile()) return; + if (const PseudoSourceValue *PSV = + (*MI->memoperands_begin())->getPseudoValue()) { + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + if (!PSV->isAliased(MFI)) { + bool MayAlias = PSV->mayAlias(MFI); + Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); + } + return; + } + const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) return; @@ -141,26 +162,14 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { - bool MayAlias = true; V = *I; - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. - - if (PSV->isAliased(MFI)) { - Objects.clear(); - return; - } - - MayAlias = PSV->mayAlias(MFI); - } else if (!isIdentifiedObject(V)) { + if (!isIdentifiedObject(V)) { Objects.clear(); return; } - Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); + Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); } } @@ -170,7 +179,7 @@ void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { void ScheduleDAGInstrs::finishBlock() { // Subclasses should no longer refer to the old block. - BB = 0; + BB = nullptr; } /// Initialize the DAG and common scheduler state for the current scheduling @@ -202,7 +211,7 @@ void ScheduleDAGInstrs::exitRegion() { /// are too high to be hidden by the branch or when the liveout registers /// used by instructions in the fallthrough block. void ScheduleDAGInstrs::addSchedBarrierDeps() { - MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0; + MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr; ExitSU.setInstr(ExitMI); bool AllDepKnown = ExitMI && (ExitMI->isCall() || ExitMI->isBarrier()); @@ -259,7 +268,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { // Adjust the dependence latency using operand def/use information, // then allow the target to perform its own adjustments. 
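// Editor's sketch: the new ValueType key is a two-way tagged pointer
// (IR Value vs. PseudoSourceValue). A portable stand-in using std::variant,
// with invented types, shows the dispatch the code above performs:
#include <variant>
struct IRValueModel {};
struct PseudoValueModel { bool AliasedWithIR; };
using ValueKeyModel =
    std::variant<const IRValueModel *, const PseudoValueModel *>;
static bool trackable(ValueKeyModel K) {
  if (auto *P = std::get_if<const PseudoValueModel *>(&K))
    return !(*P)->AliasedWithIR; // aliased pseudo values are not tracked
  return true;                   // IR values take the normal AA path
}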
int UseOp = I->OpIdx; - MachineInstr *RegUse = 0; + MachineInstr *RegUse = nullptr; SDep Dep; if (UseOp < 0) Dep = SDep(SU, SDep::Artificial); @@ -284,8 +293,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { /// this SUnit to following instructions in the same scheduling region that /// depend on the physical register referenced at OperIdx. void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - const MachineOperand &MO = MI->getOperand(OperIdx); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); // Optionally add output and anti dependencies. For anti // dependencies we use a latency of 0 because for a multi-issue @@ -323,6 +332,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); + if (RemoveKillFlags) + MO.setIsKill(false); } else { addPhysRegDataDeps(SU, OperIdx); @@ -468,6 +479,15 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, if ((*MI->memoperands_begin())->isVolatile() || MI->hasUnmodeledSideEffects()) return true; + + if ((*MI->memoperands_begin())->getPseudoValue()) { + // Similarly to getUnderlyingObjectForInstr: + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + return true; + } + const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) return true; @@ -476,19 +496,8 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, getUnderlyingObjects(V, Objs); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { - V = *I; - - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - // Similarly to getUnderlyingObjectForInstr: - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. - if (PSV->isAliased(MFI)) - return true; - } - // Does this pointer refer to a distinct and identifiable object? - if (!isIdentifiedObject(V)) + if (!isIdentifiedObject(*I)) return true; } @@ -507,6 +516,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, if (MIa == MIb) return false; + // FIXME: Need to handle multiple memory operands to support all targets. + if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) + return true; + if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI)) return true; @@ -522,9 +535,8 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, MachineMemOperand *MMOa = *MIa->memoperands_begin(); MachineMemOperand *MMOb = *MIb->memoperands_begin(); - // FIXME: Need to handle multiple memory operands to support all targets.
- if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) - llvm_unreachable("Multiple memory operands."); + if (!MMOa->getValue() || !MMOb->getValue()) + return true; // The following interface to AA is fashioned after DAGCombiner::isAlias // and operates with MachineMemOperand offset with some important @@ -550,10 +562,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; AliasAnalysis::AliasResult AAResult = AA->alias( - AliasAnalysis::Location(MMOa->getValue(), Overlapa, - MMOa->getTBAAInfo()), - AliasAnalysis::Location(MMOb->getValue(), Overlapb, - MMOb->getTBAAInfo())); + AliasAnalysis::Location(MMOa->getValue(), Overlapa, + UseTBAA ? MMOa->getTBAAInfo() : nullptr), + AliasAnalysis::Location(MMOb->getValue(), Overlapb, + UseTBAA ? MMOb->getTBAAInfo() : nullptr)); return (AAResult != AliasAnalysis::NoAlias); } @@ -687,10 +699,36 @@ void ScheduleDAGInstrs::initSUnits() { // Assign the Latency field of SU using target-provided information. SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); + + // If this SUnit uses a reserved or unbuffered resource, mark it as such. + // + // Reserved resources block an instruction from issuing and stall the + // entire pipeline. These are identified by BufferSize=0. + // + // Unbuffered resources prevent execution of subsequent instructions that + // require the same resources. This is used for in-order execution pipelines + // within an out-of-order core. These are identified by BufferSize=1. + if (SchedModel.hasInstrSchedModel()) { + const MCSchedClassDesc *SC = getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) { + case 0: + SU->hasReservedResource = true; + break; + case 1: + SU->isUnbuffered = true; + break; + default: + break; + } + } + } } } -/// If RegPressure is non null, compute register pressure as a side effect. The +/// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, @@ -699,7 +737,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); - AliasAnalysis *AAForDep = UseAA ? AA : 0; + AliasAnalysis *AAForDep = UseAA ? AA : nullptr; MISUnitMap.clear(); ScheduleDAG::clearDAG(); @@ -714,20 +752,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // to top. // Remember where a generic side-effecting instruction is as we procede. - SUnit *BarrierChain = 0, *AliasChain = 0; + SUnit *BarrierChain = nullptr, *AliasChain = nullptr; // Memory references to specific known memory locations are tracked // so that they can be given more precise dependencies. 
We track // separately the known memory locations that may alias and those // that are known not to alias - MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; - MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; + MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs; + MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; std::set<SUnit*> RejectMemNodes; // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. DbgValues.clear(); - FirstDbgValue = NULL; + FirstDbgValue = nullptr; assert(Defs.empty() && Uses.empty() && "Only BuildGraph should update Defs/Uses"); @@ -744,13 +782,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addSchedBarrierDeps(); // Walk the list of instructions, from bottom moving up. - MachineInstr *DbgMI = NULL; + MachineInstr *DbgMI = nullptr; for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { - MachineInstr *MI = prior(MII); + MachineInstr *MI = std::prev(MII); if (MI && DbgMI) { DbgValues.push_back(std::make_pair(DbgMI, MI)); - DbgMI = NULL; + DbgMI = nullptr; } if (MI->isDebugValue()) { @@ -761,13 +799,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(SU && "No SUnit mapped to this MI"); if (RPTracker) { - PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0; - RPTracker->recede(/*LiveUses=*/0, PDiff); - assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr; + RPTracker->recede(/*LiveUses=*/nullptr, PDiff); + assert(RPTracker->getPos() == std::prev(MII) && + "RPTracker can't find MI"); } - assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && - "Cannot schedule terminators or labels!"); + assert( + (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && + "Cannot schedule terminators or labels!"); // Add register-based dependencies (data, anti, and output). bool HasVRegDef = false; @@ -815,11 +855,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (isGlobalMemoryObject(AA, MI)) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. 
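// Editor's sketch of why the maps above now hold vectors of SUnits: with AA
// enabled, several stores to one underlying object may be proven
// independent, so each must remain visible as a dependence source; without
// AA the newest store subsumes the older ones. Standalone model:
#include <map>
#include <vector>
static void recordStoreModel(std::map<int, std::vector<unsigned>> &MemDefs,
                             int Object, unsigned StoreSU, bool UsingAA) {
  std::vector<unsigned> &Defs = MemDefs[Object];
  if (!UsingAA)
    Defs.clear();      // only one store per object is needed without AA
  Defs.push_back(StoreSU);
}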
- for (MapVector<const Value *, SUnit *>::iterator I = + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { - I->second->addPred(SDep(SU, SDep::Barrier)); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) { + I->second[i]->addPred(SDep(SU, SDep::Barrier)); + } } - for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) { SDep Dep(SU, SDep::Barrier); @@ -853,10 +895,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); - for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), - E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); - for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = + AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes); + } + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, @@ -879,7 +923,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, bool MayAlias = false; for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { - const Value *V = K->getPointer(); + ValueType V = K->getPointer(); bool ThisMayAlias = K->getInt(); if (ThisMayAlias) MayAlias = true; @@ -887,24 +931,34 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // A store to a specific PseudoSourceValue. Add precise dependencies. // Record the def in MemDefs, first adding a dep if there is // an existing def. - MapVector<const Value *, SUnit *>::iterator I = + MapVector<ValueType, std::vector<SUnit *> >::iterator I = ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector<const Value *, SUnit *>::iterator IE = + MapVector<ValueType, std::vector<SUnit *> >::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, - 0, true); - I->second = SU; + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, + 0, true); + + // If we're not using AA, then we only need one store per object. + if (!AAForDep) + I->second.clear(); + I->second.push_back(SU); } else { - if (ThisMayAlias) - AliasMemDefs[V] = SU; - else - NonAliasMemDefs[V] = SU; + if (ThisMayAlias) { + if (!AAForDep) + AliasMemDefs[V].clear(); + AliasMemDefs[V].push_back(SU); + } else { + if (!AAForDep) + NonAliasMemDefs[V].clear(); + NonAliasMemDefs[V].push_back(SU); + } } // Handle the uses in MemUses, if there are any. - MapVector<const Value *, std::vector<SUnit *> >::iterator J = + MapVector<ValueType, std::vector<SUnit *> >::iterator J = ((ThisMayAlias) ? 
AliasMemUses.find(V) : NonAliasMemUses.find(V)); - MapVector<const Value *, std::vector<SUnit *> >::iterator JE = + MapVector<ValueType, std::vector<SUnit *> >::iterator JE = ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) @@ -933,11 +987,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // we have lost all RejectMemNodes below barrier. if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); - - if (!ExitSU.isPred(SU)) - // Push store's up a bit to avoid them getting in between cmp - // and branches. - ExitSU.addPred(SDep(SU, SDep::Artificial)); } else if (MI->mayLoad()) { bool MayAlias = true; if (MI->isInvariantLoad(AA)) { @@ -949,9 +998,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (Objs.empty()) { // A load with no underlying object. Depend on all // potentially aliasing stores. - for (MapVector<const Value *, SUnit *>::iterator I = + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], + RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -961,20 +1012,21 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (UnderlyingObjectsVector::iterator J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { - const Value *V = J->getPointer(); + ValueType V = J->getPointer(); bool ThisMayAlias = J->getInt(); if (ThisMayAlias) MayAlias = true; // A load from a specific PseudoSourceValue. Add precise dependencies. - MapVector<const Value *, SUnit *>::iterator I = + MapVector<ValueType, std::vector<SUnit *> >::iterator I = ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector<const Value *, SUnit *>::iterator IE = + MapVector<ValueType, std::vector<SUnit *> >::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, - 0, true); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], + RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else @@ -999,6 +1051,145 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, PendingLoads.clear(); } +/// \brief Initialize register live-range state for updating kills. +void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { + // Start with no live registers. + LiveRegs.reset(); + + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + // Repeat, for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + +bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { + // Setting kill flag... + if (!MO.isKill()) { + MO.setIsKill(true); + return false; + } + + // If MO itself is live, clear the kill flag... + if (LiveRegs.test(MO.getReg())) { + MO.setIsKill(false); + return false; + } + + // If any subreg of MO is live, then create an imp-def for that + // subreg and keep MO marked as killed. 
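// Editor's sketch restating the rule above: a super-register kill is only
// honest when every sub-register is dead; otherwise the surviving lanes get
// re-asserted through implicit defs, as the code below does.
#include <bitset>
#include <vector>
static bool allSubRegsDead(const std::bitset<256> &LiveRegs,
                           const std::vector<unsigned> &SubRegs) {
  for (unsigned S : SubRegs)
    if (LiveRegs.test(S))
      return false;  // a live lane: the super-reg cannot simply be killed
  return true;
}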
+ MO.setIsKill(false); + bool AllDead = true; + const unsigned SuperReg = MO.getReg(); + MachineInstrBuilder MIB(MF, MI); + for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + MIB.addReg(*SubRegs, RegState::ImplicitDefine); + AllDead = false; + } + } + + if(AllDead) + MO.setIsKill(true); + return false; +} + +// FIXME: Reuse the LivePhysRegs utility for this. +void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); + + LiveRegs.resize(TRI->getNumRegs()); + BitVector killedRegs(TRI->getNumRegs()); + + startBlockForKills(MBB); + + // Examine block from end to start... + unsigned Count = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); + I != E; --Count) { + MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + + // Update liveness. Registers that are defed but not used in this + // instruction are now dead. Mark register and all subregs as they + // are completely defined. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + LiveRegs.clearBitsNotInMask(MO.getRegMask()); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + // Repeat for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.reset(*SubRegs); + } + + // Examine all used registers and set/clear kill flag. When a + // register is used multiple times we only set the kill flag on + // the first use. Don't set kill flags on undef operands. + killedRegs.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + bool kill = false; + if (!killedRegs.test(Reg)) { + kill = true; + // A register is not killed if any subregs are live... + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + kill = false; + break; + } + } + + // If subreg is not live, then register is killed if it became + // live in this instruction + if (kill) + kill = !LiveRegs.test(Reg); + } + + if (MO.isKill() != kill) { + DEBUG(dbgs() << "Fixing " << MO << " in "); + // Warning: toggleKillFlag may invalidate MO. + toggleKillFlag(MI, MO); + DEBUG(MI->dump()); + } + + killedRegs.set(Reg); + } + + // Mark any used register (that is not using undef) and subregs as + // now live... + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) SU->getInstr()->dump(); @@ -1234,7 +1425,7 @@ public: const SDep *backtrack() { DFSStack.pop_back(); - return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second); + return DFSStack.empty() ? 
nullptr : std::prev(DFSStack.back().second); } const SUnit *getCurr() const { return DFSStack.back().first; } @@ -1317,7 +1508,7 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) { } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const { OS << InstrCount << " / " << Length << " = "; if (!Length) @@ -1326,16 +1517,17 @@ void ILPValue::print(raw_ostream &OS) const { OS << format("%g", ((double)InstrCount / Length)); } +LLVM_DUMP_METHOD void ILPValue::dump() const { dbgs() << *this << '\n'; } namespace llvm { +LLVM_DUMP_METHOD raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { Val.print(OS); return OS; } } // namespace llvm -#endif // !NDEBUG || LLVM_ENABLE_DUMP diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index 8ddb3e8..f59c6cf 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 2cd84d6..004c685 100644 --- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/MC/MCInstrItineraries.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType + #ifndef NDEBUG const char *ScoreboardHazardRecognizer::DebugType = ""; #endif @@ -126,7 +127,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // free FU's in the scoreboard at the appropriate future cycles. const MCInstrDesc *MCID = DAG->getInstrDesc(SU); - if (MCID == NULL) { + if (!MCID) { // Don't check hazards for non-machineinstr Nodes. 
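// Editor's sketch: fixupKills above is a standard backward liveness scan.
// Its core shape, simplified to ignore sub-registers and reserved regs:
#include <vector>
struct InstModel {
  std::vector<unsigned> Defs, Uses;
  std::vector<bool> Kill; // parallel to Uses
};
static void fixupKillsModel(std::vector<InstModel> &Block,
                            std::vector<bool> Live) {
  // Live starts as the union of successor live-ins (the block live-outs).
  for (auto I = Block.rbegin(), E = Block.rend(); I != E; ++I) {
    for (unsigned R : I->Defs)
      Live[R] = false;       // a def ends the live range above it
    I->Kill.assign(I->Uses.size(), false);
    for (size_t U = 0, UE = I->Uses.size(); U != UE; ++U)
      if (!Live[I->Uses[U]]) {
        I->Kill[U] = true;   // first use seen bottom-up is the last use
        Live[I->Uses[U]] = true;
      }
  }
}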
return NoHazard; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 69cf8d9..2abcdd5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dagcombine" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -40,6 +39,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "dagcombine" + STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); @@ -50,11 +51,22 @@ STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> CombinerAA("combiner-alias-analysis", cl::Hidden, - cl::desc("Turn on alias analysis during testing")); + cl::desc("Enable DAG combiner alias-analysis heuristics")); static cl::opt<bool> CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, - cl::desc("Include global information in alias analysis")); + cl::desc("Enable DAG combiner's use of IR alias analysis")); + + static cl::opt<bool> + UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), + cl::desc("Enable DAG combiner's use of TBAA")); + +#ifndef NDEBUG + static cl::opt<std::string> + CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, + cl::desc("Only use DAG-combiner alias analysis in this" + " function")); +#endif /// Hidden option to stress test load slicing, i.e., when this option /// is enabled, load slicing bypasses most of its profitability guards. @@ -92,20 +104,19 @@ namespace { // contain duplicate or removed nodes. When choosing a node to // visit, we pop off the order stack until we find an item that is // also in the contents set. All operations are O(log N). - SmallPtrSet<SDNode*, 64> WorkListContents; - SmallVector<SDNode*, 64> WorkListOrder; + SmallPtrSet<SDNode*, 64> WorklistContents; + SmallVector<SDNode*, 64> WorklistOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; - /// AddUsersToWorkList - When an instruction is simplified, add all users of + /// AddUsersToWorklist - When an instruction is simplified, add all users of /// the instruction to the work lists because they might get more simplified /// now. /// - void AddUsersToWorkList(SDNode *N) { - for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); - UI != UE; ++UI) - AddToWorkList(*UI); + void AddUsersToWorklist(SDNode *N) { + for (SDNode *Node : N->uses()) + AddToWorklist(Node); } /// visit - call the node-specific routine that knows how to fold each @@ -113,17 +124,22 @@ namespace { SDValue visit(SDNode *N); public: - /// AddToWorkList - Add to the work list making sure its instance is at the + /// AddToWorklist - Add to the work list making sure its instance is at the /// back (next to be processed.) - void AddToWorkList(SDNode *N) { - WorkListContents.insert(N); - WorkListOrder.push_back(N); + void AddToWorklist(SDNode *N) { + // Skip handle nodes as they can't usefully be combined and confuse the + // zero-use deletion strategy. + if (N->getOpcode() == ISD::HANDLENODE) + return; + + WorklistContents.insert(N); + WorklistOrder.push_back(N); } - /// removeFromWorkList - remove all instances of N from the worklist. + /// removeFromWorklist - remove all instances of N from the worklist. 
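// Editor's sketch: the two-structure worklist described above, standalone.
// The set answers membership, the vector preserves order, and stale vector
// entries are skipped when popping.
#include <set>
#include <vector>
struct WorklistModel {
  std::set<int> Contents;   // what still needs visiting
  std::vector<int> Order;   // may contain duplicates and removed nodes
  void add(int N) { Contents.insert(N); Order.push_back(N); }
  void remove(int N) { Contents.erase(N); }
  bool pop(int &N) {
    while (!Order.empty()) {
      N = Order.back();
      Order.pop_back();
      if (Contents.erase(N))
        return true;        // a live entry; stale duplicates fall through
    }
    return false;
  }
};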
/// - void removeFromWorkList(SDNode *N) { - WorkListContents.erase(N); + void removeFromWorklist(SDNode *N) { + WorklistContents.erase(N); } SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, @@ -212,6 +228,7 @@ namespace { SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); + SDValue visitRotate(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); @@ -257,11 +274,12 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); - SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); @@ -271,6 +289,11 @@ namespace { bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, SDLoc DL, bool foldBooleans = true); + + bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const; + bool isOneUseSetCC(SDValue N) const; + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); @@ -280,6 +303,10 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue InnerPos, SDValue InnerNeg, + unsigned PosOpcode, unsigned NegOpcode, + SDLoc DL); SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); @@ -296,26 +323,7 @@ namespace { /// isAlias - Return true if there is any possibility that the two addresses /// overlap. - bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, - const Value *SrcValue1, int SrcValueOffset1, - unsigned SrcValueAlign1, - const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, bool IsVolatile2, - const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2, - const MDNode *TBAAInfo2) const; - - /// isAlias - Return true if there is any possibility that the two addresses - /// overlap. - bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1); - - /// FindAliasInfo - Extracts the relevant alias information from the memory - /// node. Returns true if the operand was a load. - bool FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, bool &IsVolatile, - const Value *&SrcValue, int &SrcValueOffset, - unsigned &SrcValueAlignment, - const MDNode *&TBAAInfo) const; + bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -326,6 +334,14 @@ namespace { /// \return True if some memory operations were changed. bool MergeConsecutiveStores(StoreSDNode *N); + /// \brief Try to transform a truncation where C is a constant: + /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) + /// + /// \p N needs to be a truncation and its first operand an AND. Other + /// requirements are checked by the function (e.g. that trunc is + /// single-use) and if missed an empty SDValue is returned. 
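// Editor's sketch: the rewrite documented above relies on truncation
// distributing over bitwise AND. A standalone spot-check of the identity:
#include <cassert>
#include <cstdint>
int main() {
  const uint32_t C = 0x00FF00FFu;                  // the constant mask
  for (uint32_t X : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu})
    assert((uint16_t)(X & C) ==
           (uint16_t)((uint16_t)X & (uint16_t)C)); // (trunc X) & (trunc C)
  return 0;
}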
+ SDValue distributeTruncateThroughAnd(SDNode *N); + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), @@ -370,16 +386,16 @@ namespace { namespace { -/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// WorklistRemover - This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. -class WorkListRemover : public SelectionDAG::DAGUpdateListener { +class WorklistRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: - explicit WorkListRemover(DAGCombiner &dc) + explicit WorklistRemover(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { - DC.removeFromWorkList(N); + void NodeDeleted(SDNode *N, SDNode *E) override { + DC.removeFromWorklist(N); } }; } @@ -389,11 +405,11 @@ public: //===----------------------------------------------------------------------===// void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { - ((DAGCombiner*)DC)->AddToWorkList(N); + ((DAGCombiner*)DC)->AddToWorklist(N); } void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { - ((DAGCombiner*)DC)->removeFromWorkList(N); + ((DAGCombiner*)DC)->removeFromWorklist(N); } SDValue TargetLowering::DAGCombinerInfo:: @@ -566,79 +582,130 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } - // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc -// that selects between the values 1 and 0, making it equivalent to a setcc. -// Also, set the incoming LHS, RHS, and CC references to the appropriate -// nodes based on the type of node we are checking. This simplifies life a -// bit for the callers. -static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) { +// that selects between the target values used for true and false, making it +// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to +// the appropriate nodes based on the type of node we are checking. This +// simplifies life a bit for the callers. +bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const { if (N.getOpcode() == ISD::SETCC) { LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(2); return true; } - if (N.getOpcode() == ISD::SELECT_CC && - N.getOperand(2).getOpcode() == ISD::Constant && - N.getOperand(3).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && - cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { - LHS = N.getOperand(0); - RHS = N.getOperand(1); - CC = N.getOperand(4); - return true; - } - return false; + + if (N.getOpcode() != ISD::SELECT_CC || + !TLI.isConstTrueVal(N.getOperand(2).getNode()) || + !TLI.isConstFalseVal(N.getOperand(3).getNode())) + return false; + + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; } // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only // one use. If this is true, it allows the users to invert the operation for // free when it is profitable to do so. -static bool isOneUseSetCC(SDValue N) { +bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) return true; return false; } +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. 
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + +// \brief Returns the SDNode if it is a constant BuildVector or constant. +static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { + if (isa<ConstantSDNode>(N)) + return N.getNode(); + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if(BV && BV->isConstant()) + return BV; + return nullptr; +} + +// \brief Returns the SDNode if it is a constant splat BuildVector or constant +// int. +static ConstantSDNode *isConstOrConstSplat(SDValue N) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); + + // BuildVectors can truncate their operands. Ignore that case here. + // FIXME: We blindly ignore splats which include undef which is overly + // pessimistic. + if (CN && UndefElements.none() && + CN->getValueType(0) == N.getValueType().getScalarType()) + return CN; + } + + return nullptr; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); - if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { - if (isa<ConstantSDNode>(N1)) { - // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, VT, - cast<ConstantSDNode>(N0.getOperand(1)), - cast<ConstantSDNode>(N1)); - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); - } - if (N0.hasOneUse()) { - // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, - N0.getOperand(0), N1); - AddToWorkList(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); - } - } - - if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { - if (isa<ConstantSDNode>(N0)) { - // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, VT, - cast<ConstantSDNode>(N1.getOperand(1)), - cast<ConstantSDNode>(N0)); - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); - } - if (N1.hasOneUse()) { - // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, - N1.getOperand(0), N0); - AddToWorkList(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + if (N0.getOpcode() == Opc) { + if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R); + if (!OpNode.getNode()) + return SDValue(); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } + if (N0.hasOneUse()) { + // reassoc. 
(op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); + if (!OpNode.getNode()) + return SDValue(); + AddToWorklist(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + } + + if (N1.getOpcode() == Opc) { + if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L); + if (!OpNode.getNode()) + return SDValue(); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } + if (N1.hasOneUse()) { + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); + if (!OpNode.getNode()) + return SDValue(); + AddToWorklist(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } } } @@ -658,14 +725,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, assert((!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!")); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To); if (AddTo) { // Push the new nodes and any users onto the worklist for (unsigned i = 0, e = NumTo; i != e; ++i) { if (To[i].getNode()) { - AddToWorkList(To[i].getNode()); - AddUsersToWorkList(To[i].getNode()); + AddToWorklist(To[i].getNode()); + AddUsersToWorklist(To[i].getNode()); } } } @@ -676,7 +743,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, if (N->use_empty()) { // Nodes can be reintroduced into the worklist. Make sure we do not // process a node that has been replaced. - removeFromWorkList(N); + removeFromWorklist(N); // Finally, since the node is now dead, remove it from the graph. DAG.DeleteNode(N); @@ -688,24 +755,24 @@ void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. - AddToWorkList(TLO.New.getNode()); - AddUsersToWorkList(TLO.New.getNode()); + AddToWorklist(TLO.New.getNode()); + AddUsersToWorklist(TLO.New.getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. if (TLO.Old.getNode()->use_empty()) { - removeFromWorkList(TLO.Old.getNode()); + removeFromWorklist(TLO.Old.getNode()); // If the operands of this node are only used by the node, they will now // be dead. Make sure to visit them first to delete dead nodes early. for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) - AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + AddToWorklist(TLO.Old.getNode()->getOperand(i).getNode()); DAG.DeleteNode(TLO.Old.getNode()); } @@ -721,7 +788,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { return false; // Revisit the node. 
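// Editor's sketch: ReassociateOps above is exact for associative integer
// ops, since the folded constant (op c1, c2) commutes freely. Spot-check:
#include <cassert>
#include <cstdint>
int main() {
  uint32_t X = 0xCAFEF00Du, C1 = 3, C2 = 5;
  assert(((X + C1) + C2) == (X + (C1 + C2))); // (op (op x, c1), c2)
  assert(((X & C1) & C2) == (X & (C1 & C2))); // likewise for and/or/xor
  return 0;
}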
- AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); // Replace the old value with the new one. ++NodesCombined; @@ -745,12 +812,12 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { dbgs() << "\nWith: "; Trunc.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); - removeFromWorkList(Load); + removeFromWorklist(Load); DAG.DeleteNode(Load); - AddToWorkList(Trunc.getNode()); + AddToWorklist(Trunc.getNode()); } SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { @@ -798,9 +865,9 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); - if (NewOp.getNode() == 0) + if (!NewOp.getNode()) return SDValue(); - AddToWorkList(NewOp.getNode()); + AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); @@ -813,9 +880,9 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); - if (NewOp.getNode() == 0) + if (!NewOp.getNode()) return SDValue(); - AddToWorkList(NewOp.getNode()); + AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); @@ -848,7 +915,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { bool Replace0 = false; SDValue N0 = Op.getOperand(0); SDValue NN0 = PromoteOperand(N0, PVT, Replace0); - if (NN0.getNode() == 0) + if (!NN0.getNode()) return SDValue(); bool Replace1 = false; @@ -858,13 +925,13 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { NN1 = NN0; else { NN1 = PromoteOperand(N1, PVT, Replace1); - if (NN1.getNode() == 0) + if (!NN1.getNode()) return SDValue(); } - AddToWorkList(NN0.getNode()); + AddToWorklist(NN0.getNode()); if (NN1.getNode()) - AddToWorkList(NN1.getNode()); + AddToWorklist(NN1.getNode()); if (Replace0) ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); @@ -911,10 +978,10 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); else N0 = PromoteOperand(N0, PVT, Replace); - if (N0.getNode() == 0) + if (!N0.getNode()) return SDValue(); - AddToWorkList(N0.getNode()); + AddToWorklist(N0.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); @@ -994,12 +1061,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { dbgs() << "\nTo: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); - removeFromWorkList(N); + removeFromWorklist(N); DAG.DeleteNode(N); - AddToWorkList(Result.getNode()); + AddToWorklist(Result.getNode()); return true; } return false; @@ -1019,7 +1086,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // Add all the dag nodes to the worklist. 
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - AddToWorkList(I); + AddToWorklist(I); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -1032,23 +1099,23 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // while the worklist isn't empty, find a node and // try and combine it. - while (!WorkListContents.empty()) { + while (!WorklistContents.empty()) { SDNode *N; - // The WorkListOrder holds the SDNodes in order, but it may contain + // The WorklistOrder holds the SDNodes in order, but it may contain // duplicates. // In order to avoid a linear scan, we use a set (O(log N)) to hold what the // worklist *should* contain, and check the node we want to visit is should // actually be visited. do { - N = WorkListOrder.pop_back_val(); - } while (!WorkListContents.erase(N)); + N = WorklistOrder.pop_back_val(); + } while (!WorklistContents.erase(N)); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a // reduced number of uses, allowing other xforms. - if (N->use_empty() && N != &Dummy) { + if (N->use_empty()) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorkList(N->getOperand(i).getNode()); + AddToWorklist(N->getOperand(i).getNode()); DAG.DeleteNode(N); continue; @@ -1056,7 +1123,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { SDValue RV = combine(N); - if (RV.getNode() == 0) + if (!RV.getNode()) continue; ++NodesCombined; @@ -1080,7 +1147,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // Transfer debug value. DAG.TransferDbgValues(SDValue(N, 0), RV); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { @@ -1091,14 +1158,14 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } // Push the new node and any users onto the worklist - AddToWorkList(RV.getNode()); - AddUsersToWorkList(RV.getNode()); + AddToWorklist(RV.getNode()); + AddUsersToWorklist(RV.getNode()); // Add any uses of the old node to the worklist in case this node is the // last one that uses them. They may become dead after this node is // deleted. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorkList(N->getOperand(i).getNode()); + AddToWorklist(N->getOperand(i).getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to @@ -1106,7 +1173,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { if (N->use_empty()) { // Nodes can be reintroduced into the worklist. Make sure we do not // process a node that has been replaced. - removeFromWorkList(N); + removeFromWorklist(N); // Finally, since the node is now dead, remove it from the graph. 
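
[Note] The Run loop above pairs a LIFO vector (WorklistOrder) with a set (WorklistContents): adds always push, the set records current membership, and the pop loop discards stale duplicates before visiting. A distilled sketch of the same pattern (plain C++, hypothetical Node type standing in for SDNode):

    #include <cassert>
    #include <set>
    #include <vector>

    struct Node { int id; };

    int main() {
      std::vector<Node*> order;   // visitation order, may hold duplicates
      std::set<Node*> contents;   // what the worklist *should* contain
      auto add = [&](Node *n) { contents.insert(n); order.push_back(n); };

      Node a{0}, b{1};
      add(&a); add(&b); add(&a);  // 'a' queued twice
      int visited = 0;
      while (!contents.empty()) {
        Node *n;
        do {
          n = order.back();
          order.pop_back();
        } while (!contents.erase(n)); // skip entries no longer in the set
        ++visited;                    // process n here
      }
      assert(visited == 2);           // each node is visited exactly once
    }
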
DAG.DeleteNode(N); @@ -1148,6 +1215,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SHL: return visitSHL(N); case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); + case ISD::ROTR: + case ISD::ROTL: return visitRotate(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1193,6 +1262,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); } return SDValue(); } @@ -1201,7 +1271,7 @@ SDValue DAGCombiner::combine(SDNode *N) { SDValue RV = visit(N); // If nothing happened, try a target-specific DAG combine. - if (RV.getNode() == 0) { + if (!RV.getNode()) { assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned NULL!"); @@ -1217,7 +1287,7 @@ SDValue DAGCombiner::combine(SDNode *N) { } // If nothing happened still, try promoting the operation. - if (RV.getNode() == 0) { + if (!RV.getNode()) { switch (N->getOpcode()) { default: break; case ISD::ADD: @@ -1247,17 +1317,23 @@ SDValue DAGCombiner::combine(SDNode *N) { // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. - if (RV.getNode() == 0 && - SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Constant operands are canonicalized to RHS. if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { - SDValue Ops[] = { N1, N0 }; - SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), - Ops, 2); + SDValue Ops[] = {N1, N0}; + SDNode *CSENode; + if (const BinaryWithFlagsSDNode *BinNode = + dyn_cast<BinaryWithFlagsSDNode>(N)) { + CSENode = DAG.getNodeIfExists( + N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), + BinNode->hasNoSignedWrap(), BinNode->isExact()); + } else { + CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); + } if (CSENode) return SDValue(CSENode, 0); } @@ -1321,7 +1397,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // Queue up for processing. TFs.push_back(Op.getNode()); // Clean up in case the token factor is removed. - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); Changed = true; break; } @@ -1347,8 +1423,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getEntryNode(); } else { // New and improved token factor. - Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), - MVT::Other, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } // Don't add users to work list. @@ -1360,18 +1435,18 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); // Replacing results may cause a different MERGE_VALUES to suddenly // be CSE'd with N, and carry its uses with it. Iterate until no // uses remain, to ensure that the node can be safely deleted. // First add the users of this node to the work list so that they // can be tried again once they have new operands. 
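
[Note] The commuted-CSE attempt above works because canonicalizing a commutative node's operands (constants to the RHS) makes syntactically different but equal nodes map to the same key, which is what lets getNodeIfExists find an existing twin. The idea in miniature (plain C++; a stand-in key, not the SelectionDAG CSE map):

    #include <algorithm>
    #include <cassert>
    #include <map>
    #include <utility>

    int main() {
      // Key commutative (lhs, rhs) pairs on a canonical operand order.
      auto canon = [](int lhs, int rhs) { return std::minmax(lhs, rhs); };
      std::map<std::pair<int, int>, int> cse;
      cse[canon(5, 7)] = 42;          // existing node for "add 5, 7"
      assert(cse.count(canon(7, 5))); // "add 7, 5" finds the same node
    }
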
- AddUsersToWorkList(N); + AddUsersToWorklist(N); do { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); } while (!N->use_empty()); - removeFromWorkList(N); + removeFromWorklist(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -1447,7 +1522,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { N0.getOperand(1)); // reassociate add SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); - if (RADD.getNode() != 0) + if (RADD.getNode()) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && @@ -1500,15 +1575,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); + DAG.computeKnownBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); + DAG.computeKnownBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ + if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + } } } @@ -1593,10 +1670,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); + DAG.computeKnownBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); + DAG.computeKnownBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. @@ -1645,7 +1722,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 : + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); @@ -1778,22 +1855,6 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } -/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose -/// elements are all the same constant or undefined. -static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { - BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); - if (!C) - return false; - - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - EVT EltVT = N->getValueType(0).getVectorElementType(); - return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs) && - EltVT.getSizeInBits() >= SplatBitSize); -} - SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1814,10 +1875,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0; + N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr; ConstValue0 = N0IsConst ? 
(dyn_cast<ConstantSDNode>(N0))->getAPIntValue() : APInt(); - N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0; + N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr; ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() : APInt(); } @@ -1867,7 +1928,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1)))) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); - AddToWorkList(C3.getNode()); + AddToWorklist(C3.getNode()); return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } @@ -1875,7 +1936,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { - SDValue Sh(0,0), Y(0,0); + SDValue Sh(nullptr,0), Y(nullptr,0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || @@ -1908,7 +1969,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // reassociate mul SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); - if (RMUL.getNode() != 0) + if (RMUL.getNode()) return RMUL; return SDValue(); @@ -1917,8 +1978,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -1944,10 +2005,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), N0, N1); } + // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && - (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2DivCheap()) @@ -1956,18 +2017,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register - SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy(N0.getValueType()))); - AddToWorkList(SGN.getNode()); + SDValue SGN = + DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + getShiftAmountTy(N0.getValueType()))); + AddToWorklist(SGN.getNode()); // Add (N0 < 0) ? 
abs2 - 1 : 0; - SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, - DAG.getConstant(VT.getSizeInBits() - lg2, - getShiftAmountTy(SGN.getValueType()))); + SDValue SRL = + DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, + DAG.getConstant(VT.getScalarSizeInBits() - lg2, + getShiftAmountTy(SGN.getValueType()))); SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); - AddToWorkList(SRL.getNode()); - AddToWorkList(ADD.getNode()); // Divide by pow2 + AddToWorklist(SRL.getNode()); + AddToWorklist(ADD.getNode()); // Divide by pow2 SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); @@ -1976,14 +2039,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (N1C->getAPIntValue().isNonNegative()) return SRA; - AddToWorkList(SRA.getNode()); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), SRA); + AddToWorklist(SRA.getNode()); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); if (Op.getNode()) return Op; } @@ -2001,8 +2063,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -2029,13 +2091,13 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { DAG.getConstant(SHC->getAPIntValue() .logBase2(), ADDVT)); - AddToWorkList(Add.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); } } } // fold (udiv x, c) -> alternate - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildUDIV(N); if (Op.getNode()) return Op; } @@ -2053,8 +2115,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 @@ -2071,13 +2133,13 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // X%C to the equivalent of X-X/C*C. 
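
[Note] The SGN/SRL/ADD/SRA sequence in visitSDIV above implements signed division by a power of two with round-toward-zero semantics: negative dividends are biased by 2^lg2 - 1 before the arithmetic shift. A standalone check (plain C++; assumes arithmetic right shift of negative values, which is implementation-defined in C++ before C++20 but holds on the targets this fold serves):

    #include <cassert>
    #include <cstdint>

    // The shape visitSDIV emits for x / (1 << lg2), with 0 < lg2 < 32.
    int32_t sdiv_pow2(int32_t x, unsigned lg2) {
      int32_t sgn = x >> 31;                        // splat the sign bit
      uint32_t bias = uint32_t(sgn) >> (32 - lg2);  // 2^lg2 - 1 if x < 0, else 0
      return (x + int32_t(bias)) >> lg2;            // SRA now rounds toward zero
    }

    int main() {
      for (int32_t x : {-27, -8, -1, 0, 1, 8, 27})
        assert(sdiv_pow2(x, 3) == x / 8);
    }
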
if (N1C && !N1C->isNullValue()) { SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); - AddToWorkList(Div.getNode()); + AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorkList(Mul.getNode()); + AddToWorklist(Mul.getNode()); return Sub; } } @@ -2095,8 +2157,8 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 @@ -2114,7 +2176,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); - AddToWorkList(Add.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); } } @@ -2124,13 +2186,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); - AddToWorkList(Div.getNode()); + AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorkList(Mul.getNode()); + AddToWorklist(Mul.getNode()); return Sub; } } @@ -2229,9 +2291,9 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, bool HiExists = N->hasAnyUseOfValue(1); if (!HiExists && (!LegalOperations || - TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - N->op_begin(), N->getNumOperands()); + ArrayRef<SDUse>(N->op_begin(), N->op_end())); return CombineTo(N, Res, Res); } @@ -2241,7 +2303,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - N->op_begin(), N->getNumOperands()); + ArrayRef<SDUse>(N->op_begin(), N->op_end())); return CombineTo(N, Res, Res); } @@ -2252,8 +2314,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. 
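
[Note] Both remainder rewrites here lean on the definition x % c == x - (x / c) * c (reusing an already-combined divide), and the unsigned power-of-two case degenerates to a mask. A quick check of both (plain C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 1234567, c = 89;
      // X % C -> X - (X / C) * C
      assert(x % c == x - (x / c) * c);
      // Unsigned remainder by 2^k is just a mask: x % 8 == x & 7.
      assert(x % 8u == (x & 7u));
    }
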
if (LoExists) { SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - N->op_begin(), N->getNumOperands()); - AddToWorkList(Lo.getNode()); + ArrayRef<SDUse>(N->op_begin(), N->op_end())); + AddToWorklist(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && (!LegalOperations || @@ -2263,8 +2325,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (HiExists) { SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - N->op_begin(), N->getNumOperands()); - AddToWorkList(Hi.getNode()); + ArrayRef<SDUse>(N->op_begin(), N->op_end())); + AddToWorklist(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); if (HiOpt.getNode() && HiOpt != Hi && (!LegalOperations || @@ -2403,7 +2465,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); } @@ -2417,7 +2479,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode, N0.getOperand(1)); } @@ -2442,7 +2504,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); return BC; } } @@ -2454,35 +2516,66 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // The type-legalizer generates this pattern when loading illegal // vector types from memory. In many cases this allows additional shuffle // optimizations. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N0.getOperand(1).getOpcode() == ISD::UNDEF && - N1.getOperand(1).getOpcode() == ISD::UNDEF) { + // There are other cases where moving the shuffle after the xor/and/or + // is profitable even if shuffles don't perform a swizzle. + // If both shuffles use the same mask, and both shuffles have the same first + // or second operand, then it might still be profitable to move the shuffle + // after the xor/and/or operation. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); - assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && "Inputs to shuffles are not the same type"); - unsigned NumElts = VT.getVectorNumElements(); - // Check that both shuffles use the same mask. The masks are known to be of // the same length because the result vector type is the same. - bool SameMask = true; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx0 = SVN0->getMaskElt(i); - int Idx1 = SVN1->getMaskElt(i); - if (Idx0 != Idx1) { - SameMask = false; - break; + // Check also that shuffles have only one use to avoid introducing extra + // instructions. 
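
[Note] Moving the logic op above the shuffles is legal because a shuffle only permutes lanes: applying the same permutation to both inputs and then combining lanewise equals combining first and permuting once. A scalar model of (AND (shuf A, mask), (shuf B, mask)) -> (shuf (AND A, B), mask), with 4-element arrays standing in for vectors (plain C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t A[4] = {1, 2, 3, 4}, B[4] = {0xF0, 0x0F, 0xFF, 0x00};
      int mask[4] = {3, 1, 2, 0};                 // same mask on both operands

      uint8_t C[4];
      for (int j = 0; j < 4; ++j) C[j] = A[j] & B[j];     // AND first...
      for (int i = 0; i < 4; ++i)
        assert((A[mask[i]] & B[mask[i]]) == C[mask[i]]);  // ...equals AND after
    }
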
+ if (SVN0->hasOneUse() && SVN1->hasOneUse() && + SVN0->getMask().equals(SVN1->getMask())) { + SDValue ShOp = N0->getOperand(1); + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. + if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, VT); + else + ShOp = SDValue(); } - } - if (SameMask) { - SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(Op.getNode()); - return DAG.getVectorShuffle(VT, SDLoc(N), Op, - DAG.getUNDEF(VT), &SVN0->getMask()[0]); + // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) + // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) + // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) + if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(0), N1->getOperand(0)); + AddToWorklist(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, + &SVN0->getMask()[0]); + } + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. + ShOp = N0->getOperand(0); + if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, VT); + else + ShOp = SDValue(); + } + + // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) + // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) + // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) + if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(1), N1->getOperand(1)); + AddToWorklist(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, + &SVN0->getMask()[0]); + } } } @@ -2534,7 +2627,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getConstant(0, VT); // reassociate and SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); - if (RAND.getNode() != 0) + if (RAND.getNode()) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) @@ -2670,21 +2763,21 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), LR.getValueType(), LL, RL); - AddToWorkList(ANDNode.getNode()); + AddToWorklist(ANDNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } } @@ -2697,7 +2790,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { cast<ConstantSDNode>(RR)->isNullValue()))) { SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), LL, DAG.getConstant(1, LL.getValueType())); - AddToWorkList(ADDNode.getNode()); + 
AddToWorklist(ADDNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ADDNode, DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); } @@ -2745,7 +2838,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -2765,7 +2858,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -2796,7 +2889,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -2823,7 +2916,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { Alignment = MinAlign(Alignment, PtrOff); } - AddToWorkList(NewPtr.getNode()); + AddToWorklist(NewPtr.getNode()); EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = @@ -2832,7 +2925,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), Alignment, LN0->getTBAAInfo()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -3067,7 +3160,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); - SmallVector<SDNode*,4> Parts(4, (SDNode*)0); + SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr); // Look for either // (or (or (and), (and)), (or (and), (and))) // (or (or (or (and), (and)), (and)), (and)) @@ -3151,6 +3244,62 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return N0; if (ISD::isBuildVectorAllOnes(N1.getNode())) return N1; + + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) + // Do this only if the resulting shuffle is legal. + if (isa<ShuffleVectorSDNode>(N0) && + isa<ShuffleVectorSDNode>(N1) && + // Avoid folding a node with illegal type. + TLI.isTypeLegal(VT) && + N0->getOperand(1) == N1->getOperand(1) && + ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { + bool CanFold = true; + unsigned NumElts = VT.getVectorNumElements(); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + // We construct two shuffle masks: + // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand + // and N1 as the second operand. + // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand + // and N0 as the second operand. + // We do this because OR is commutable and therefore there might be + // two ways to fold this node into a shuffle. + SmallVector<int,4> Mask1; + SmallVector<int,4> Mask2; + + for (unsigned i = 0; i != NumElts && CanFold; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Both shuffle indexes are undef. Propagate Undef. 
+ if (M0 < 0 && M1 < 0) { + Mask1.push_back(M0); + Mask2.push_back(M0); + continue; + } + + if (M0 < 0 || M1 < 0 || + (M0 < (int)NumElts && M1 < (int)NumElts) || + (M0 >= (int)NumElts && M1 >= (int)NumElts)) { + CanFold = false; + break; + } + + Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); + Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); + } + + if (CanFold) { + // Fold this sequence only if the resulting shuffle is 'legal'. + if (TLI.isShuffleMaskLegal(Mask1, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), + N1->getOperand(0), &Mask1[0]); + if (TLI.isShuffleMaskLegal(Mask2, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), + N0->getOperand(0), &Mask2[0]); + } + } } // fold (or x, undef) -> -1 @@ -3177,26 +3326,29 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) SDValue BSwap = MatchBSwapHWord(N, N0, N1); - if (BSwap.getNode() != 0) + if (BSwap.getNode()) return BSwap; BSwap = MatchBSwapHWordLow(N, N0, N1); - if (BSwap.getNode() != 0) + if (BSwap.getNode()) return BSwap; // reassociate or SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); - if (ROR.getNode() != 0) + if (ROR.getNode()) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) == 0. if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); - if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) + if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { + SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1); + if (!COR.getNode()) + return SDValue(); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + N0.getOperand(0), N1), COR); + } } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ @@ -3211,7 +3363,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), LR.getValueType(), LL, RL); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) @@ -3220,7 +3372,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), LR.getValueType(), LL, RL); - AddToWorkList(ANDNode.getNode()); + AddToWorklist(ANDNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } } @@ -3302,35 +3454,163 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { return false; } +// Return true if we can prove that, whenever Neg and Pos are both in the +// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that +// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: +// +// (or (shift1 X, Neg), (shift2 X, Pos)) +// +// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate +// in direction shift1 by Neg. The range [0, OpSize) means that we only need +// to consider shift amounts with defined behavior. +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { + // If OpSize is a power of 2 then: + // + // (a) (Pos == 0 ? 
0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) + // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). + // + // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check + // for the stronger condition: + // + // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] + // + // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) + // we can just replace Neg with Neg' for the rest of the function. + // + // In other cases we check for the even stronger condition: + // + // Neg == OpSize - Pos [B] + // + // for all Neg and Pos. Note that the (or ...) then invokes undefined + // behavior if Pos == 0 (and consequently Neg == OpSize). + // + // We could actually use [A] whenever OpSize is a power of 2, but the + // only extra cases that it would match are those uninteresting ones + // where Neg and Pos are never in range at the same time. E.g. for + // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // as well as (sub 32, Pos), but: + // + // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) + // + // always invokes undefined behavior for 32-bit X. + // + // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. + unsigned MaskLoBits = 0; + if (Neg.getOpcode() == ISD::AND && + isPowerOf2_64(OpSize) && + Neg.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { + Neg = Neg.getOperand(0); + MaskLoBits = Log2_64(OpSize); + } + + // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. + if (Neg.getOpcode() != ISD::SUB) + return 0; + ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); + if (!NegC) + return 0; + SDValue NegOp1 = Neg.getOperand(1); + + // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with + // Pos'. The truncation is redundant for the purpose of the equality. + if (MaskLoBits && + Pos.getOpcode() == ISD::AND && + Pos.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) + Pos = Pos.getOperand(0); + + // The condition we need is now: + // + // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask + // + // If NegOp1 == Pos then we need: + // + // OpSize & Mask == NegC & Mask + // + // (because "x & Mask" is a truncation and distributes through subtraction). + APInt Width; + if (Pos == NegOp1) + Width = NegC->getAPIntValue(); + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. + // Then the condition we want to prove becomes: + // + // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask + // + // which, again because "x & Mask" is a truncation, becomes: + // + // NegC & Mask == (OpSize - PosC) & Mask + // OpSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && + Pos.getOperand(0) == NegOp1 && + Pos.getOperand(1).getOpcode() == ISD::Constant) + Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + + NegC->getAPIntValue()); + else + return false; + + // Now we just need to check that OpSize & Mask == Width & Mask. + if (MaskLoBits) + // Opsize & Mask is 0 since Mask is Opsize - 1. + return Width.getLoBits(MaskLoBits) == 0; + return Width == OpSize; +} + +// A subroutine of MatchRotate used once we have found an OR of two opposite +// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces +// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the +// former being preferred if supported. 
InnerPos and InnerNeg are Pos and +// Neg with outer conversions stripped away. +SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, + SDValue Neg, SDValue InnerPos, + SDValue InnerNeg, unsigned PosOpcode, + unsigned NegOpcode, SDLoc DL) { + // fold (or (shl x, (*ext y)), + // (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) or (rotr x, (sub 32, y)) + // + // fold (or (shl x, (*ext (sub 32, y))), + // (srl x, (*ext y))) -> + // (rotr x, y) or (rotl x, (sub 32, y)) + EVT VT = Shifted.getValueType(); + if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { + bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); + return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, + HasPos ? Pos : Neg).getNode(); + } + + return nullptr; +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) return 0; + if (!TLI.isTypeLegal(VT)) return nullptr; // The target must have at least one rotate flavor. bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); - if (!HasROTL && !HasROTR) return 0; + if (!HasROTL && !HasROTR) return nullptr; // Match "(X shl/srl V1) & V2" where V2 may not be present. SDValue LHSShift; // The shift. SDValue LHSMask; // AND value if any. if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) - return 0; // Not part of a rotate. + return nullptr; // Not part of a rotate. SDValue RHSShift; // The shift. SDValue RHSMask; // AND value if any. if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) - return 0; // Not part of a rotate. + return nullptr; // Not part of a rotate. if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) - return 0; // Not shifting the same value. + return nullptr; // Not shifting the same value. if (LHSShift.getOpcode() == RHSShift.getOpcode()) - return 0; // Shifts must disagree. + return nullptr; // Shifts must disagree. // Canonicalize shl to left side in a shl/srl pair. if (RHSShift.getOpcode() == ISD::SHL) { @@ -3342,6 +3622,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { unsigned OpSizeInBits = VT.getSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) @@ -3351,7 +3632,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); if ((LShVal + RShVal) != OpSizeInBits) - return 0; + return nullptr; SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); @@ -3378,7 +3659,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) - return 0; + return nullptr; // If the shift amount is sign/zext/any-extended just peel it off. 
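
[Note] The rotate machinery above exists because (or (shl x, c), (srl x, 32 - c)) is a rotate, and the masked form ((32 - c) & 31) extends this to variable amounts without the shift-by-32 hazard — exactly the relaxed condition [A] that matchRotateSub proves. A scalar check (plain C++):

    #include <cassert>
    #include <cstdint>

    // Branch-free rotl, defined for every c, including c % 32 == 0.
    uint32_t rotl32(uint32_t x, unsigned c) {
      return (x << (c & 31)) | (x >> ((32 - c) & 31));
    }

    int main() {
      assert(rotl32(0x80000001u, 1)  == 0x00000003u); // top bit wraps to bit 0
      assert(rotl32(0xDEADBEEFu, 0)  == 0xDEADBEEFu); // no UB at zero
      assert(rotl32(0xDEADBEEFu, 32) == 0xDEADBEEFu);
    }
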
SDValue LExtOp0 = LHSShiftAmt; @@ -3395,30 +3676,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) { - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotl x, y) - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } else if (LExtOp0.getOpcode() == ISD::SUB && - RExtOp0 == LExtOp0.getOperand(1)) { - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotr x, y) - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - - return 0; + SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); + if (TryL) + return TryL; + + SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); + if (TryR) + return TryR; + + return nullptr; } SDValue DAGCombiner::visitXOR(SDNode *N) { @@ -3460,7 +3728,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return N0; // reassociate xor SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); - if (RXOR.getNode() != 0) + if (RXOR.getNode()) return RXOR; // fold !(x cc y) -> (x !cc y) @@ -3490,7 +3758,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue V = N0.getOperand(0); V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, DAG.getConstant(1, V.getValueType())); - AddToWorkList(V.getNode()); + AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } @@ -3502,7 +3770,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } @@ -3514,7 +3782,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } @@ -3523,7 +3791,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N0->getOperand(1) == N1) { SDValue X = N0->getOperand(0); SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); - AddToWorkList(NotX.getNode()); + AddToWorklist(NotX.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) @@ -3559,7 +3827,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// visitShiftByConstant - Handle transforms common to the three shifts, when /// the shift amount is a constant. 
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { + // We can't and shouldn't fold opaque constants. + if (Amt->isOpaque()) + return SDValue(); + SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -3585,9 +3857,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { break; } - // We require the RHS of the binop to be a constant as well. + // We require the RHS of the binop to be a constant and not opaque as well. ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); - if (!BinOpCst) return SDValue(); + if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant. // If it is not a shift, it pessimizes some common cases like: @@ -3613,10 +3885,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { return SDValue(); } + if (!TLI.isDesirableToCommuteWithShift(LHS)) + return SDValue(); + // Fold the constants, shifting the binop RHS by the shift amount. SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), LHS->getOperand(1), N->getOperand(1)); + assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); // Create the new shift. SDValue NewShift = DAG.getNode(N->getOpcode(), @@ -3627,18 +3903,74 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); } +SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { + assert(N->getOpcode() == ISD::TRUNCATE); + assert(N->getOperand(0).getOpcode() == ISD::AND); + + // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) + if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { + SDValue N01 = N->getOperand(0).getOperand(1); + + if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + + return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, + DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), + DAG.getConstant(TruncC, TruncVT)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitRotate(SDNode *N) { + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
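
[Note] distributeTruncateThroughAnd is sound because truncation keeps exactly the low bits and AND operates bitwise, so the two commute. A one-line check of the identity it rewrites (plain C++, 64 -> 32 bit):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t y = 0x123456789ABCDEF0ull, c = 0xFF00FF00FF00FF00ull;
      // (truncate (and y, c)) == (and (truncate y), (truncate c))
      assert(uint32_t(y & c) == (uint32_t(y) & uint32_t(c)));
    }
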
+ if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && + N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); + if (NewOp1.getNode()) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), NewOp1); + } + return SDValue(); +} + SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); + // If setcc produces all-one true value then: + // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) + if (N1CV && N1CV->isConstant()) { + if (N0.getOpcode() == ISD::AND) { + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); + + if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && + TLI.getBooleanContents(N00.getOperand(0).getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); + if (C.getNode()) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); + } + } else { + N1C = isConstOrConstSplat(N1); + } + } } // fold (shl c1, c2) -> c1<<c2 @@ -3662,35 +3994,25 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getConstant(0, VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SHL && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) @@ -3701,20 +4023,21 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) && - N0.getOperand(0).getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); - if (c2 >= OpSizeInBits - InnerShiftSize) { - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N0), VT, - DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, - N0.getOperand(0)->getOperand(0)), - DAG.getConstant(c1 + c2, N1.getValueType())); + N0.getOperand(0).getOpcode() == ISD::SHL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0Op0.getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, SDLoc(N0), VT, + DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, + N0Op0->getOperand(0)), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } } @@ -3722,19 +4045,20 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Only fold this if the inner zext has no other uses to avoid increasing // the total number of instructions. 
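
[Note] The (shl (shl x, c1), c2) rule above is ordinary shift composition: the amounts add, and once c1 + c2 reaches the bit width every bit has been shifted out, so the combiner substitutes the constant 0 rather than emitting an over-wide shift. The in-range half, checked in plain C++ (shifting by >= 32 would be undefined, so that half has no runtime check):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x00F000F0u;
      unsigned c1 = 5, c2 = 7;                      // c1 + c2 < 32
      assert(((x << c1) << c2) == (x << (c1 + c2)));
    }
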
if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && - N0.getOperand(0).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); - if (c1 < VT.getSizeInBits()) { - uint64_t c2 = N1C->getZExtValue(); - if (c1 == c2) { - SDValue NewOp0 = N0.getOperand(0); - EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), - NewOp0, DAG.getConstant(c2, CountVT)); - AddToWorkList(NewSHL.getNode()); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + N0.getOperand(0).getOpcode() == ISD::SRL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + if (c1 < VT.getScalarSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), + NewOp0, DAG.getConstant(c2, CountVT)); + AddToWorklist(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } } } } @@ -3743,40 +4067,39 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding // this will increase the total number of instructions. - if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - if (c1 < VT.getSizeInBits()) { - uint64_t c2 = N1C->getZExtValue(); - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - c1); - SDValue Shift; - if (c2 > c1) { - Mask = Mask.shl(c2-c1); - Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c2-c1, N1.getValueType())); - } else { - Mask = Mask.lshr(c1-c2); - Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1-c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + if (c1 < OpSizeInBits) { + uint64_t c2 = N1C->getZExtValue(); + APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); + SDValue Shift; + if (c2 > c1) { + Mask = Mask.shl(c2 - c1); + Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, N1.getValueType())); + } else { + Mask = Mask.lshr(c1 - c2); + Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, N1.getValueType())); + } + return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, + DAG.getConstant(Mask, VT)); } - return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, - DAG.getConstant(Mask, VT)); } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + unsigned BitSize = VT.getScalarSizeInBits(); SDValue HiBitsMask = - DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - - N1C->getZExtValue()), - VT); + DAG.getConstant(APInt::getHighBitsSet(BitSize, + BitSize - N1C->getZExtValue()), VT); return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), HiBitsMask); } if (N1C) { - SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSHL = visitShiftByConstant(N, N1C); if 
(NewSHL.getNode()) return NewSHL; } @@ -3796,6 +4119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N1C = isConstOrConstSplat(N1); } // fold (sra c1, c2) -> (sra c1, c2) @@ -3829,11 +4154,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRA) { - if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; + if (Sum >= OpSizeInBits) + Sum = OpSizeInBits - 1; return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(Sum, N1C->getValueType(0))); + DAG.getConstant(Sum, N1.getValueType())); } } @@ -3842,14 +4168,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // result_size - n != m. // If truncate is free for the target sext(shl) is likely to result in better // code. - if (N0.getOpcode() == ISD::SHL) { + if (N0.getOpcode() == ISD::SHL && N1C) { // Get the two constanst of the shifts, CN0 = m, CN = n. - const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (N01C && N1C) { + const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); + if (N01C) { + LLVMContext &Ctx = *DAG.getContext(); // Determine what the truncate's result bitsize and type would be. - EVT TruncVT = - EVT::getIntegerVT(*DAG.getContext(), - OpSizeInBits - N1C->getZExtValue()); + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); + + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); + // Determine the residual right-shift amount. signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -3876,44 +4205,33 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 
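
[Note] Unlike logical shifts, stacked arithmetic right shifts saturate rather than zero out: once everything but the sign is gone, further shifting changes nothing, hence the clamp of Sum to OpSizeInBits - 1 in the (sra (sra x, c1), c2) fold above. A check (plain C++; again assumes arithmetic >> on negative int32_t):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t x : {-123456, -1, 0, 987654})
        // total shift of 32 clamps to 31
        assert(((x >> 16) >> 16) == (x >> 31));
    }
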
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), - TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } - } - - // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); + } + + // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) // if c1 is equal to the number of bits the trunc removes if (N0.getOpcode() == ISD::TRUNCATE && (N0.getOperand(0).getOpcode() == ISD::SRL || N0.getOperand(0).getOpcode() == ISD::SRA) && N0.getOperand(0).hasOneUse() && N0.getOperand(0).getOperand(1).hasOneUse() && - N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { - EVT LargeVT = N0.getOperand(0).getValueType(); - ConstantSDNode *LargeShiftAmt = - cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); - - if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == - LargeShiftAmt->getZExtValue()) { - SDValue Amt = - DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), - getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, - N0.getOperand(0).getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + N1C) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { + unsigned LargeShiftVal = LargeShift->getZExtValue(); + EVT LargeVT = N0Op0.getValueType(); + + if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDValue Amt = + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), + getShiftAmountTy(N0Op0.getOperand(0).getValueType())); + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, + N0Op0.getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + } } } @@ -3927,7 +4245,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C) { - SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSRA = visitShiftByConstant(N, N1C); if (NewSRA.getNode()) return NewSRA; } @@ -3947,6 +4265,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N1C = isConstOrConstSplat(N1); } // fold (srl c1, c2) -> c1 >>u c2 @@ -3967,14 +4287,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getConstant(0, VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SRL && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *N01C = 
isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N01C->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) @@ -3999,18 +4320,21 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl (shl x, c), c) -> (and x, cst2) - if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && - N0.getValueSizeInBits() <= 64) { - uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, VT)); + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { + unsigned BitSize = N0.getScalarValueSizeInBits(); + if (BitSize <= 64) { + uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, VT)); + } } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? EVT SmallVT = N0.getOperand(0).getValueType(); - if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) + unsigned BitSize = SmallVT.getScalarSizeInBits(); + if (N1C->getZExtValue() >= BitSize) return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { @@ -4018,8 +4342,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); - AddToWorkList(SmallShift.getNode()); - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt); + AddToWorklist(SmallShift.getNode()); + APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), DAG.getConstant(Mask, VT)); @@ -4028,16 +4352,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign // bit, which is unmodified by sra. - if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { + if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) { if (N0.getOpcode() == ISD::SRA) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). if (N1C && N0.getOpcode() == ISD::CTLZ && - N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { + N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); + DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -4060,7 +4384,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (ShAmt) { Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); } return DAG.getNode(ISD::XOR, SDLoc(N), VT, @@ -4070,22 +4394,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 
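
For illustration only (not part of the patch): a minimal sketch, again in plain C++ with names of our choosing, of the (srl (shl x, c), c) -> (and x, cst2) fold above, showing how the mask ~0ULL >> (c + 64 - BitSize) keeps exactly the bits that survive the shift round trip on a 32-bit value.

    #include <cassert>
    #include <cstdint>

    uint32_t foldSrlOfShl(uint32_t X, unsigned C) {
      const unsigned BitSize = 32;       // stands in for N0.getScalarValueSizeInBits()
      uint64_t ShAmt = C + 64 - BitSize; // shift the 64-bit all-ones pattern down
      uint32_t Mask = static_cast<uint32_t>(~0ULL >> ShAmt);
      return X & Mask;
    }

    int main() {
      uint32_t X = 0xDEADBEEF;
      for (unsigned C = 0; C < 32; ++C)
        assert(foldSrlOfShl(X, C) == ((X << C) >> C)); // both shifts logical here
      return 0;
    }
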
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), - TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } // fold operands of srl based on knowledge that the low bits are not @@ -4094,7 +4406,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(N, 0); if (N1C) { - SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSRL = visitShiftByConstant(N, N1C); if (NewSRL.getNode()) return NewSRL; } @@ -4124,12 +4436,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N->hasOneUse()) { SDNode *Use = *N->use_begin(); if (Use->getOpcode() == ISD::BRCOND) - AddToWorkList(Use); + AddToWorklist(Use); else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { // Also look past the truncate. Use = *Use->use_begin(); if (Use->getOpcode() == ISD::BRCOND) - AddToWorkList(Use); + AddToWorklist(Use); } } @@ -4209,11 +4521,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) + // We can't do this reliably if integer-based booleans have different contents + // to floating-point-based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression.
if (VT.isInteger() && - (VT0 == MVT::i1 || - (VT0.isInteger() && - TLI.getBooleanContents(false) == - TargetLowering::ZeroOrOneBooleanContent)) && + (VT0 == MVT::i1 || (VT0.isInteger() && + TLI.getBooleanContents(false, false) == + TLI.getBooleanContents(false, true) && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) @@ -4221,7 +4542,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { N0, DAG.getConstant(1, VT0)); XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, N0, DAG.getConstant(1, VT0)); - AddToWorkList(XORNode.getNode()); + AddToWorklist(XORNode.getNode()); if (VT.bitsGT(VT0)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); @@ -4229,13 +4550,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); - AddToWorkList(NOTNode.getNode()); + AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); - AddToWorkList(NOTNode.getNode()); + AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) @@ -4256,12 +4577,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { - // FIXME: - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && - TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) + if ((!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || + TLI.isOperationLegal(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -4275,12 +4593,12 @@ static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the inputs. SDValue Lo, Hi, LL, LH, RL, RH; - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -4288,6 +4606,56 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { return std::make_pair(Lo, Hi); } +// This function assumes all the vselect's arguments are CONCAT_VECTORS +// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
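
For illustration only (not part of the patch): a standalone model, in plain C++ over fixed 4-wide arrays with names of our choosing, of the transform the function that follows performs. Once each half of the constant condition vector is known to be uniform (ignoring undefs), the per-element vselect collapses into a concatenation of the chosen half of each concat_vectors operand.

    #include <array>
    #include <cassert>

    using V4 = std::array<int, 4>;

    // Reference semantics: elementwise select.
    V4 vselect(const std::array<bool, 4> &Cond, const V4 &LHS, const V4 &RHS) {
      V4 R{};
      for (int i = 0; i < 4; ++i)
        R[i] = Cond[i] ? LHS[i] : RHS[i];
      return R;
    }

    int main() {
      V4 LHS = {1, 2, 3, 4}, RHS = {5, 6, 7, 8};
      std::array<bool, 4> Cond = {true, true, false, false}; // uniform halves
      // ... so the select is just concat(bottom half of LHS, top half of RHS).
      V4 Concat = {LHS[0], LHS[1], RHS[2], RHS[3]};
      assert(vselect(Cond, LHS, RHS) == Concat);
      return 0;
    }
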
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { + SDLoc dl(N); + SDValue Cond = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + MVT VT = N->getSimpleValueType(0); + int NumElems = VT.getVectorNumElements(); + assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && + RHS.getOpcode() == ISD::CONCAT_VECTORS && + Cond.getOpcode() == ISD::BUILD_VECTOR); + + // We're sure we have an even number of elements due to the + // concat_vectors we have as arguments to vselect. + // Skip BV elements until we find one that's not an UNDEF. + // After we find a non-UNDEF element, keep looping until we get to half the + // length of the BV and see if all the non-undef nodes are the same. + ConstantSDNode *BottomHalf = nullptr; + for (int i = 0; i < NumElems / 2; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (BottomHalf == nullptr) + BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != BottomHalf) + return SDValue(); + } + + // Do the same for the second half of the BuildVector + ConstantSDNode *TopHalf = nullptr; + for (int i = NumElems / 2; i < NumElems; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (TopHalf == nullptr) + TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != TopHalf) + return SDValue(); + } + + assert(TopHalf && BottomHalf && + "One half of the selector was all UNDEFs and the other was all the " + "same value. This should have been addressed before this function."); + return DAG.getNode( + ISD::CONCAT_VECTORS, dl, VT, + BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), + TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4319,8 +4687,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { ISD::SRA, DL, VT, LHS, DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); + AddToWorklist(Shift.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); } } @@ -4338,21 +4706,39 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return SDValue(); SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; - llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); + std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); // Add the new VSELECT nodes to the work list in case they need to be split // again. - AddToWorkList(Lo.getNode()); - AddToWorkList(Hi.getNode()); + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } + // Fold (vselect (build_vector all_ones), N1, N2) -> N1 + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N2; + + // The ConvertSelectToConcatVector function is assuming both the above + // checks for (vselect (build_vector all{ones,zeros}) ...)
have been made + // and addressed. + if (N1.getOpcode() == ISD::CONCAT_VECTORS && + N2.getOpcode() == ISD::CONCAT_VECTORS && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SDValue CV = ConvertSelectToConcatVector(N, DAG); + if (CV.getNode()) + return CV; + } + return SDValue(); } @@ -4372,7 +4758,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false); if (SCC.getNode()) { - AddToWorkList(SCC.getNode()); + AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { if (!SCCC->isNullValue()) @@ -4402,6 +4788,65 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } +// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext +// dag node into a ConstantSDNode or a build_vector of constants. +// This function is called by the DAGCombiner when visiting sext/zext/aext +// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). +// Vector extends are not folded if operations are legal; this is to +// avoid introducing illegal build_vector dag nodes. +static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, + SelectionDAG &DAG, bool LegalTypes, + bool LegalOperations) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || + Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); + + // fold (sext c1) -> c1 + // fold (zext c1) -> c1 + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); + + // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) + EVT SVT = VT.getScalarType(); + if (!(VT.isVector() && + (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) + return nullptr; + + // We can fold this node into a build_vector. + unsigned VTBits = SVT.getSizeInBits(); + unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); + unsigned ShAmt = VTBits - EVTBits; + SmallVector<SDValue, 8> Elts; + unsigned NumElts = N0->getNumOperands(); + SDLoc DL(N); + + for (unsigned i=0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(DAG.getUNDEF(SVT)); + continue; + } + + ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); + const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); + if (Opcode == ISD::SIGN_EXTEND) + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), + SVT)); + else + Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), + SVT)); + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); +} + // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. 
Returns true if extensions are possible and the above @@ -4483,8 +4928,7 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, } Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), - &Ops[0], Ops.size())); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); } } @@ -4492,9 +4936,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (sext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) @@ -4511,7 +4955,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4558,6 +5002,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // on vectors in one instruction. We only perform this transformation on // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4610,7 +5055,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::ZEXTLOAD) { + if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -4638,12 +5083,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + EVT N0VT = N0.getOperand(0).getValueType(); // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == - TargetLowering::ZeroOrNegativeOneBooleanContent) { - EVT N0VT = N0.getOperand(0).getValueType(); + TLI.getBooleanContents(N0VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case.
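
For illustration only (not part of the patch): a minimal sketch, in plain C++ on uint32_t rather than APInt and with function names of our choosing, of the shift trick tryToFoldExtendOfConstant uses above. Widening the EVTBits-wide constant to VTBits and then doing shl+ashr reproduces sign extension, while shl+lshr reproduces zero extension. Assumes two's complement and 1 <= EVTBits <= 31.

    #include <cassert>
    #include <cstdint>

    uint32_t sextConst(uint32_t C, unsigned EVTBits) {
      unsigned ShAmt = 32 - EVTBits;                  // VTBits - EVTBits
      return static_cast<uint32_t>(
          static_cast<int32_t>(C << ShAmt) >> ShAmt); // C.shl(ShAmt).ashr(ShAmt)
    }

    uint32_t zextConst(uint32_t C, unsigned EVTBits) {
      unsigned ShAmt = 32 - EVTBits;
      return (C << ShAmt) >> ShAmt;                   // C.shl(ShAmt).lshr(ShAmt)
    }

    int main() {
      assert(sextConst(0xF0, 8) == 0xFFFFFFF0u); // 8-bit -16 sign-extends
      assert(zextConst(0xF0, 8) == 0x000000F0u); // zero-extension keeps low bits
      assert(sextConst(0x70, 8) == 0x00000070u); // positive values are unchanged
      return 0;
    }
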
@@ -4671,7 +5116,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) unsigned ElementWidth = VT.getScalarType().getSizeInBits(); SDValue NegOne = DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); @@ -4680,15 +5125,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!VT.isVector() && - (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { - return DAG.getSelect(SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), - getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - NegOne, DAG.getConstant(0, VT)); + + if (!VT.isVector()) { + EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); + if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { + SDLoc DL(N); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + SDValue SetCC = DAG.getSetCC(DL, + SetCCVT, + N0.getOperand(0), N0.getOperand(1), CC); + EVT SelectVT = getSetCCResultType(VT); + return DAG.getSelect(DL, VT, + DAG.getSExtOrTrunc(SetCC, DL, SelectVT), + NegOne, DAG.getConstant(0, VT)); + + } } } @@ -4703,13 +5154,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // isTruncateOf - If N is a truncate of some other value, return true, record // the value being truncated in Op and which of Op's bits are zero in KnownZero. // This function computes KnownZero to avoid a duplicated call to -// ComputeMaskedBits in the caller. +// computeKnownBits in the caller. static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, APInt &KnownZero) { APInt KnownOne; if (N->getOpcode() == ISD::TRUNCATE) { Op = N->getOperand(0); - DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, KnownZero, KnownOne); return true; } @@ -4730,7 +5181,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, else return false; - DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, KnownZero, KnownOne); if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) return false; @@ -4742,9 +5193,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (zext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) @@ -4784,7 +5236,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4802,7 +5254,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} @@ -4810,10 +5262,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); } else if (Op.getValueType().bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); } return DAG.getZeroExtendInReg(Op, SDLoc(N), N0.getValueType().getScalarType()); @@ -4844,6 +5296,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // on vectors in one instruction. We only perform this transformation on // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4876,7 +5329,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::SEXTLOAD) { + if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -4925,10 +5378,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { - if (!LegalOperations && VT.isVector()) { + if (!LegalOperations && VT.isVector() && + N0.getValueType().getVectorElementType() == MVT::i1) { + EVT N0VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N0VT) == N0.getValueType()) + return SDValue(); + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. - EVT N0VT = N0.getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), DAG.getConstant(1, EltVT)); @@ -4943,7 +5400,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, - &OneOps[0], OneOps.size())); + OneOps)); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then @@ -4960,8 +5417,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { cast<CondCodeSDNode>(N0.getOperand(2))->get()); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, - &OneOps[0], OneOps.size())); + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps)); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc @@ -5007,9 +5463,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (aext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) @@ -5027,7 +5484,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} @@ -5067,8 +5524,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // on vectors in one instruction. We only perform this transformation on // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + ISD::isUNINDEXEDLoad(N0.getNode()) && + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -5096,20 +5553,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); + ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), - VT, LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), + VT, LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } } if (N0.getOpcode() == ISD::SETCC) { - // aext(setcc) -> sext_in_reg(vsetcc) for vectors. + // For vectors: + // aext(setcc) -> vsetcc + // aext(setcc) -> truncate(vsetcc) + // aext(setcc) -> aext(vsetcc) // Only do this before legalize for now. 
if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); @@ -5124,19 +5587,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then - // truncate/sign extend + // truncate/any extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); + return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); } } @@ -5160,7 +5618,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { default: break; case ISD::Constant: { const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); - assert(CV != 0 && "Const value should be ConstSDNode."); + assert(CV && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; if (NewVal != CVal) @@ -5324,7 +5782,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, PtrType)); - AddToWorkList(NewPtr.getNode()); + AddToWorklist(NewPtr.getNode()); SDValue Load; if (ExtType == ISD::NON_EXTLOAD) @@ -5339,7 +5797,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { NewAlign, LN0->getTBAAInfo()); // Replace the old load's chain with the new load's chain. - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); // Shift the result left, if we've swallowed a left shift. @@ -5438,7 +5896,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - AddToWorkList(ExtLoad.getNode()); + AddToWorklist(ExtLoad.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use @@ -5461,11 +5919,34 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false); - if (BSwap.getNode() != 0) + if (BSwap.getNode()) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } + // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs + // into a build_vector. 
+ if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SmallVector<SDValue, 8> Elts; + unsigned NumElts = N0->getNumOperands(); + unsigned ShAmt = VTBits - EVTBits; + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(Op); + continue; + } + + ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); + const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), + Op.getValueType())); + } + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); + } + return SDValue(); } @@ -5510,7 +5991,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // creates this pattern) and before operation legalization after which // we need to be more careful about the vector instructions that we generate. if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - LegalTypes && !LegalOperations && N0->hasOneUse()) { + LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { EVT VecTy = N0.getOperand(0).getValueType(); EVT ExTy = N0.getValueType(); @@ -5537,6 +6018,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // trunc (select c, a, b) -> select c, (trunc a), (trunc b) + if (N0.getOpcode() == ISD::SELECT) { + EVT SrcVT = N0.getValueType(); + if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && + TLI.isTruncateFree(SrcVT, VT)) { + SDLoc SL(N0); + SDValue Cond = N0.getOperand(0); + SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); + SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); + } + } + // Fold a series of buildvector, bitcast, and truncate if possible. // For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to @@ -5564,8 +6058,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], - Opnds.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } } @@ -5587,6 +6080,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue Reduced = ReduceLoadWidth(N); if (Reduced.getNode()) return Reduced; + // Handle the case where the load remains an extending load even + // after truncation. + if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (!LN0->isVolatile() && + LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { + SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getMemoryVT(), + LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); + return NewLoad; + } + } } // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), // where ... are all 'undef'. 
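
For illustration only (not part of the patch): a tiny sketch, in plain C++ with our own names, of the new trunc (select c, a, b) -> select c, (trunc a), (trunc b) fold above. Distributing the truncation is sound because truncating either arm commutes with choosing between them; the patch only does it when truncation is free and the narrower select stays legal.

    #include <cassert>
    #include <cstdint>

    uint8_t truncOfSelect(bool C, uint32_t A, uint32_t B) {
      // select c, (trunc a), (trunc b)
      return C ? static_cast<uint8_t>(A) : static_cast<uint8_t>(B);
    }

    int main() {
      uint32_t A = 0x1234, B = 0x5678;
      for (bool C : {false, true}) // equals trunc (select c, a, b)
        assert(truncOfSelect(C, A, B) == static_cast<uint8_t>(C ? A : B));
      return 0;
    }
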
@@ -5623,11 +6130,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { continue; } SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); - AddToWorkList(NV.getNode()); + AddToWorklist(NV.getNode()); Opnds.push_back(NV); } - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, - &Opnds[0], Opnds.size()); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); } } @@ -5654,8 +6160,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || - LD1->getPointerInfo().getAddrSpace() != - LD2->getPointerInfo().getAddrSpace()) + LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); @@ -5691,14 +6196,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (!LegalTypes && N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && VT.isVector()) { - bool isSimple = true; - for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) - if (N0.getOperand(i).getOpcode() != ISD::UNDEF && - N0.getOperand(i).getOpcode() != ISD::Constant && - N0.getOperand(i).getOpcode() != ISD::ConstantFP) { - isSimple = false; - break; - } + bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); EVT DestEltVT = N->getValueType(0).getVectorElementType(); assert(!DestEltVT.isVector() && @@ -5734,6 +6232,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile() && + // Do not remove the cast if the types differ in endian layout. + TLI.hasBigEndianPartOrdering(N0.getValueType()) == + TLI.hasBigEndianPartOrdering(VT) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -5747,7 +6248,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), OrigAlign, LN0->getTBAAInfo()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BITCAST, SDLoc(N0), N0.getValueType(), Load), @@ -5765,7 +6266,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { !VT.isVector() && !N0.getValueType().isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); - AddToWorkList(NewConv.getNode()); + AddToWorklist(NewConv.getNode()); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) @@ -5788,34 +6289,34 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (isTypeLegal(IntXVT)) { SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), IntXVT, N0.getOperand(1)); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. unsigned VTWidth = VT.getSizeInBits(); if (OrigXWidth < VTWidth) { X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. 
X = DAG.getNode(ISD::SRL, SDLoc(X), X.getValueType(), X, DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, VT)); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, VT)); - AddToWorkList(Cst.getNode()); + AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); } @@ -5871,10 +6372,9 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, Op)); - AddToWorkList(Ops.back().getNode()); + AddToWorklist(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } // Otherwise, we're growing or shrinking the elements. To avoid having to @@ -5930,8 +6430,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } // Finally, this must be the case where we are shrinking elements: each input @@ -5967,8 +6466,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } SDValue DAGCombiner::visitFADD(SDNode *N) { @@ -6389,7 +6887,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); - AddToWorkList(RHSNeg.getNode()); + AddToWorklist(RHSNeg.getNode()); return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); } } @@ -6551,12 +7049,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desireable only if SELECT_CC can be lowered. - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - // (See also visitSELECT) - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + // The next optimizations are desirable only if SELECT_CC can be lowered. 
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc) if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> // (select_cc x, y, 1.0, 0.0, cc) if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDValue Ops[] = { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); } } @@ -6608,12 +7102,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desireable only if SELECT_CC can be lowered. - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - // (See also visitSELECT) - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + // The next optimizations are desirable only if SELECT_CC can be lowered. + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc) if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); } } @@ -6681,7 +7171,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); - AddToWorkList(Tmp.getNode()); + AddToWorklist(Tmp.getNode()); return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, Tmp, N0.getOperand(1)); } @@ -6732,8 +7222,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -6765,6 +7254,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. + // TODO: We can also optimize for vectors here, but we need to make sure + // that the sign mask is created properly for each vector element.
if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && @@ -6774,7 +7265,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); - AddToWorkList(Int.getNode()); + AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } @@ -6783,11 +7274,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL) { ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - if (CFP1) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N0.getOperand(1))); + if (CFP1) { + APFloat CVal = CFP1->getValueAPF(); + CVal.changeSign(); + if (Level >= AfterLegalizeDAG && + (TLI.isFPImmLegal(CVal, N->getValueType(0)) || + TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0)))) + return DAG.getNode( + ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); + } } return SDValue(); @@ -6852,16 +7348,18 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. + // TODO: We can also optimize for vectors here, but we need to make sure + // that the sign mask is created properly for each vector element. if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && - !N0.getOperand(0).getValueType().isVector()) { + !VT.isVector()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); - AddToWorkList(Int.getNode()); + AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } @@ -6895,7 +7393,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && (N1.getOperand(0).hasOneUse() && N1.getOperand(0).getOpcode() == ISD::SRL))) { - SDNode *Trunc = 0; + SDNode *Trunc = nullptr; if (N1.getOpcode() == ISD::TRUNCATE) { // Look pass the truncate. Trunc = N1.getNode(); @@ -6944,13 +7442,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { CombineTo(N, NewBRCond, false); // Truncate is dead. if (Trunc) { - removeFromWorkList(Trunc); + removeFromWorklist(Trunc); DAG.DeleteNode(Trunc); } // Replace the uses of SRL with SETCC - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorkList(N1.getNode()); + removeFromWorklist(N1.getNode()); DAG.DeleteNode(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -6978,9 +7476,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { dbgs() << "\nWith: "; Tmp.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - removeFromWorkList(TheXor); + removeFromWorklist(TheXor); DAG.DeleteNode(TheXor); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); @@ -7009,9 +7507,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Op0, Op1, Equal ? 
ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorkList(N1.getNode()); + removeFromWorklist(N1.getNode()); DAG.DeleteNode(N1.getNode()); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); @@ -7037,7 +7535,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), CondLHS, CondRHS, CC->get(), SDLoc(N), false); - if (Simp.getNode()) AddToWorkList(Simp.getNode()); + if (Simp.getNode()) AddToWorklist(Simp.getNode()); // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) @@ -7176,9 +7674,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // a copy of the original base pointer. SmallVector<SDNode *, 16> OtherUses; if (isa<ConstantSDNode>(Offset)) - for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(), - E = BasePtr.getNode()->use_end(); I != E; ++I) { - SDNode *Use = *I; + for (SDNode *Use : BasePtr.getNode()->uses()) { if (Use == Ptr.getNode()) continue; @@ -7220,9 +7716,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 16> Worklist; - for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), - E = Ptr.getNode()->use_end(); I != E; ++I) { - SDNode *Use = *I; + for (SDNode *Use : Ptr.getNode()->uses()) { if (Use == N) continue; if (N->hasPredecessorHelper(Use, Visited, Worklist)) @@ -7251,7 +7745,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); @@ -7310,13 +7804,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDLoc(OtherUses[i]), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); - removeFromWorkList(OtherUses[i]); + removeFromWorklist(OtherUses[i]); DAG.DeleteNode(OtherUses[i]); } // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); - removeFromWorkList(Ptr.getNode()); + removeFromWorklist(Ptr.getNode()); DAG.DeleteNode(Ptr.getNode()); return true; @@ -7358,9 +7852,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Ptr.getNode()->hasOneUse()) return false; - for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), - E = Ptr.getNode()->use_end(); I != E; ++I) { - SDNode *Op = *I; + for (SDNode *Op : Ptr.getNode()->uses()) { if (Op == N || (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) continue; @@ -7386,9 +7878,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Check for #1. bool TryNext = false; - for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(), - EE = BasePtr.getNode()->use_end(); II != EE; ++II) { - SDNode *Use = *II; + for (SDNode *Use : BasePtr.getNode()->uses()) { if (Use == Ptr.getNode()) continue; @@ -7396,9 +7886,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // transformation. 
if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ bool RealUse = false; - for (SDNode::use_iterator III = Use->use_begin(), - EEE = Use->use_end(); III != EEE; ++III) { - SDNode *UseUse = *III; + for (SDNode *UseUse : Use->uses()) { if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -7427,7 +7915,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); @@ -7441,7 +7929,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), Result.getValue(isLoad ? 1 : 0)); - removeFromWorkList(Op); + removeFromWorklist(Op); DAG.DeleteNode(Op); return true; } @@ -7474,11 +7962,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); dbgs() << "\n"); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); if (N->use_empty()) { - removeFromWorkList(N); + removeFromWorklist(N); DAG.DeleteNode(N); } @@ -7494,12 +7982,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); dbgs() << " and 2 other values\n"); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getUNDEF(N->getValueType(1))); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); - removeFromWorkList(N); + removeFromWorklist(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -7537,7 +8025,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && LD->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -7561,7 +8054,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { MVT::Other, Chain, ReplLoad.getValue(1)); // Make sure the new and old chains are cleaned up. - AddToWorkList(Token.getNode()); + AddToWorklist(Token.getNode()); // Replace uses with load result and token factor. Don't add users // to work list. @@ -7686,8 +8179,8 @@ struct LoadedSlice { // This is used to get some contextual information about legal types, etc. SelectionDAG *DAG; - LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL, - unsigned Shift = 0, SelectionDAG *DAG = NULL) + LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr, + unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} LoadedSlice(const LoadedSlice &LS) @@ -7783,7 +8276,7 @@ struct LoadedSlice { /// \brief Get the offset in bytes of this slice in the original chunk of /// bits. - /// \pre DAG != NULL. + /// \pre DAG != nullptr. 
uint64_t getOffsetFromBase() const { assert(DAG && "Missing context."); bool IsBigEndian = @@ -7888,14 +8381,6 @@ struct LoadedSlice { }; } -/// \brief Sorts LoadedSlice according to their offset. -struct LoadedSliceSorter { - bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { - assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); - return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); - } -}; - /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { @@ -7939,12 +8424,16 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, // Sort the slices so that elements that are likely to be next to each // other in memory are next to each other in the list. - std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + std::sort(LoadedSlices.begin(), LoadedSlices.end(), + [](const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + }); const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); // First (resp. Second) is the first (resp. second) potential candidate // to be placed in a paired load. - const LoadedSlice *First = NULL; - const LoadedSlice *Second = NULL; + const LoadedSlice *First = nullptr; + const LoadedSlice *Second = nullptr; for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, // Set the beginning of the pair. First = Second) { @@ -7966,7 +8455,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, unsigned RequiredAlignment = 0; if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { // move to the next pair, this type is hopeless. - Second = NULL; + Second = nullptr; continue; } // Check if we meet the alignment requirement. @@ -7980,7 +8469,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); --GlobalLSCost.Loads; // Move to the next pair. - Second = NULL; + Second = nullptr; } } @@ -8075,8 +8564,8 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // The width of the type must be a power of 2 and greater than 8-bits. // Otherwise the load cannot be represented in LLVM IR. - // Moreover, if we shifted with a non 8-bits multiple, the slice - // will be accross several bytes. We do not support that. + // Moreover, if we shifted with a non-8-bits multiple, the slice + // will be across several bytes. We do not support that. unsigned Width = User->getValueSizeInBits(0); if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) return 0; @@ -8124,7 +8613,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { } SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, - &ArgChains[0], ArgChains.size()); + ArgChains); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); return true; } @@ -8219,14 +8708,14 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); - if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; + if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type // legalization.
MVT VT = MVT::getIntegerVT(NumBytes*8); if (!DC->isTypeLegal(VT)) - return 0; + return nullptr; // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. @@ -8372,10 +8861,10 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { ST->getPointerInfo().getWithOffset(PtrOff), false, false, NewAlign); - AddToWorkList(NewPtr.getNode()); - AddToWorkList(NewLD.getNode()); - AddToWorkList(NewVal.getNode()); - WorkListRemover DeadNodes(*this); + AddToWorklist(NewPtr.getNode()); + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewVal.getNode()); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); ++OpsNarrowed; return NewST; @@ -8430,9 +8919,9 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { ST->getPointerInfo(), false, false, STAlign); - AddToWorkList(NewLD.getNode()); - AddToWorkList(NewST.getNode()); - WorkListRemover DeadNodes(*this); + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewST.getNode()); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); ++LdStFP2Int; return NewST; @@ -8543,17 +9032,6 @@ struct MemOpLink { unsigned SequenceNum; }; -/// Sorts store nodes in a link according to their offset from a shared -// base ptr. -struct ConsecutiveMemoryChainSorter { - bool operator()(MemOpLink LHS, MemOpLink RHS) { - return - LHS.OffsetFromBase < RHS.OffsetFromBase || - (LHS.OffsetFromBase == RHS.OffsetFromBase && - LHS.SequenceNum > RHS.SequenceNum); - } -}; - bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; @@ -8651,7 +9129,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { if (Ldn->isVolatile()) { - Index = NULL; + Index = nullptr; break; } @@ -8660,7 +9138,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { NextInChain = Ldn->getChain().getNode(); continue; } else { - Index = NULL; + Index = nullptr; break; } } @@ -8672,7 +9150,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Sort the memory operands according to their distance from the base pointer. std::sort(StoreNodes.begin(), StoreNodes.end(), - ConsecutiveMemoryChainSorter()); + [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase || + (LHS.OffsetFromBase == RHS.OffsetFromBase && + LHS.SequenceNum > RHS.SequenceNum); + }); // Scan the memory operations on the chain and find the first non-consecutive // store memory address. @@ -8720,7 +9202,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { NonZero |= !C->getConstantFPValue()->isNullValue(); } else { - // Non constant. + // Non-constant. break; } @@ -8831,7 +9313,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Since we know that St is redundant, just iterate. 
while (!St->use_empty()) DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - removeFromWorkList(St); + removeFromWorklist(St); DAG.DeleteNode(St); } @@ -9006,7 +9488,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - removeFromWorkList(St); + removeFromWorklist(St); DAG.DeleteNode(St); } @@ -9128,7 +9610,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && ST->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -9150,7 +9637,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { MVT::Other, Chain, ReplStore); // Make sure the new and old chains are cleaned up. - AddToWorkList(Token.getNode()); + AddToWorklist(Token.getNode()); // Don't add users to work list. return CombineTo(N, Token, false); @@ -9172,7 +9659,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { APInt::getLowBitsSet( Value.getValueType().getScalarType().getSizeInBits(), ST->getMemoryVT().getScalarType().getSizeInBits())); - AddToWorkList(Value.getNode()); + AddToWorklist(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); @@ -9251,6 +9738,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return SDValue(); unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + // Canonicalize insert_vector_elt dag nodes. + // Example: + // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) + // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) + // + // Do this only if the child insert_vector node has one use; also + // do this only if indices are both constants and Idx1 < Idx0. + if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() + && isa<ConstantSDNode>(InVec.getOperand(2))) { + unsigned OtherElt = + cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); + if (Elt < OtherElt) { + // Swap nodes. + SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, + InVec.getOperand(0), InVal, EltNo); + AddToWorklist(NewOp.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), + VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); + } + } + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. @@ -9280,8 +9788,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } // Return the new vector - return DAG.getNode(ISD::BUILD_VECTOR, dl, - VT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { @@ -9309,9 +9816,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD // patterns. For example on AVX, extracting elements from a wide vector - // without using extract_subvector. + // without using extract_subvector. However, if we can find an underlying + // scalar value, then we can always use that. 
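The insert_vector_elt canonicalization above reorders two chained inserts so that the smaller constant index ends up innermost; this is value-preserving because distinct constant indices write disjoint lanes. A concrete check of that claim, modeling vectors as plain arrays:

```cpp
#include <array>
#include <cassert>
#include <iostream>

// Model (insert_vector_elt V, Val, Idx) on a concrete 4-wide vector.
std::array<int, 4> insertElt(std::array<int, 4> V, int Val, unsigned Idx) {
  V[Idx] = Val;
  return V;
}

int main() {
  std::array<int, 4> A = {0, 0, 0, 0};

  // (insert (insert A, 7, Idx0=2), 9, Idx1=1) with Idx1 < Idx0 ...
  auto Before = insertElt(insertElt(A, 7, 2), 9, 1);
  // ... is canonicalized to nest the smaller index first:
  auto After = insertElt(insertElt(A, 9, 1), 7, 2);

  // Distinct constant indices never clash, so both orders agree.
  assert(Before == After);
  std::cout << "canonicalization is value-preserving\n";
}
```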
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE - && ConstEltNo && !LegalOperations) { + && ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); @@ -9323,16 +9831,32 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getUNDEF(NVT); // Select the right vector half to extract from. + SDValue SVInVec; if (OrigElt < NumElem) { - InVec = InVec->getOperand(0); + SVInVec = InVec->getOperand(0); } else { - InVec = InVec->getOperand(1); + SVInVec = InVec->getOperand(1); OrigElt -= NumElem; } - EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - InVec, DAG.getConstant(OrigElt, IndexTy)); + if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { + SDValue InOp = SVInVec.getOperand(OrigElt); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); + } + + return InOp; + } + + // FIXME: We should handle recursing on other vector shuffles and + // scalar_to_vector here as well. + + if (!LegalOperations) { + EVT IndexTy = TLI.getVectorIdxTy(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, + SVInVec, DAG.getConstant(OrigElt, IndexTy)); + } } // Perform only after legalization to ensure build_vector / vector_shuffle @@ -9370,8 +9894,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { NewLoad = true; } - LoadSDNode *LN0 = NULL; - const ShuffleVectorSDNode *SVN = NULL; + LoadSDNode *LN0 = nullptr; + const ShuffleVectorSDNode *SVN = nullptr; if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && @@ -9478,16 +10002,16 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { else Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); } - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); // Since we're explicitly calling ReplaceAllUses, add the new node to the worklist explicitly as well. - AddToWorkList(Load.getNode()); - AddUsersToWorkList(Load.getNode()); // Add users too + AddToWorklist(Load.getNode()); + AddUsersToWorklist(Load.getNode()); // Add users too // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorkList(N); + AddToWorklist(N); return SDValue(N, 0); } @@ -9596,10 +10120,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size()); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. - AddToWorkList(BV.getNode()); + AddToWorklist(BV.getNode()); // Bitcast to the desired type. return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -9664,9 +10188,8 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { else Opnds.push_back(In.getOperand(0)); } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, - &Opnds[0], Opnds.size()); - AddToWorkList(BV.getNode()); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds); + AddToWorklist(BV.getNode()); return DAG.getNode(Opcode, dl, VT, BV); } @@ -9706,7 +10229,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // constant index, bail out.
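The rewrite above resolves extract(shuffle(x, y, Mask), Elt) by picking the shuffle operand the lane actually reads, rebasing the index into that operand, and returning a BUILD_VECTOR operand directly when one is available. A plain-C++ sketch of those steps (hypothetical helper, not the DAG API):

```cpp
#include <cassert>
#include <iostream>
#include <vector>

// Resolve extract(shuffle(X, Y, Mask), Elt) to a scalar when the chosen
// source is a build_vector whose operands we can read directly.
// Returns true and sets Out on success; -1 in the mask means undef.
bool extractThroughShuffle(const std::vector<int> &X, const std::vector<int> &Y,
                           const std::vector<int> &Mask, unsigned Elt,
                           int &Out) {
  int Idx = Mask[Elt];
  if (Idx < 0)
    return false;                       // undef lane: caller handles it
  int NumElem = (int)X.size();
  // Pick the shuffle operand this lane actually reads, then rebase the
  // index into that operand -- the same two steps the combiner performs.
  const std::vector<int> &Src = Idx < NumElem ? X : Y;
  int OrigElt = Idx < NumElem ? Idx : Idx - NumElem;
  Out = Src[OrigElt];                   // the underlying scalar value
  return true;
}

int main() {
  std::vector<int> X = {10, 11, 12, 13}, Y = {20, 21, 22, 23};
  std::vector<int> Mask = {6, 0, -1, 3}; // lane 0 reads Y[2]
  int Val = 0;
  assert(extractThroughShuffle(X, Y, Mask, 0, Val) && Val == 22);
  assert(extractThroughShuffle(X, Y, Mask, 3, Val) && Val == 13);
  std::cout << "resolved scalars without materializing the shuffle\n";
}
```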
if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { - VecIn1 = VecIn2 = SDValue(0, 0); + VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } @@ -9715,18 +10238,18 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; - if (VecIn1.getNode() == 0) { + if (!VecIn1.getNode()) { VecIn1 = ExtractedFromVec; - } else if (VecIn2.getNode() == 0) { + } else if (!VecIn2.getNode()) { VecIn2 = ExtractedFromVec; } else { // Too many inputs. - VecIn1 = VecIn2 = SDValue(0, 0); + VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -9756,7 +10279,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Attempt to transform a single input vector to the correct type. if ((VT != VecIn1.getValueType())) { // We don't support shuffling between TWO values of different types. - if (VecIn2.getNode() != 0) + if (VecIn2.getNode()) return SDValue(); // We only support widening of vectors which are half the size of the @@ -9839,6 +10362,39 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) + // -> (BUILD_VECTOR A, B, ..., C, D, ...) + if (N->getNumOperands() == 2 && + N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && + N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SmallVector<SDValue, 8> Opnds; + unsigned BuildVecNumElts = N0.getNumOperands(); + + EVT SclTy0 = N0.getOperand(0)->getValueType(0); + EVT SclTy1 = N1.getOperand(0)->getValueType(0); + if (SclTy0.isFloatingPoint()) { + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N0.getOperand(i)); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N1.getOperand(i)); + } else { + // If the BUILD_VECTORs are built from integers, they may have different + // operand types. Get the smaller type and truncate all operands to it. + EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1; + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N0.getOperand(i))); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N1.getOperand(i))); + } + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + } + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -9993,8 +10549,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { } } - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(), - Ops.size()); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { @@ -10110,22 +10665,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If this shuffle node is simply a swizzle of another shuffle node, - // and it reverses the swizzle of the previous shuffle then we can - // optimize shuffle(shuffle(x, undef), undef) -> x. + // then try to simplify it.
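For the integer case of the concat_vectors-of-BUILD_VECTORs fold above, the two operand lists may carry different scalar widths, so everything is truncated to the smaller type before concatenation. A sketch with widths tracked by hand and truncation modeled as masking:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Truncate a value to the given bit width (stand-in for ISD::TRUNCATE).
uint64_t truncTo(uint64_t V, unsigned Bits) {
  return Bits >= 64 ? V : V & ((1ULL << Bits) - 1);
}

int main() {
  // Two BUILD_VECTOR-like operand lists with different scalar widths.
  std::vector<uint64_t> N0 = {0x1ABCD, 0x2ABCD}; // i32 operands
  std::vector<uint64_t> N1 = {0xFF01, 0xFF02};   // i16 operands
  unsigned SclTy0 = 32, SclTy1 = 16;

  // Integer case of the fold: pick the smaller scalar type and truncate
  // every operand to it before concatenating.
  unsigned MinTy = SclTy0 <= SclTy1 ? SclTy0 : SclTy1;
  std::vector<uint64_t> Opnds;
  for (uint64_t V : N0) Opnds.push_back(truncTo(V, MinTy));
  for (uint64_t V : N1) Opnds.push_back(truncTo(V, MinTy));

  for (uint64_t V : Opnds)
    std::cout << std::hex << "0x" << V << ' '; // 0xabcd 0xabcd 0xff01 0xff02
  std::cout << '\n';
}
```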
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && N1.getOpcode() == ISD::UNDEF) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - // Shuffle nodes can only reverse shuffles with a single non-undef value. - if (N0.getOperand(1).getOpcode() != ISD::UNDEF) - return SDValue(); - // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match"); + SmallVector<int, 4> Mask; + // Compute the combined shuffle mask. for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); assert(Idx < (int)NumElts && "Index references undef operand"); @@ -10133,13 +10685,174 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle. Adopt the incoming index. if (Idx >= 0) Idx = OtherSV->getMaskElt(Idx); + Mask.push_back(Idx); + } + + bool CommuteOperands = false; + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) { + // To be valid, the combined shuffle mask should only reference elements + // from one of the two vectors in input to the inner shufflevector. + bool IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // See if the combined mask only references undefs or elements coming + // from the first shufflevector operand. + IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts; + + if (!IsValidMask) { + IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // Check that all the elements come from the second shuffle operand. + IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts; + CommuteOperands = IsValidMask; + } - // The combined shuffle must map each index to itself. - if (Idx >= 0 && (unsigned)Idx != i) + // Early exit if the combined shuffle mask is not valid. + if (!IsValidMask) return SDValue(); } - return OtherSV->getOperand(0); + // See if this pair of shuffles can be safely folded according to either + // of the following rules: + // shuffle(shuffle(x, y), undef) -> x + // shuffle(shuffle(x, undef), undef) -> x + // shuffle(shuffle(x, y), undef) -> y + bool IsIdentityMask = true; + unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0; + for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) { + // Skip Undefs. + if (Mask[i] < 0) + continue; + + // The combined shuffle must map each index to itself. + IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; + } + + if (IsIdentityMask) { + if (CommuteOperands) + // optimize shuffle(shuffle(x, y), undef) -> y. + return OtherSV->getOperand(1); + + // optimize shuffle(shuffle(x, undef), undef) -> x + // optimize shuffle(shuffle(x, y), undef) -> x + return OtherSV->getOperand(0); + } + + // It may still be beneficial to combine the two shuffles if the + // resulting shuffle is legal. + if (TLI.isTypeLegal(VT) && TLI.isShuffleMaskLegal(Mask, VT)) { + if (!CommuteOperands) + // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
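The mask composition and the two checks above (all lanes reading one operand, and the identity test) can be isolated into small helpers. A standalone sketch using -1 for undef lanes, exercised on a reverse shuffle that a second reverse undoes:

```cpp
#include <cassert>
#include <iostream>
#include <vector>

// Compose shuffle(shuffle(x, y, InnerMask), undef, OuterMask) into one
// mask, exactly as the combiner does: each outer lane adopts the inner
// mask entry it points at; -1 propagates undef.
std::vector<int> composeMasks(const std::vector<int> &InnerMask,
                              const std::vector<int> &OuterMask) {
  std::vector<int> Mask;
  for (int Idx : OuterMask)
    Mask.push_back(Idx < 0 ? -1 : InnerMask[Idx]);
  return Mask;
}

// The fold to x (or y) is only safe when every lane reads one operand.
bool allFromFirst(const std::vector<int> &M, unsigned NumElts) {
  for (int Idx : M)
    if (Idx >= (int)NumElts) return false;
  return true;
}

bool isIdentity(const std::vector<int> &M, unsigned Base) {
  for (unsigned i = 0; i != M.size(); ++i)
    if (M[i] >= 0 && (unsigned)M[i] != i + Base) return false;
  return true;
}

int main() {
  unsigned NumElts = 4;
  std::vector<int> Inner = {3, 2, 1, 0};  // reverse of x
  std::vector<int> Outer = {3, 2, -1, 0}; // re-reverse (lane 2 undef)
  std::vector<int> M = composeMasks(Inner, Outer);
  assert(allFromFirst(M, NumElts) && isIdentity(M, /*Base=*/0));
  std::cout << "shuffle(shuffle(x, undef), undef) folds to x\n";
}
```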
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, + &Mask[0]); + + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), + &Mask[0]); + } + } + + // Canonicalize shuffles according to rules: + // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) + // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) + // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF && + N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + TLI.isTypeLegal(VT)) { + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(N1->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = N1->getOperand(0); + SDValue SV1 = N1->getOperand(1); + bool HasSameOp0 = N0 == SV0; + bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + if (HasSameOp0 || IsSV1Undef || N0 == SV1) + // Commute the operands of this shuffle so that next rule + // will trigger. + return DAG.getCommutedVectorShuffle(*SVN); + } + + // Try to fold according to rules: + // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + // Don't try to fold shuffles with illegal type. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) { + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = OtherSV->getOperand(0); + SDValue SV1 = OtherSV->getOperand(1); + bool HasSameOp0 = N1 == SV0; + bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + if (!HasSameOp0 && !IsSV1Undef && N1 != SV1) + // Early exit. + return SDValue(); + + SmallVector<int, 4> Mask; + // Compute the combined shuffle mask for a shuffle with SV0 as the first + // operand, and SV1 as the second operand. + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx < 0) { + // Propagate Undef. + Mask.push_back(Idx); + continue; + } + + if (Idx < (int)NumElts) { + Idx = OtherSV->getMaskElt(Idx); + if (IsSV1Undef && Idx >= (int) NumElts) + Idx = -1; // Propagate Undef. + } else + Idx = HasSameOp0 ? Idx - NumElts : Idx; + + Mask.push_back(Idx); + } + + // Avoid introducing shuffles with illegal mask. + if (TLI.isShuffleMaskLegal(Mask, VT)) { + if (IsSV1Undef) + // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N2 = N->getOperand(2); + + // If the input vector is a concatenation, and the insert replaces + // one of the halves, we can optimize into a single concat_vectors. 
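The insert_subvector rule just stated replaces one half of a concatenation outright, so the insert node disappears; the DAG implementation follows below. A sketch over plain vectors, assuming the inserted subvector is exactly half-sized:

```cpp
#include <cassert>
#include <iostream>
#include <vector>

using Vec = std::vector<int>;

// insert_subvector(concat(X, Y), Z, InsIdx) where Z replaces exactly one
// half becomes a single concat -- no insert node survives.
Vec foldInsertIntoConcat(const Vec &X, const Vec &Y, const Vec &Z,
                         unsigned InsIdx) {
  unsigned Half = (unsigned)X.size();
  Vec Out;
  if (InsIdx == 0) {                    // lower half: concat(Z, Y)
    Out = Z; Out.insert(Out.end(), Y.begin(), Y.end());
  } else {                              // upper half: concat(X, Z)
    assert(InsIdx == Half && "only whole-half replacement folds");
    Out = X; Out.insert(Out.end(), Z.begin(), Z.end());
  }
  return Out;
}

int main() {
  Vec X = {1, 2}, Y = {3, 4}, Z = {9, 9};
  Vec Lo = foldInsertIntoConcat(X, Y, Z, 0); // {9, 9, 3, 4}
  Vec Hi = foldInsertIntoConcat(X, Y, Z, 2); // {1, 2, 9, 9}
  assert(Lo == (Vec{9, 9, 3, 4}) && Hi == (Vec{1, 2, 9, 9}));
  std::cout << "both half-replacements fold to one concat\n";
}
```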
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && + N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { + APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); + EVT VT = N->getValueType(0); + + // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors Z, Y) + if (InsIdx == 0) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N->getOperand(1), N0.getOperand(1)); + + // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors X, Z) + if (InsIdx == VT.getVectorNumElements()/2) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N0.getOperand(0), N->getOperand(1)); } return SDValue(); @@ -10182,8 +10895,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT EltVT = RVT.getVectorElementType(); SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - RVT, &ZeroOps[0], ZeroOps.size()); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); @@ -10207,18 +10919,15 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // this operation. if (LHS.getOpcode() == ISD::BUILD_VECTOR && RHS.getOpcode() == ISD::BUILD_VECTOR) { + // Check if both vectors are constants. If not, bail out. + if (!(cast<BuildVectorSDNode>(LHS)->isConstant() && + cast<BuildVectorSDNode>(RHS)->isConstant())) + return SDValue(); + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { SDValue LHSOp = LHS.getOperand(i); SDValue RHSOp = RHS.getOperand(i); - // If these two elements can't be folded, bail out. - if ((LHSOp.getOpcode() != ISD::UNDEF && - LHSOp.getOpcode() != ISD::Constant && - LHSOp.getOpcode() != ISD::ConstantFP) || - (RHSOp.getOpcode() != ISD::UNDEF && - RHSOp.getOpcode() != ISD::Constant && - RHSOp.getOpcode() != ISD::ConstantFP)) - break; // Can't fold divide by zero. if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || @@ -10251,12 +10960,32 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { FoldOp.getOpcode() != ISD::ConstantFP) break; Ops.push_back(FoldOp); - AddToWorkList(FoldOp.getNode()); + AddToWorklist(FoldOp.getNode()); } if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - LHS.getValueType(), &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); + } + + // Type legalization might introduce new shuffles in the DAG. + // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) + // -> (shuffle (VBinOp (A, B)), Undef, Mask).
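The VBinOp fold just stated relies on element-wise operations commuting with a shuffle applied uniformly to both operands; the DAG code for it follows below. A numeric spot-check with plain vectors and addition (the mask here has no undef lanes, so the placeholder value never matters):

```cpp
#include <cassert>
#include <iostream>
#include <vector>

using Vec = std::vector<int>;

Vec shuffle(const Vec &A, const std::vector<int> &Mask) {
  Vec R;
  for (int Idx : Mask) R.push_back(Idx < 0 ? 0 : A[Idx]); // 0 for undef lanes
  return R;
}

Vec add(const Vec &A, const Vec &B) {
  Vec R;
  for (size_t i = 0; i != A.size(); ++i) R.push_back(A[i] + B[i]);
  return R;
}

int main() {
  Vec A = {1, 2, 3, 4}, B = {10, 20, 30, 40};
  std::vector<int> Mask = {3, 1, 2, 0};

  // Element-wise ops commute with a shuffle applied to both operands, so
  // one shuffle of the combined result replaces two input shuffles.
  assert(add(shuffle(A, Mask), shuffle(B, Mask)) ==
         shuffle(add(A, B), Mask));
  std::cout << "binop(shuffle(A,M), shuffle(B,M)) == shuffle(binop(A,B), M)\n";
}
```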
+ if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && + isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && + LHS.getOperand(1).getOpcode() == ISD::UNDEF && + RHS.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); + + if (SVN0->getMask().equals(SVN1->getMask())) { + EVT VT = N->getValueType(0); + SDValue UndefVector = LHS.getOperand(1); + SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + LHS.getOperand(0), RHS.getOperand(0)); + AddUsersToWorklist(N); + return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, + &SVN0->getMask()[0]); + } } return SDValue(); @@ -10285,14 +11014,13 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { FoldOp.getOpcode() != ISD::ConstantFP) break; Ops.push_back(FoldOp); - AddToWorkList(FoldOp.getNode()); + AddToWorklist(FoldOp.getNode()); } if (Ops.size() != N0.getNumOperands()) return SDValue(); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - N0.getValueType(), &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops); } SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, @@ -10313,7 +11041,7 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); - AddToWorkList(SETCC.getNode()); + AddToWorklist(SETCC.getNode()); return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SCC.getOperand(2), SCC.getOperand(3), SETCC); } @@ -10454,7 +11182,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Determine if the condition we're dealing with is constant SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, DL, false); - if (SCC.getNode()) AddToWorkList(SCC.getNode()); + if (SCC.getNode()) AddToWorklist(SCC.getNode()); ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); // fold select_cc true, x, y -> x @@ -10494,7 +11222,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { if (TLI.isTypeLegal(N2.getValueType()) && (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != - TargetLowering::Legal) && + TargetLowering::Legal && + !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && + !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && // If both constants have multiple uses, then we won't need to do an // extra load, they are likely around in registers for other users. 
(TV->hasOneUse() || FV->hasOneUse())) { @@ -10520,13 +11250,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); - AddToWorkList(Cond.getNode()); + AddToWorklist(Cond.getNode()); SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); - AddToWorkList(CstOffset.getNode()); + AddToWorklist(CstOffset.getNode()); CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); - AddToWorkList(CPIdx.getNode()); + AddToWorklist(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, Alignment); @@ -10551,11 +11281,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); } return DAG.getNode(ISD::AND, DL, AType, Shift, N2); @@ -10565,11 +11295,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); } return DAG.getNode(ISD::AND, DL, AType, Shift, N2); @@ -10609,8 +11339,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // fold select C, 16, 0 -> shl C, 4 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && - TLI.getBooleanContents(N0.getValueType().isVector()) == - TargetLowering::ZeroOrOneBooleanContent) { + TLI.getBooleanContents(N0.getValueType()) == + TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. @@ -10639,8 +11369,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, N2.getValueType(), SCC); } - AddToWorkList(SCC.getNode()); - AddToWorkList(Temp.getNode()); + AddToWorklist(SCC.getNode()); + AddToWorklist(Temp.getNode()); if (N2C->getAPIntValue() == 1) return Temp; @@ -10701,7 +11431,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // select_cc setlt X, 1, -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) if (N1C) { - ConstantSDNode *SubC = NULL; + ConstantSDNode *SubC = nullptr; if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || (N1C->isAllOnesValue() && CC == ISD::SETGT)) && N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) @@ -10719,8 +11449,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, getShiftAmountTy(N0.getValueType()))); SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), XType, N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); + AddToWorklist(Shift.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); } } @@ -10742,26 +11472,42 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// multiplying by a magic number. 
See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildSDIV(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (!C->getAPIntValue()) + return SDValue(); + std::vector<SDNode*> Built; - SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); + SDValue S = + TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); - for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); - ii != ee; ++ii) - AddToWorkList(*ii); + for (SDNode *N : Built) + AddToWorklist(N); return S; } -/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, +/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue DAGCombiner::BuildUDIV(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (!C->getAPIntValue()) + return SDValue(); + std::vector<SDNode*> Built; - SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); + SDValue S = + TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); - for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); - ii != ee; ++ii) - AddToWorkList(*ii); + for (SDNode *N : Built) + AddToWorklist(N); return S; } @@ -10771,7 +11517,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. - Base = Ptr; Offset = 0; GV = 0; CV = 0; + Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; // If it's adding a simple constant then integrate the offset. if (Base.getOpcode() == ISD::ADD) { @@ -10805,31 +11551,27 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, /// isAlias - Return true if there is any possibility that the two addresses /// overlap. -bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, - const Value *SrcValue1, int SrcValueOffset1, - unsigned SrcValueAlign1, - const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, bool IsVolatile2, - const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2, - const MDNode *TBAAInfo2) const { +bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are the same then they must be aliases. - if (Ptr1 == Ptr2) return true; + if (Op0->getBasePtr() == Op1->getBasePtr()) return true; // If they are both volatile then they cannot be reordered. - if (IsVolatile1 && IsVolatile2) return true; + if (Op0->isVolatile() && Op1->isVolatile()) return true; // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; const GlobalValue *GV1, *GV2; const void *CV1, *CV2; - bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); - bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); + bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), + Base1, Offset1, GV1, CV1); + bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), + Base2, Offset2, GV2, CV2); // If they have the same base address then check to see if they overlap.
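BuildSDIV/BuildUDIV above delegate to TLI to replace a divide-by-constant with a multiply by a precomputed magic number plus shifts. A self-contained illustration of the technique for unsigned division by 3, where the magic constant is ceil(2^33 / 3):

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>

// Unsigned divide-by-3 via multiply-high and shift: the kind of sequence
// TLI.BuildUDIV emits once the combiner has proven the divisor is a
// non-zero constant. 0xAAAAAAAB == ceil(2^33 / 3), so the multiply
// overestimates by less than one for every 32-bit input.
uint32_t udiv3(uint32_t X) {
  return (uint32_t)(((uint64_t)X * 0xAAAAAAABULL) >> 33);
}

int main() {
  for (uint32_t X : {0u, 1u, 2u, 3u, 100u, 0xFFFFFFFFu})
    assert(udiv3(X) == X / 3);
  std::cout << "multiply+shift matches integer division by 3\n";
}
```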
if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) - return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || + (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); // It is possible for different frame indices to alias each other, mostly // when tail call optimization reuses return address slots for arguments. @@ -10839,7 +11581,8 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); - return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || + (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); } // Otherwise, if we know what the bases are, and they aren't identical, then @@ -10851,28 +11594,44 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, // compared to the size and offset of the access, we may be able to prove they // do not alias. This check is conservative for now to catch cases created by // splitting vector types. - if ((SrcValueAlign1 == SrcValueAlign2) && - (SrcValueOffset1 != SrcValueOffset2) && - (Size1 == Size2) && (SrcValueAlign1 > Size1)) { - int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; - int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; + if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && + (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && + (Op0->getMemoryVT().getSizeInBits() >> 3 == + Op1->getMemoryVT().getSizeInBits() >> 3) && + (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) { + int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); + int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); // There is no overlap between these relatively aligned accesses of similar // size, return no alias. - if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) + if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || + (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) return false; } bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA && SrcValue1 && SrcValue2) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && + Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { // Use alias analysis information. 
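Both same-base paths above reduce to one disjointness test on byte intervals, with sizes derived from getMemoryVT().getSizeInBits() >> 3. The predicate in isolation, as a standalone sketch:

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>

// The same-base disjointness test used twice above: two accesses at
// (Offset, Size) alias unless one ends at or before the other begins.
bool mayOverlap(int64_t Offset1, int64_t Size1, int64_t Offset2,
                int64_t Size2) {
  return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
}

int main() {
  assert(!mayOverlap(0, 4, 4, 4)); // [0,4) and [4,8) are disjoint
  assert(mayOverlap(0, 8, 4, 4));  // [0,8) covers [4,8)
  assert(mayOverlap(4, 4, 0, 8));  // symmetric case
  std::cout << "byte-interval overlap check behaves as expected\n";
}
```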
- int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); - int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; - int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; + int64_t MinOffset = std::min(Op0->getSrcValueOffset(), + Op1->getSrcValueOffset()); + int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + + Op0->getSrcValueOffset() - MinOffset; + int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + + Op1->getSrcValueOffset() - MinOffset; AliasAnalysis::AliasResult AAResult = - AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), - AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2)); + AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), + Overlap1, + UseTBAA ? Op0->getTBAAInfo() : nullptr), + AliasAnalysis::Location(Op1->getMemOperand()->getValue(), + Overlap2, + UseTBAA ? Op1->getTBAAInfo() : nullptr)); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -10881,44 +11640,6 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, return true; } -bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { - SDValue Ptr0, Ptr1; - int64_t Size0, Size1; - bool IsVolatile0, IsVolatile1; - const Value *SrcValue0, *SrcValue1; - int SrcValueOffset0, SrcValueOffset1; - unsigned SrcValueAlign0, SrcValueAlign1; - const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; - FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, - SrcValueAlign0, SrcTBAAInfo0); - FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, - SrcValueAlign1, SrcTBAAInfo1); - return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, - SrcValueAlign0, SrcTBAAInfo0, - Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, - SrcValueAlign1, SrcTBAAInfo1); -} - -/// FindAliasInfo - Extracts the relevant alias information from the memory -/// node. Returns true if the operand was a nonvolatile load. -bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, bool &IsVolatile, - const Value *&SrcValue, - int &SrcValueOffset, - unsigned &SrcValueAlign, - const MDNode *&TBAAInfo) const { - LSBaseSDNode *LS = cast<LSBaseSDNode>(N); - - Ptr = LS->getBasePtr(); - Size = LS->getMemoryVT().getSizeInBits() >> 3; - IsVolatile = LS->isVolatile(); - SrcValue = LS->getSrcValue(); - SrcValueOffset = LS->getSrcValueOffset(); - SrcValueAlign = LS->getOriginalAlignment(); - TBAAInfo = LS->getTBAAInfo(); - return isa<LoadSDNode>(LS) && !IsVolatile; -} - /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, @@ -10927,15 +11648,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. - SDValue Ptr; - int64_t Size; - bool IsVolatile; - const Value *SrcValue; - int SrcValueOffset; - unsigned SrcValueAlign; - const MDNode *SrcTBAAInfo; - bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue, - SrcValueOffset, SrcValueAlign, SrcTBAAInfo); + bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); // Starting off. Chains.push_back(OriginalChain); @@ -10959,7 +11672,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, if (Depth > 6 || Aliases.size() == 2) { Aliases.clear(); Aliases.push_back(OriginalChain); - break; + return; } // Don't bother if we've been before. 
@@ -10974,24 +11687,12 @@ case ISD::LOAD: case ISD::STORE: { // Get alias information for Chain. - SDValue OpPtr; - int64_t OpSize; - bool OpIsVolatile; - const Value *OpSrcValue; - int OpSrcValueOffset; - unsigned OpSrcValueAlign; - const MDNode *OpSrcTBAAInfo; - bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpIsVolatile, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign, - OpSrcTBAAInfo); + bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) && + !cast<LSBaseSDNode>(Chain.getNode())->isVolatile(); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset, - SrcValueAlign, SrcTBAAInfo, - OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign, OpSrcTBAAInfo)) { + isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) { Aliases.push_back(Chain); } else { // Look further up the chain. @@ -11021,6 +11722,63 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, break; } } + + // We need to be careful here to also search for aliases through the + // value operand of a store, etc. Consider the following situation: + // Token1 = ... + // L1 = load Token1, %52 + // S1 = store Token1, L1, %51 + // L2 = load Token1, %52+8 + // S2 = store Token1, L2, %51+8 + // Token2 = Token(S1, S2) + // L3 = load Token2, %53 + // S3 = store Token2, L3, %52 + // L4 = load Token2, %53+8 + // S4 = store Token2, L4, %52+8 + // If we search for aliases of S3 (which loads address %52), and we look + // only through the chain, then we'll miss the trivial dependence on L1 + // (which also loads from %52). We then might change all loads and + // stores to use Token1 as their chain operand, which could result in + // copying %53 into %52 before copying %52 into %51 (which should + // happen first). + // + // The problem is, however, that searching for such data dependencies + // can become expensive, and the cost is not directly related to the + // chain depth. Instead, we'll rule out such configurations here by + // insisting that we've visited all chain users (except for users + // of the original chain, which is not necessary). When doing this, + // we need to look through nodes we don't care about (otherwise, things + // like register copies will interfere with trivial cases). + + SmallVector<const SDNode *, 16> Worklist; + for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(), + IE = Visited.end(); I != IE; ++I) + if (*I != OriginalChain.getNode()) + Worklist.push_back(*I); + + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + + // We have already visited M, and want to make sure we've visited any uses + // of M that we care about. For uses that we've not visited, and don't + // care about, queue them to the worklist. + + for (SDNode::use_iterator UI = M->use_begin(), + UIE = M->use_end(); UI != UIE; ++UI) + if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) { + if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) { + // We've not visited this use, and we care about it (it could have an + // ordering dependency with the original node). + Aliases.clear(); + Aliases.push_back(OriginalChain); + return; + } + + // We've not visited this use, but we don't care about it. Mark it as + // visited and enqueue it to the worklist.
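The safeguard above walks every user of the already-visited chain nodes and bails out conservatively when it finds an unvisited memory operation. The traversal shape, reduced to a toy graph with an explicit worklist and visited set (all names hypothetical):

```cpp
#include <iostream>
#include <set>
#include <vector>

// Shape of the safeguard above: starting from the visited set, walk all
// users; any unvisited memory-touching user forces a conservative
// bail-out. Nodes are ints and Users maps node -> its users.
struct Graph {
  std::vector<std::vector<int>> Users;
  std::vector<bool> TouchesMemory;
};

bool allMemUsersVisited(const Graph &G, std::set<int> Visited) {
  std::vector<int> Worklist(Visited.begin(), Visited.end());
  while (!Worklist.empty()) {
    int M = Worklist.back();
    Worklist.pop_back();
    for (int U : G.Users[M])
      if (Visited.insert(U).second) {   // first time we see U
        if (G.TouchesMemory[U])
          return false;                 // unvisited, and we care about it
        Worklist.push_back(U);          // don't care: keep looking through
      }
  }
  return true;
}

int main() {
  // Node 0 feeds 1 (a copy) which feeds 2 (a store we never visited).
  Graph G{{{1}, {2}, {}}, {false, false, true}};
  std::cout << (allMemUsersVisited(G, {0}) ? "safe\n" : "bail out\n");
}
```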
+ Worklist.push_back(*UI); + } + } } /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking @@ -11040,8 +11798,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return Aliases[0]; // Construct a custom tailored token factor. - return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, - &Aliases[0], Aliases.size()); + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } // SelectionDAG::Combine - This is the entry point for the file. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index a6f7461..ad75e91 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,18 +39,21 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "isel" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DebugInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" @@ -64,12 +67,29 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "isel" + STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " "target-independent selector"); STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " "target-specific selector"); STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); +/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// and called function attributes. +void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS, + unsigned AttrIdx) { + isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); + isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} + /// startNewBlock - Set the current block to which generated machine /// instructions will be appended, and clear the local CSE map. /// @@ -79,7 +99,7 @@ void FastISel::startNewBlock() { // Instructions are appended to FuncInfo.MBB. If the basic block already // contains labels or copies, use the last instruction as the last local // value. - EmitStartPt = 0; + EmitStartPt = nullptr; if (!FuncInfo.MBB->empty()) EmitStartPt = &FuncInfo.MBB->back(); LastLocalValue = EmitStartPt; @@ -118,7 +138,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { // No-op casts are trivially coalesced by fast-isel. 
if (const CastInst *Cast = dyn_cast<CastInst>(I)) - if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) && + if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && !hasTrivialKill(Cast->getOperand(0))) return false; @@ -133,7 +153,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { !(I->getOpcode() == Instruction::BitCast || I->getOpcode() == Instruction::PtrToInt || I->getOpcode() == Instruction::IntToPtr) && - cast<Instruction>(*I->use_begin())->getParent() == I->getParent(); + cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); } unsigned FastISel::getRegForValue(const Value *V) { @@ -192,7 +212,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. Reg = - getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); + getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { if (CF->isNullValue()) { Reg = TargetMaterializeFloatZero(CF); @@ -229,7 +249,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { Reg = lookUpRegForValue(Op); } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } @@ -335,20 +355,20 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I, FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; - DebugLoc OldDL = DL; + DebugLoc OldDL = DbgLoc; recomputeInsertPt(); - DL = DebugLoc(); + DbgLoc = DebugLoc(); SavePoint SP = { OldInsertPt, OldDL }; return SP; } void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) - LastLocalValue = llvm::prior(FuncInfo.InsertPt); + LastLocalValue = std::prev(FuncInfo.InsertPt); // Restore the previous insert position. FuncInfo.InsertPt = OldInsertPt.InsertPt; - DL = OldInsertPt.DL; + DbgLoc = OldInsertPt.DL; } /// SelectBinaryOp - Select and emit code for a binary operator instruction, @@ -484,7 +504,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset - TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) @@ -503,7 +523,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { if (CI->isZero()) continue; // N = N + Offset TotalOffs += - TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) @@ -524,7 +544,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { } // N = N + Idx * ElementSize; - uint64_t ElementSize = TD.getTypeAllocSize(Ty); + uint64_t ElementSize = DL.getTypeAllocSize(Ty); std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); unsigned IdxN = Pair.first; bool IdxNIsKill = Pair.second; @@ -557,11 +577,472 @@ bool FastISel::SelectGetElementPtr(const User *I) { return true; } +/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands +/// to a stackmap or patchpoint machine instruction. 
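Before the stackmap lowering below, note how SelectGetElementPtr above folds constant GEP indices into a single running byte offset: struct fields consult the layout table, and array indices multiply by the element size. A sketch with a hypothetical layout standing in for the DataLayout queries:

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// FastISel-style constant GEP folding: walk the indices, accumulating a
// byte offset, instead of emitting an add per index. The layout below is
// a hypothetical stand-in for DataLayout's StructLayout.
struct StructLayout { std::vector<uint64_t> ElementOffsets; };

int main() {
  // struct S { int32_t a; int64_t b; int32_t c[4]; } with typical padding:
  StructLayout S{{0, 8, 16}};
  uint64_t Int32Size = 4;

  // getelementptr S, 0, 2, 3  ==  field 'c', array element 3.
  uint64_t TotalOffs = 0;
  TotalOffs += S.ElementOffsets[2]; // struct field: layout lookup
  TotalOffs += Int32Size * 3;       // array index: elem size * index
  assert(TotalOffs == 28);
  std::cout << "constant GEP folds to byte offset " << TotalOffs << '\n';
}
```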
+bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, + const CallInst *CI, unsigned StartIdx) { + for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { + Value *Val = CI->getArgOperand(i); + // Check for constants and encode them with a StackMaps::ConstantOp prefix. + if (auto *C = dyn_cast<ConstantInt>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(C->getSExtValue())); + } else if (isa<ConstantPointerNull>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(0)); + } else if (auto *AI = dyn_cast<AllocaInst>(Val)) { + // Values coming from a stack location also require a special encoding, + // but that is added later on by the target-specific frame index + // elimination implementation. + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + Ops.push_back(MachineOperand::CreateFI(SI->second)); + else + return false; + } else { + unsigned Reg = getRegForValue(Val); + if (Reg == 0) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + } + } + + return true; +} + +bool FastISel::SelectStackmap(const CallInst *I) { + // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, + // [live variables...]) + assert(I->getCalledFunction()->getReturnType()->isVoidTy() && + "Stackmap cannot return a value."); + + // The stackmap intrinsic only records the live variables (the arguments + // passed to it) and emits NOPs (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target-specific lowering code. + // Instead we perform the call lowering right here. + // + // CALLSEQ_START(0) + // STACKMAP(id, nbytes, ...) + // CALLSEQ_END(0, 0) + // + SmallVector<MachineOperand, 32> Ops; + + // Add the <id> and <numBytes> constants. + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Push live variables for the stack map (skipping the first two arguments + // <id> and <numBytes>). + if (!addStackMapLiveVars(Ops, I, 2)) + return false; + + // We are not adding any register mask info here, because the stackmap doesn't + // clobber anything. + + // Add scratch registers as implicit def and early clobber. + CallingConv::ID CC = I->getCallingConv(); + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + + // Issue CALLSEQ_START + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) + .addImm(0); + + // Issue STACKMAP.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::STACKMAP)); + for (auto const &MO : Ops) + MIB.addOperand(MO); + + // Issue CALLSEQ_END + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) + .addImm(0).addImm(0); + + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo()->setHasStackMap(); + + return true; +} + +/// \brief Lower an argument list according to the target calling convention. +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. +bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, + unsigned NumArgs, const Value *Callee, + bool ForceRetVoidTy, CallLoweringInfo &CLI) { + ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. + ImmutableCallSite CS(CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + ArgI != ArgE; ++ArgI) { + Value *V = CI->getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + ArgListEntry Entry; + Entry.Val = V; + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, AttrI); + Args.push_back(Entry); + } + + Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext()) + : CI->getType(); + CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs); + + return LowerCallTo(CLI); +} + +bool FastISel::SelectPatchpoint(const CallInst *I) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) + CallingConv::ID CC = I->getCallingConv(); + bool IsAnyRegCC = CC == CallingConv::AnyReg; + bool HasDef = !I->getType()->isVoidTy(); + Value *Callee = I->getOperand(PatchPointOpers::TargetPos); + + // Get the real number of arguments participating in the call <numArgs> + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && + "Expected a constant integer."); + const auto *NumArgsVal = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = NumArgsVal->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + // This includes all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + CallLoweringInfo CLI; + if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI)) + return false; + + assert(CLI.Call && "No call instruction specified."); + + SmallVector<MachineOperand, 32> Ops; + + // Add an explicit result reg if we use the anyreg calling convention. + if (IsAnyRegCC && HasDef) { + assert(CLI.NumResultRegs == 0 && "Unexpected result register."); + CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64)); + CLI.NumResultRegs = 1; + Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true)); + } + + // Add the <id> and <numBytes> constants. 
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Assume that the callee is a constant address or null pointer. + // FIXME: handle function symbols in the future. + unsigned CalleeAddr; + if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) + CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { + if (C->getOpcode() == Instruction::IntToPtr) + CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + else + llvm_unreachable("Unsupported ConstantExpr."); + } else if (isa<ConstantPointerNull>(Callee)) + CalleeAddr = 0; + else + llvm_unreachable("Unsupported callee address."); + + Ops.push_back(MachineOperand::CreateImm(CalleeAddr)); + + // Adjust <numArgs> to account for any arguments that have been passed on + // the stack instead. + unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size(); + Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs)); + + // Add the calling convention + Ops.push_back(MachineOperand::CreateImm((unsigned)CC)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (IsAnyRegCC) { + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) { + unsigned Reg = getRegForValue(I->getArgOperand(i)); + if (!Reg) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + } + } + + // Push the arguments from the call instruction. + for (auto Reg : CLI.OutRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + + // Push live variables for the stack map. + if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs)) + return false; + + // Push the register mask info. + Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC))); + + // Add scratch registers as implicit def and early clobber. + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + + // Add implicit defs (return values). + for (auto Reg : CLI.InRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true, + /*IsImpl=*/true)); + + // Insert the patchpoint instruction before the call generated by the target. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc, + TII.get(TargetOpcode::PATCHPOINT)); + + for (auto &MO : Ops) + MIB.addOperand(MO); + + MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + // Delete the original call instruction. + CLI.Call->eraseFromParent(); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); + + if (CLI.NumResultRegs) + UpdateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); + return true; +} + +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. 
+static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) { + SmallVector<Attribute::AttrKind, 2> Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); +} + +bool FastISel::LowerCallTo(const CallInst *CI, const char *SymName, + unsigned NumArgs) { + ImmutableCallSite CS(CI); + + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + + ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. + for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) { + Value *V = CI->getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + ArgListEntry Entry; + Entry.Val = V; + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, ArgI + 1); + Args.push_back(Entry); + } + + CallLoweringInfo CLI; + CLI.setCallee(RetTy, FTy, SymName, std::move(Args), CS, NumArgs); + + return LowerCallTo(CLI); +} + +bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { + // Handle the incoming return values from the call. + CLI.clearIns(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(TLI, CLI.RetTy, RetTys); + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI); + + bool CanLowerReturn = TLI.CanLowerReturn(CLI.CallConv, *FuncInfo.MF, + CLI.IsVarArg, Outs, + CLI.RetTy->getContext()); + + // FIXME: sret demotion isn't supported yet - bail out. + if (!CanLowerReturn) + return false; + + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + + // Handle all of the outgoing arguments. + CLI.clearOuts(); + for (auto &Arg : CLI.getArgs()) { + Type *FinalType = Arg.Ty; + if (Arg.isByVal) + FinalType = cast<PointerType>(Arg.Ty)->getElementType(); + bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + + ISD::ArgFlagsTy Flags; + if (Arg.isZExt) + Flags.setZExt(); + if (Arg.isSExt) + Flags.setSExt(); + if (Arg.isInReg) + Flags.setInReg(); + if (Arg.isSRet) + Flags.setSRet(); + if (Arg.isByVal) + Flags.setByVal(); + if (Arg.isInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling in + // the various CC lowering callbacks. + Flags.setByVal(); + } + if (Arg.isByVal || Arg.isInAlloca) { + PointerType *Ty = cast<PointerType>(Arg.Ty); + Type *ElementTy = Ty->getElementType(); + unsigned FrameSize = DL.getTypeAllocSize(ElementTy); + // For ByVal, alignment should come from FE. 
BE will guess if this info is + // not there, but there are cases it cannot get right. + unsigned FrameAlign = Arg.Alignment; + if (!FrameAlign) + FrameAlign = TLI.getByValTypeAlignment(ElementTy); + Flags.setByValSize(FrameSize); + Flags.setByValAlign(FrameAlign); + } + if (Arg.isNest) + Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); + unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); + Flags.setOrigAlign(OriginalAlignment); + + CLI.OutVals.push_back(Arg.Val); + CLI.OutFlags.push_back(Flags); + } + + if (!FastLowerCall(CLI)) + return false; + + // Set all unused physreg defs as dead. + assert(CLI.Call && "No call instruction specified."); + CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + if (CLI.NumResultRegs && CLI.CS) + UpdateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + + return true; +} + +bool FastISel::LowerCall(const CallInst *CI) { + ImmutableCallSite CS(CI); + + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FuncTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FuncTy->getReturnType(); + + ArgListTy Args; + ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + Entry.Val = V; + Entry.Ty = V->getType(); + + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Args.push_back(Entry); + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within FastLowerCall. + bool IsTailCall = CI->isTailCall(); + if (IsTailCall && !isInTailCallPosition(CS, TM)) + IsTailCall = false; + + CallLoweringInfo CLI; + CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) + .setTailCall(IsTailCall); + + return LowerCallTo(CLI); +} + bool FastISel::SelectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); // Handle simple inline asms. if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) { + // If the inline asm has side effects, then make sure that no local value + // lives across by flushing the local value map. + if (IA->hasSideEffects()) + flushLocalValueMap(); + // Don't attempt to handle constraints. if (!IA->getConstraintString().empty()) return false; @@ -572,7 +1053,7 @@ bool FastISel::SelectCall(const User *I) { if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) .addExternalSymbol(IA->getAsmString().c_str()) .addImm(ExtraInfo); @@ -582,26 +1063,37 @@ bool FastISel::SelectCall(const User *I) { MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); ComputeUsesVAFloatArgument(*Call, &MMI); - const Function *F = Call->getCalledFunction(); - if (!F) return false; + // Handle intrinsic function calls. + if (const auto *II = dyn_cast<IntrinsicInst>(Call)) + return SelectIntrinsicCall(II); - // Handle selected intrinsic function calls. - switch (F->getIntrinsicID()) { + // Usually, it does not make sense to initialize a value, + // make an unrelated function call and use the value, because + // it tends to be spilled on the stack. 
So, we move the pointer + // to the last local value to the beginning of the block, so that + // all the values which have already been materialized, + // appear after the call. It also makes sense to skip intrinsics + // since they tend to be inlined. + flushLocalValueMap(); + + return LowerCall(Call); +} + +bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { default: break; - // At -O0 we don't care about the lifetime intrinsics. + // At -O0 we don't care about the lifetime intrinsics. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - // The donothing intrinsic does, well, nothing. + // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: return true; - case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); DIVariable DIVar(DI->getVariable()); assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!DIVar || - !FuncInfo.MF->getMMI().hasDebugInfo()) { + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } @@ -618,7 +1110,7 @@ bool FastISel::SelectCall(const User *I) { // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Op = MachineOperand::CreateFI(Offset); + Op = MachineOperand::CreateFI(Offset); if (!Op) if (unsigned Reg = lookUpRegForValue(Address)) Op = MachineOperand::CreateReg(Reg, false); @@ -643,15 +1135,15 @@ bool FastISel::SelectCall(const User *I) { if (Op) { if (Op->isReg()) { Op->setIsDebug(true); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, DI->getVariable()); } else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op) - .addImm(0) - .addMetadata(DI->getVariable()); + .addOperand(*Op) + .addImm(0) + .addMetadata(DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -661,32 +1153,32 @@ bool FastISel::SelectCall(const User *I) { } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. - const DbgValueInst *DI = cast<DbgValueInst>(Call); + const DbgValueInst *DI = cast<DbgValueInst>(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(0U).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. bool IsIndirect = DI->getOffset() != 0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, DI->getOffset(), DI->getVariable()); } else { // We can't yet handle anything else here because it would require @@ -696,36 +1188,30 @@ bool FastISel::SelectCall(const User *I) { return true; } case Intrinsic::objectsize: { - ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); + ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); unsigned long long Res = CI->isZero() ? -1ULL : 0; - Constant *ResCI = ConstantInt::get(Call->getType(), Res); + Constant *ResCI = ConstantInt::get(II->getType(), Res); unsigned ResultReg = getRegForValue(ResCI); if (ResultReg == 0) return false; - UpdateValueMap(Call, ResultReg); + UpdateValueMap(II, ResultReg); return true; } case Intrinsic::expect: { - unsigned ResultReg = getRegForValue(Call->getArgOperand(0)); + unsigned ResultReg = getRegForValue(II->getArgOperand(0)); if (ResultReg == 0) return false; - UpdateValueMap(Call, ResultReg); + UpdateValueMap(II, ResultReg); return true; } + case Intrinsic::experimental_stackmap: + return SelectStackmap(II); + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + return SelectPatchpoint(II); } - // Usually, it does not make sense to initialize a value, - // make an unrelated function call and use the value, because - // it tends to be spilled on the stack. So, we move the pointer - // to the last local value to the beginning of the block, so that - // all the values which have already been materialized, - // appear after the call. It also makes sense to skip intrinsics - // since they tend to be inlined. - if (!isa<IntrinsicInst>(Call)) - flushLocalValueMap(); - - // An arbitrary call. Bail. - return false; + return FastLowerIntrinsicCall(II); } bool FastISel::SelectCast(const User *I, unsigned Opcode) { @@ -798,8 +1284,8 @@ bool FastISel::SelectBitCast(const User *I) { // Don't attempt a cross-class copy. It will likely fail. 
if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Op0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0); } } @@ -822,25 +1308,31 @@ FastISel::SelectInstruction(const Instruction *I) { if (!HandlePHINodesInSuccessorBlocks(I->getParent())) return false; - DL = I->getDebugLoc(); + DbgLoc = I->getDebugLoc(); MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; - // As a special case, don't handle calls to builtin library functions that - // may be translated directly to target instructions. if (const CallInst *Call = dyn_cast<CallInst>(I)) { const Function *F = Call->getCalledFunction(); LibFunc::Func Func; + + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. if (F && !F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) return false; + + // Don't handle Intrinsic::trap if a trap function is specified. + if (F && F->getIntrinsicID() == Intrinsic::trap && + !TM.Options.getTrapFunctionName().empty()) + return false; } // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; - DL = DebugLoc(); + DbgLoc = DebugLoc(); return true; } // Remove dead code. However, ignore call instructions since we've flushed @@ -855,7 +1347,7 @@ FastISel::SelectInstruction(const Instruction *I) { SavedInsertPt = FuncInfo.InsertPt; if (TargetSelectInstruction(I)) { ++NumFastIselSuccessTarget; - DL = DebugLoc(); + DbgLoc = DebugLoc(); return true; } // Check for dead code and remove as necessary. @@ -863,7 +1355,7 @@ FastISel::SelectInstruction(const Instruction *I) { if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); - DL = DebugLoc(); + DbgLoc = DebugLoc(); return false; } @@ -871,8 +1363,7 @@ FastISel::SelectInstruction(const Instruction *I) { /// unless it is the immediate (fall-through) successor, and update /// the CFG. void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction @@ -880,10 +1371,14 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { // fall-through case, which needs no instructions. } else { // The unconditional branch case. - TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, - SmallVector<MachineOperand, 0>(), DL); + TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, + SmallVector<MachineOperand, 0>(), DbgLoc); } - FuncInfo.MBB->addSuccessor(MSucc); + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), + MSucc->getBasicBlock()); + FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); } /// SelectFNeg - Emit an FNeg operation. @@ -1035,8 +1530,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } case Instruction::Unreachable: - // Nothing to emit. - return true; + if (TM.Options.TrapUnreachable) + return FastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; + else + return true; case Instruction::Alloca: // FunctionLowering has the static-sized case covered.
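A minimal sketch of how a backend opts into the hook-based call lowering added above: the base-class FastLowerCall/FastLowerIntrinsicCall (defined further down in this patch) simply return false, so a target overrides them and returns true once it has emitted the call. The class name MyTargetFastISel and the method bodies are illustrative assumptions, not part of this change.

#include "llvm/CodeGen/FastISel.h"
using namespace llvm;

class MyTargetFastISel : public FastISel {   // hypothetical target class
public:
  // Invoked from FastISel::LowerCallTo after the generic code has filled in
  // CLI.Ins/CLI.OutVals/CLI.OutFlags; returning false makes selection fall
  // back to the SelectionDAG path.
  bool FastLowerCall(CallLoweringInfo &CLI) override {
    if (CLI.IsVarArg)  // e.g. punt on varargs (an assumed target limitation)
      return false;
    // ...a real target would emit the call here and populate CLI.Call,
    // CLI.InRegs, CLI.ResultReg and CLI.NumResultRegs...
    return false;      // sketch only: nothing is actually emitted
  }
  // Invoked from SelectIntrinsicCall for intrinsics the generic switch
  // does not handle.
  bool FastLowerIntrinsicCall(const IntrinsicInst *II) override {
    return false;
  }
};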
@@ -1092,11 +1589,12 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { FastISel::FastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), + MF(funcInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), TM(FuncInfo.MF->getTarget()), - TD(*TM.getDataLayout()), + DL(*TM.getDataLayout()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), TRI(*TM.getRegisterInfo()), @@ -1109,6 +1607,14 @@ bool FastISel::FastLowerArguments() { return false; } +bool FastISel::FastLowerCall(CallLoweringInfo &/*CLI*/) { + return false; +} + +bool FastISel::FastLowerIntrinsicCall(const IntrinsicInst * /*II*/) { + return false; +} + unsigned FastISel::FastEmit_(MVT, MVT, unsigned) { return 0; @@ -1204,29 +1710,48 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { return MRI.createVirtualRegister(RC); } +unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, + unsigned Op, unsigned OpNum) { + if (TargetRegisterInfo::isVirtualRegister(Op)) { + const TargetRegisterClass *RegClass = + TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); + if (!MRI.constrainRegClass(Op, RegClass)) { + // If it's not legal to COPY between the register classes, something + // has gone very wrong before we got here. + unsigned NewOp = createResultReg(RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), NewOp).addReg(Op); + return NewOp; + } + } + return Op; +} + unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); return ResultReg; } unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; @@ -1236,19 +1761,22 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1258,21 +1786,25 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1281,19 +1813,22 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + RC = TII.getRegClass(II, II.getNumDefs(), &TRI, *FuncInfo.MF); + MRI.constrainRegClass(Op0, RC); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1302,21 +1837,23 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm1) .addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm1) .addImm(Imm2); - 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1325,19 +1862,21 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1347,21 +1886,24 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1371,21 +1913,24 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm1, uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm1).addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm1).addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + 
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1397,11 +1942,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1413,12 +1958,12 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addImm(Imm1).addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1432,7 +1977,7 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(TargetOpcode::COPY), ResultReg) + DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } @@ -1498,9 +2043,9 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise. - DL = PN->getDebugLoc(); + DbgLoc = PN->getDebugLoc(); if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) - DL = Inst->getDebugLoc(); + DbgLoc = Inst->getDebugLoc(); unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { @@ -1508,7 +2053,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); - DL = DebugLoc(); + DbgLoc = DebugLoc(); } } @@ -1523,7 +2068,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { // this by scanning the single-use users of the load until we get to FoldInst. unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. - const Instruction *TheUser = LI->use_back(); + const Instruction *TheUser = LI->user_back(); while (TheUser != FoldInst && // Scan up until we find FoldInst. // Stay in the right block. 
TheUser->getParent() == FoldInst->getParent() && @@ -1532,7 +2077,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { if (!TheUser->hasOneUse()) return false; - TheUser = TheUser->use_back(); + TheUser = TheUser->user_back(); } // If we didn't find the fold instruction, then we failed to collapse the @@ -1559,7 +2104,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { return false; MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); - MachineInstr *User = &*RI; + MachineInstr *User = RI->getParent(); // Set the insertion point properly. Folding the load can cause generation of // other random instructions (like sign extends) for addressing modes; make @@ -1576,8 +2121,8 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { if (!isa<AddOperator>(Add)) return false; // Type size needs to match. - if (TD.getTypeSizeInBits(GEP->getType()) != - TD.getTypeSizeInBits(Add->getType())) + if (DL.getTypeSizeInBits(GEP->getType()) != + DL.getTypeSizeInBits(Add->getType())) return false; // Must be in the same basic block. if (isa<Instruction>(Add) && @@ -1587,3 +2132,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); } +MachineMemOperand * +FastISel::createMachineMemOperandFor(const Instruction *I) const { + const Value *Ptr; + Type *ValTy; + unsigned Alignment; + unsigned Flags; + bool IsVolatile; + + if (const auto *LI = dyn_cast<LoadInst>(I)) { + Alignment = LI->getAlignment(); + IsVolatile = LI->isVolatile(); + Flags = MachineMemOperand::MOLoad; + Ptr = LI->getPointerOperand(); + ValTy = LI->getType(); + } else if (const auto *SI = dyn_cast<StoreInst>(I)) { + Alignment = SI->getAlignment(); + IsVolatile = SI->isVolatile(); + Flags = MachineMemOperand::MOStore; + Ptr = SI->getPointerOperand(); + ValTy = SI->getValueOperand()->getType(); + } else { + return nullptr; + } + + bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr; + bool IsInvariant = I->getMetadata("invariant.load") != nullptr; + const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. 
+ Alignment = DL.getABITypeAlignment(ValTy); + + unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy); + + if (IsVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (IsNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + if (IsInvariant) + Flags |= MachineMemOperand::MOInvariant; + + return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, + Alignment, TBAAInfo, Ranges); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index e9d2324..ae124e8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "function-lowering-info" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/Analysis.h" @@ -21,8 +20,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -32,14 +31,16 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "function-lowering-info" + /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by /// PHI nodes or outside of the basic block that defines it, or used by a /// switch or atomic instruction, which may expand to multiple basic blocks. @@ -47,12 +48,10 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { if (I->use_empty()) return false; if (isa<PHINode>(I)) return true; const BasicBlock *BB = I->getParent(); - for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - const User *U = *UI; + for (const User *U : I->users()) if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U)) return true; - } + return false; } @@ -76,7 +75,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // them. Function::const_iterator BB = Fn->begin(), EB = Fn->end(); for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + // Don't fold inalloca allocas or other dynamic allocas into the initial + // stack frame allocation, even if they are in the entry block. + if (!AI->isStaticAlloca()) + continue; + if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); @@ -87,17 +91,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. - // The object may need to be placed onto the stack near the stack - // protector if one exists. 
Determine here if this object is a suitable - // candidate. I.e., it would trigger the creation of a stack protector. - bool MayNeedSP = - (AI->isArrayAllocation() || - (TySize >= 8 && isa<ArrayType>(Ty) && - cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP, AI); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); } + } for (; BB != EB; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); @@ -120,7 +117,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for inline asm that clobbers the SP register. if (isa<CallInst>(I) || isa<InvokeInst>(I)) { ImmutableCallSite CS(I); - if (const InlineAsm *IA = dyn_cast<InlineAsm>(CS.getCalledValue())) { + if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); std::vector<TargetLowering::AsmOperandInfo> Ops = TLI->ParseConstraints(CS); @@ -287,11 +284,11 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { const FunctionLoweringInfo::LiveOutInfo * FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (!LiveOutRegInfo.inBounds(Reg)) - return NULL; + return nullptr; LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; if (!LOI->IsValid) - return NULL; + return nullptr; if (BitWidth > LOI->KnownZero.getBitWidth()) { LOI->NumSignBits = 1; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 856ef34..7c124b8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instr-emitter" #include "InstrEmitter.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/Statistic.h" @@ -31,6 +30,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "instr-emitter" + /// MinRCSize - Smallest register class we allow when constraining virtual /// registers. If satisfying all register class constraints would require /// using a smaller register class, emit a COPY to a new virtual register @@ -99,7 +100,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; - const TargetRegisterClass *UseRC = NULL; + const TargetRegisterClass *UseRC = nullptr; MVT VT = Node->getSimpleValueType(ResNo); // Stick to the preferred register classes for legal types. 
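The static-alloca sizing in the FunctionLoweringInfo hunk above is easy to misread in diff form: only statically sized entry-block allocas get a fixed frame slot, sized as the element type's alloc size scaled by the constant array count and clamped so no zero-sized object is created. A standalone restatement of that arithmetic (plain C++; the function name is invented for illustration):

#include <cstdint>

// Mirrors FunctionLoweringInfo::set above: TySize is the alloc size of the
// allocated type, scaled by the constant array count, and never zero.
static uint64_t staticAllocaObjectSize(uint64_t TypeAllocSize, // getTypeAllocSize(Ty)
                                       uint64_t ArrayCount) {  // CUI->getZExtValue()
  uint64_t TySize = TypeAllocSize * ArrayCount; // total allocated size
  if (TySize == 0)
    TySize = 1; // don't create zero-sized stack objects
  return TySize;
}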
@@ -107,9 +108,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = TLI->getRegClassFor(VT); if (!IsClone && !IsCloned) - for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Node->uses()) { bool Match = true; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && @@ -131,7 +130,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, Match = false; if (User->isMachineOpcode()) { const MCInstrDesc &II = TII->get(User->getMachineOpcode()); - const TargetRegisterClass *RC = 0; + const TargetRegisterClass *RC = nullptr; if (i+II.getNumDefs() < II.getNumOperands()) { RC = TRI->getAllocatableClass( TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); @@ -154,7 +153,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, break; } - const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. @@ -242,9 +241,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, } if (!VRBase && !IsClone && !IsCloned) - for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { @@ -329,7 +326,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, // shrink VReg's register class within reason. For example, if VReg == GR32 // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. if (II) { - const TargetRegisterClass *DstRC = 0; + const TargetRegisterClass *DstRC = nullptr; if (IIOpNum < II->getNumOperands()) DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { @@ -470,9 +467,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. - for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); @@ -561,10 +556,10 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const ConstantSDNode *SD = cast<ConstantSDNode>(N0); MIB.addImm(SD->getZExtValue()); } else - AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add the subregster being inserted - AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MIB.addImm(SubIdx); MBB->insert(InsertPos, MIB); @@ -693,10 +688,13 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U); } - if (Offset != 0) // Indirect addressing. + // Indirect addressing is indicated by an Imm as the second parameter. 
+ if (SD->isIndirect()) MIB.addImm(Offset); - else + else { + assert(Offset == 0 && "direct value cannot have an offset"); MIB.addReg(0U, RegState::Debug); + } MIB.addMetadata(MDPtr); @@ -738,19 +736,25 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NumDefs = II.getNumDefs(); - const uint16_t *ScratchRegs = NULL; - - // Handle PATCHPOINT specially and then use the generic code. - if (Opc == TargetOpcode::PATCHPOINT) { - unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); - NumDefs = NumResults; + const MCPhysReg *ScratchRegs = nullptr; + + // Handle STACKMAP and PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { + // Stackmaps do not have arguments and do not preserve their calling + // convention. However, to simplify runtime support, they clobber the same + // scratch registers as AnyRegCC. + unsigned CC = CallingConv::AnyReg; + if (Opc == TargetOpcode::PATCHPOINT) { + CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + } ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); } unsigned NumImpUses = 0; unsigned NodeOperands = countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); - bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -976,7 +980,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) - AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap, + AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Manually set isTied bits. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9061ae9..16c5b4b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -13,15 +13,16 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -152,10 +153,10 @@ private: public: // DAGUpdateListener implementation. 
- virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { ForgetNode(N); } - virtual void NodeUpdated(SDNode *N) {} + void NodeUpdated(SDNode *N) override {} // Node replacement helpers void ReplacedNode(SDNode *N) { @@ -269,7 +270,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { EVT OrigVT = VT; EVT SVT = VT; - while (SVT != MVT::f32) { + while (SVT != MVT::f32 && SVT != MVT::f16) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from @@ -386,9 +387,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, MinAlign(ST->getAlignment(), Offset), ST->getTBAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); return; } @@ -505,8 +504,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, false, false, 0)); // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, @@ -704,7 +702,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } } } - return SDValue(0, 0); + return SDValue(nullptr, 0); } void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { @@ -729,10 +727,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { MVT VT = Value.getSimpleValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + unsigned AS = ST->getAddressSpace(); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) @@ -740,6 +739,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG, TLI, this); } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) @@ -807,7 +807,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -818,7 +818,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the top RoundWidth bits. 
Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); @@ -826,7 +826,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -840,16 +840,18 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), StVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { + unsigned AS = ST->getAddressSpace(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) @@ -889,10 +891,11 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { + unsigned AS = LD->getAddressSpace(); // If this is an unaligned load and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); @@ -901,6 +904,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(RVal, DAG); if (Res.getNode()) { @@ -1017,7 +1021,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1047,7 +1051,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. 
Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1059,77 +1063,82 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH + isCustom = true; + // FALLTHROUGH case TargetLowering::Legal: { - Value = SDValue(Node, 0); - Chain = SDValue(Node, 1); - - if (isCustom) { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) { - Value = Res; - Chain = Res.getValue(1); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Value, Chain); - } - } - } - break; + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support + // it, expand it. + EVT MemVT = LD->getMemoryVT(); + unsigned AS = LD->getAddressSpace(); + if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) { + Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getDataLayout()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Value, Chain); + } + } + } + break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getMemOperand()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Chain = Load.getValue(1); - break; - } - - assert(!SrcVT.isVector() && - "Vector Loads are handled in LegalizeVectorOps"); - - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. - assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, SrcVT, - LD->getMemOperand()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Value = ValRes; - Chain = Result.getValue(1); - break; + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && + TLI.isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, + LD->getMemOperand()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? 
+ ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- + // and zero-extend operations are currently folded into extending + // loads, whether they are legal or not, and then we end up here + // without any support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an + // explicit zero/sign extend inreg. + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0), + Chain, Ptr, SrcVT, + LD->getMemOperand()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, + SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; } } @@ -1177,6 +1186,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action != TargetLowering::Promote) Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::FP_TO_FP16: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -1256,6 +1266,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::READ_REGISTER: + case ISD::WRITE_REGISTER: + // Named register is legal in the DAG, but blocked by register name + // selection if not implemented by target (to choose the correct register) + // They'll be converted to Copy(To/From)Reg. + Action = TargetLowering::Legal; + break; case ISD::DEBUGTRAP: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Expand) { @@ -1383,10 +1400,39 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); SDLoc dl(Op); - // Store the value to a temporary stack slot, then LOAD the returned part. - SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + + // Before we generate a new store to a temporary stack slot, see if there is + // already one that we can use. There often is because when we scalarize + // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole + // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in + // the vector. If all are expanded here, we don't want one store per vector + // element. + SDValue StackPtr, Ch; + for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), + UE = Vec.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) { + if (ST->isIndexed() || ST->isTruncatingStore() || + ST->getValue() != Vec) + continue; + + // Make sure that nothing else could have stored into the destination of + // this store.
+ if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) + continue; + + StackPtr = ST->getBasePtr(); + Ch = SDValue(ST, 0); + break; + } + } + + if (!Ch.getNode()) { + // Store the value to a temporary stack slot, then LOAD the returned part. + StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + } // Add the offset to the index. unsigned EltSize = @@ -1487,8 +1533,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDValue StoreChain; if (!Stores.empty()) // Not all undef elements? - StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); else StoreChain = DAG.getEntryNode(); @@ -1530,9 +1575,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // the pointer so that the loaded integer will contain the sign bit. unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; - LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), - LoadPtr, - DAG.getConstant(ByteOffset, LoadPtr.getValueType())); + LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, + DAG.getConstant(ByteOffset, LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1555,8 +1599,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // Select between the nabs and abs value based on the sign bit of // the input. return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1609,8 +1653,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// If the SETCC has been legalized using the inverse condcode, then LHS and /// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert /// will be set to true. The caller must invert the result of the SETCC with -/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a -/// true/false result. +/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect +/// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, @@ -1776,6 +1820,98 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { false, false, false, 0); } +static bool +ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, + const TargetLowering &TLI, SDValue &Res) { + unsigned NumElems = Node->getNumOperands(); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + + // Try to group the scalars into pairs, shuffle the pairs together, then + // shuffle the pairs of pairs together, etc. until the vector has + // been built. This will work only if all of the necessary shuffle masks + // are legal. + + // We do this in two phases; first to check the legality of the shuffles, + // and next, assuming that all shuffles are legal, to create the new nodes. 
+ for (int Phase = 0; Phase < 2; ++Phase) { + SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals, + NewIntermedVals; + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + + SDValue Vec; + if (Phase) + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V); + IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i))); + } + + while (IntermedVals.size() > 2) { + NewIntermedVals.clear(); + for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) { + // This vector and the next vector are shuffled together (simply to + // append the one to the other). + SmallVector<int, 16> ShuffleVec(NumElems, -1); + + SmallVector<int, 16> FinalIndices; + FinalIndices.reserve(IntermedVals[i].second.size() + + IntermedVals[i+1].second.size()); + + int k = 0; + for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = j; + FinalIndices.push_back(IntermedVals[i].second[j]); + } + for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = NumElems + j; + FinalIndices.push_back(IntermedVals[i+1].second[j]); + } + + SDValue Shuffle; + if (Phase) + Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, + IntermedVals[i+1].first, + ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices)); + } + + // If we had an odd number of defined values, then append the last + // element to the array of new vectors. + if ((IntermedVals.size() & 1) != 0) + NewIntermedVals.push_back(IntermedVals.back()); + + IntermedVals.swap(NewIntermedVals); + } + + assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 && + "Invalid number of intermediate vectors"); + SDValue Vec1 = IntermedVals[0].first; + SDValue Vec2; + if (IntermedVals.size() > 1) + Vec2 = IntermedVals[1].first; + else if (Phase) + Vec2 = DAG.getUNDEF(VT); + + SmallVector<int, 16> ShuffleVec(NumElems, -1); + for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[0].second[i]] = i; + for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; + + if (Phase) + Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + } + + return true; +} /// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't /// support the operation, but do support the resultant vector type. @@ -1850,25 +1986,38 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { false, false, false, Alignment); } - if (!MoreThanTwoValues) { - SmallVector<int, 8> ShuffleVec(NumElems, -1); - for (unsigned i = 0; i < NumElems; ++i) { - SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - ShuffleVec[i] = V == Value1 ? 0 : NumElems; - } - if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { - // Get the splatted value into the low element of a vector register. 
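The splat path that follows builds shuffle(SCALAR_TO_VECTOR(x), undef, <0,0,0,0>); in the two-value case the mask entries are 0 or NumElems so a lane can also come from a second vector. Splat semantics on plain arrays (an illustrative sketch only):

    #include <array>

    std::array<int, 4> splat4(int x) {
      std::array<int, 4> vec1{x, 0, 0, 0};  // SCALAR_TO_VECTOR: lanes 1-3 are
                                            // really undef; 0 is a stand-in
      const int mask[4] = {0, 0, 0, 0};     // every result lane reads lane 0
      std::array<int, 4> res{};
      for (int i = 0; i < 4; ++i)
        res[i] = vec1[mask[i]];             // VECTOR_SHUFFLE semantics
      return res;
    }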
- SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); - SDValue Vec2; - if (Value2.getNode()) - Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); - else - Vec2 = DAG.getUNDEF(VT); + SmallSet<SDValue, 16> DefinedValues; + for (unsigned i = 0; i < NumElems; ++i) { + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + DefinedValues.insert(Node->getOperand(i)); + } - // Return shuffle(LowValVec, undef, <0,0,0,0>) - return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) { + if (!MoreThanTwoValues) { + SmallVector<int, 8> ShuffleVec(NumElems, -1); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + ShuffleVec[i] = V == Value1 ? 0 : NumElems; + } + if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. + SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); + SDValue Vec2; + if (Value2.getNode()) + Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); + else + Vec2 = DAG.getUNDEF(VT); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + } + } else { + SDValue Res; + if (ExpandBVWithShuffles(Node, DAG, TLI, Res)) + return Res; } } @@ -1910,13 +2059,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, if (isTailCall) InChain = TCChain; - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), isTailCall, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, SDLoc(Node)); - std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). 
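The CallLoweringInfo rewrites that recur throughout this patch replace one long positional constructor with chained setters. The shape of that fluent-builder pattern, with illustrative member names rather than LLVM's actual ones:

    // Each setter returns *this, so optional flags read left-to-right and no
    // longer need a fixed slot in a constructor argument list.
    struct CallCfg {
      int conv = 0;
      bool tail = false, sext = false, zext = false;
      CallCfg &setCallingConv(int c) { conv = c; return *this; }
      CallCfg &setTailCall(bool t) { tail = t; return *this; }
      CallCfg &setSExtResult(bool v = true) { sext = v; return *this; }
      CallCfg &setZExtResult(bool v = true) { zext = v; return *this; }
    };

    // Usage mirrors the patch:
    //   CallCfg().setCallingConv(1).setTailCall(true).setSExtResult();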
@@ -1945,12 +2093,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, TLI.getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo.first; @@ -1979,11 +2127,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, SDLoc(Node)); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -2038,7 +2187,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } - return TLI.getLibcallName(LC) != 0; + return TLI.getLibcallName(LC) != nullptr; } /// useDivRem - Only issue divrem libcall if both quotient and remainder are @@ -2116,11 +2265,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, TLI.getPointerTy()); SDLoc dl(Node); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); // Remainder is loaded back from the stack frame. 
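A combined divrem libcall returns the quotient directly and writes the remainder through a pointer, which is why the code above loads it back from a stack slot; compiler-rt's __divmoddi4 has this shape. A hand-written stand-in (not the real implementation):

    #include <cstdint>

    int64_t divmod64(int64_t a, int64_t b, int64_t *rem) {
      *rem = a % b;   // stored through the pointer (the stack temporary)
      return a / b;   // quotient comes back as the return value
    }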
@@ -2141,7 +2290,7 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } - return TLI.getLibcallName(LC) != 0; + return TLI.getLibcallName(LC) != nullptr; } /// canCombineSinCosLibcall - Return true if sincos libcall is available and @@ -2230,12 +2379,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, TLI.getPointerTy()); SDLoc dl(Node); - TargetLowering:: - CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), + Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, @@ -2503,12 +2651,15 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + // A larger signed type can hold all unsigned values of the requested type, + // so using FP_TO_SINT is valid if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { OpToUse = ISD::FP_TO_SINT; break; } - if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. + if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { OpToUse = ISD::FP_TO_UINT; break; } @@ -2845,15 +2996,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If the target didn't lower this, lower it to '__sync_synchronize()' call // FIXME: handle "fence singlethread" more efficiently. TargetLowering::ArgListTy Args; - TargetLowering:: - CallLoweringInfo CLI(Node->getOperand(0), - Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__sync_synchronize", - TLI.getPointerTy()), - Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -2862,13 +3011,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. 
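Both atomic tricks in the surrounding hunks reduce richer operations to a plain compare-and-swap. A standalone sketch with std::atomic (illustrative only; the real lowering also threads the memory-ordering operands through):

    #include <atomic>
    #include <cstdint>

    // ATOMIC_LOAD faked as cmpxchg(addr, 0, 0): memory is never changed (a 0
    // is only ever replaced by 0), and the CAS reports the previous value.
    uint32_t atomicLoadViaCAS(std::atomic<uint32_t> &a) {
      uint32_t expected = 0;
      a.compare_exchange_strong(expected, 0);
      return expected;  // on failure the current value was written here
    }

    // ATOMIC_CMP_SWAP_WITH_SUCCESS from a CAS that only returns the previous
    // value: success is exactly "previous == expected" (the SETEQ below).
    bool casWithSuccess(std::atomic<uint32_t> &a, uint32_t expected,
                        uint32_t desired, uint32_t &previous) {
      uint32_t tmp = expected;
      a.compare_exchange_strong(tmp, desired);
      previous = tmp;
      return previous == expected;
    }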
SDValue Zero = DAG.getConstant(0, Node->getValueType(0)); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - cast<AtomicSDNode>(Node)->getMemoryVT(), - Node->getOperand(0), - Node->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); break; @@ -2905,6 +3055,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp.second); break; } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and + // splits out the success value as a comparison. Expanding the resulting + // ATOMIC_CMP_SWAP will produce a libcall. + SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), + Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getSuccessOrdering(), + cast<AtomicSDNode>(Node)->getFailureOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + + SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1), + Res, Node->getOperand(2), ISD::SETEQ); + + Results.push_back(Res.getValue(0)); + Results.push_back(Success); + Results.push_back(Res.getValue(1)); + break; + } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; @@ -2925,14 +3096,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::TRAP: { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; - TargetLowering:: - CallLoweringInfo CLI(Node->getOperand(0), - Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("abort", TLI.getPointerTy()), - Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -2986,6 +3154,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; + case ISD::FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); @@ -3099,7 +3271,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); // BUILD_VECTOR operands are allowed to be wider than the element type. - // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it + // But if NewEltVT is smaller than EltVT the BUILD_VECTOR does not accept + // it.
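When a wide-element vector is recast with narrower elements (the bitsLT case handled next), each original mask index m becomes `factor` consecutive narrow-lane indices. A sketch of that conversion on plain vectors (illustrative only):

    #include <vector>

    std::vector<int> widenMask(const std::vector<int> &mask, int factor) {
      std::vector<int> out;
      out.reserve(mask.size() * factor);
      for (int m : mask)
        for (int j = 0; j < factor; ++j)
          out.push_back(m < 0 ? -1 : m * factor + j);  // -1 stays undef
      return out;
    }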
if (NewEltVT.bitsLT(EltVT)) { // Convert shuffle node. @@ -3107,8 +3280,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. - EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, - VT.getSizeInBits()/NewEltVT.getSizeInBits()); + EVT NewVT = + EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits() / NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); // cast operands to new VT @@ -3116,7 +3290,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); // Convert the shuffle mask - unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + unsigned int factor = + NewVT.getVectorNumElements()/VT.getVectorNumElements(); // EltVT gets smaller assert(factor > 0); @@ -3155,7 +3330,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.getVectorIdxTy()))); } - Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); @@ -3339,12 +3514,28 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; - case ISD::FP16_TO_FP32: - Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + case ISD::FP16_TO_FP: { + if (Node->getValueType(0) == MVT::f32) { + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + break; + } + + // We can extend to types bigger than f32 in two steps without changing the + // result. Since "f16 -> f32" is much more commonly available, give CodeGen + // the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); break; - case ISD::FP32_TO_FP16: - Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false)); + } + case ISD::FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); + Results.push_back(ExpandLibCall(LC, Node, false)); break; + } case ISD::ConstantFP: { ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); // Check to see if this FP immediate is already legal. 
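The FP16_TO_FP expansion above leans on the fact that widening past f32 cannot change the value: every f16 is exactly representable in f32 and every f32 exactly in f64. Concretely, with compiler-rt's f16-to-f32 helper (the symbol behind RTLIB::FPEXT_F16_F32, declared by hand for this sketch):

    #include <cstdint>

    extern "C" float __gnu_h2f_ieee(uint16_t);  // provided by compiler-rt

    double halfToDouble(uint16_t h) {
      float f = __gnu_h2f_ieee(h);  // FP16_TO_FP to f32, commonly available
      return (double)f;             // FP_EXTEND: exact, no double rounding
    }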
@@ -3476,6 +3667,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(1))); break; } + + SDValue Lo, Hi; + EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext()); + if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::OR, VT) && + TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); + SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), + TLI.getShiftAmountTy(HalfType)); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32, @@ -3491,7 +3699,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - EVT OType = Node->getValueType(1); + EVT ResultType = Node->getValueType(1); + EVT OType = getSetCCResultType(Node->getValueType(0)); SDValue Zero = DAG.getConstant(0, LHS.getValueType()); @@ -3514,7 +3723,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Results.push_back(Cmp); + Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType)); break; } case ISD::UADDO: @@ -3525,9 +3734,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, - Node->getOpcode () == ISD::UADDO ? - ISD::SETULT : ISD::SETUGT)); + + EVT ResultType = Node->getValueType(1); + EVT SetCCType = getSetCCResultType(Node->getValueType(0)); + ISD::CondCode CC + = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); + + Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); break; } case ISD::UMULO: @@ -3549,8 +3763,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits() * 2))) { + } else if (TLI.isTypeLegal(WideVT)) { LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3708,7 +3921,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. if (NeedInvert) - Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0)); + Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); Results.push_back(Tmp1); break; @@ -3718,7 +3931,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // illegal; expand it into a SELECT_CC. 
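Two of the expansions above are easy to state on concrete integer types: UADDO's overflow bit is the unsigned compare Sum < LHS, and SADDO's is "the operands agree in sign but the sum does not" (the SignsMatch/SumSignNE logic). A standalone sketch:

    #include <cstdint>

    bool uaddo(uint32_t a, uint32_t b, uint32_t &sum) {
      sum = a + b;
      return sum < a;  // wrapped  <=>  carried out (the SETULT)
    }

    bool saddo(int32_t a, int32_t b, int32_t &sum) {
      uint32_t s = (uint32_t)a + (uint32_t)b;  // wrapping add, no UB
      sum = (int32_t)s;
      bool signsMatch = (a >= 0) == (b >= 0);
      return signsMatch && ((sum >= 0) != (a >= 0));
    }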
EVT VT = Node->getValueType(0); int TrueValue; - switch (TLI.getBooleanContents(VT.isVector())) { + switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = 1; @@ -3738,13 +3951,29 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = Node->getOperand(1); // RHS Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False + EVT VT = Node->getValueType(0); SDValue CC = Node->getOperand(4); + ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); + + if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) { + // If the condition code is legal, then we need to expand this + // node using SETCC and SELECT. + EVT CmpVT = Tmp1.getValueType(); + assert(!TLI.isOperationExpand(ISD::SELECT, VT) && + "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " + "expanded."); + EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT); + SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); + Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); + break; + } + // SELECT_CC is legal, so the condition code must not be. bool Legalized = false; // Try to legalize by inverting the condition. This is for targets that // might support an ordered version of a condition, but not the unordered // version (or vice versa). - ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType().isInteger()); if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false @@ -3782,8 +4011,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, - Tmp3, Tmp4, CC); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, + Tmp2, Tmp3, Tmp4, CC); } } Results.push_back(Tmp1); @@ -3813,8 +4042,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, - Tmp3, Node->getOperand(4)); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, + Tmp2, Tmp3, Node->getOperand(4)); } Results.push_back(Tmp1); break; @@ -3845,8 +4074,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { VT.getScalarType(), Ex, Sh)); } SDValue Result = - DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -3976,7 +4204,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } case ISD::SELECT: { unsigned ExtOp, TruncOp; - if (Node->getValueType(0).isVector()) { + if (Node->getValueType(0).isVector() || + Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) { ExtOp = ISD::BITCAST; TruncOp = ISD::BITCAST; } else if (Node->getValueType(0).isInteger()) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index ecf4c5d..649dd7a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define 
DEBUG_TYPE "legalize-types" + /// GetFPLibCall - Return the right libcall for the given floating point type. static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, @@ -83,7 +85,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; - case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break; + case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; @@ -371,6 +373,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); + + // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's + // entirely possible for both f16 and f32 to be legal, so use the fully + // hard-float FP_EXTEND rather than FP16_TO_FP. + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; @@ -378,16 +387,29 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special // nodes? -SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { + EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - SDLoc(N)).first; + SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1, + false, SDLoc(N)).first; + if (N->getValueType(0) == MVT::f32) + return Res32; + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); + if (N->getValueType(0) == MVT::f16) { + // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a + // storage-only type get a chance to select things. 
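"Storage-only" f16 means values live in memory as i16 bit patterns while all arithmetic happens in f32, which is why FP_TO_FP16 produces an integer. With compiler-rt's conversion helpers (real symbols, declared by hand here; the sketch illustrates the model, not the legalizer's exact output):

    #include <cstdint>

    extern "C" float __gnu_h2f_ieee(uint16_t);   // FP16_TO_FP
    extern "C" uint16_t __gnu_f2h_ieee(float);   // FP_TO_FP16

    uint16_t half_mul(uint16_t a, uint16_t b) {
      float r = __gnu_h2f_ieee(a) * __gnu_h2f_ieee(b);  // extend, f32 FMUL
      return __gnu_f2h_ieee(r);                         // round back to bits
    }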
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); + } + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; @@ -496,6 +518,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + if (N->getValueType(0) == MVT::f16) + return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -623,10 +648,11 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; + case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; - case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; @@ -652,11 +678,32 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { + // If we get here, the result must be legal but the source illegal. + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (SVT == MVT::f16) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); + + RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); + + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; +} + + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { + // We actually deal with the partially-softened FP_TO_FP16 node too, which + // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. + assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16); + EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); + EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; - RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -674,7 +721,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
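After softening, a floating-point setcc is itself a libcall whose integer result still has to be tested; when softenSetCCOperands fully resolves the comparison it hands back such a scalar, and the fallback above tests it against zero. With a real libgcc/compiler-rt comparison helper:

    extern "C" int __ltdf2(double, double);  // result < 0 iff a < b (no NaNs)

    bool lt(double a, double b) {
      return __ltdf2(a, b) < 0;  // the "compare against zero" step
    }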
- if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -702,13 +749,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { - EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; -} - SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); @@ -720,7 +760,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -742,7 +782,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, use it. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); return NewLHS; @@ -1340,7 +1380,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1433,7 +1473,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1450,7 +1490,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. 
- if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); return NewLHS; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 4255948..44d9e38 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + //===----------------------------------------------------------------------===// // Integer Result Promotion //===----------------------------------------------------------------------===// @@ -97,7 +99,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; - case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break; + case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; case ISD::AND: case ISD::OR: @@ -136,7 +138,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; case ISD::ATOMIC_CMP_SWAP: - Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo); + break; } // If the result is null then the sub-method took care of registering it. @@ -190,16 +194,40 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, + unsigned ResNo) { + if (ResNo == 1) { + assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + EVT SVT = getSetCCResultType(N->getOperand(2).getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + + // Only use the result of getSetCCResultType if it is legal, + // otherwise just use the promoted result type (NVT). + if (!TLI.isTypeLegal(SVT)) + SVT = NVT; + + SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs, + N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3), + N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(), + N->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); + ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); + return Res.getValue(1); + } + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), - N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, Op3, N->getMemOperand(), N->getOrdering(), - N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + SDVTList VTs = + DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), + N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(), + N->getFailureOrdering(), N->getSynchScope()); + // Update the use to N with the newly created Res. 
+ for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i) + ReplaceValueWith(SDValue(N, i), Res.getValue(i)); return Res; } @@ -266,9 +294,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { EVT NVT = Op.getValueType(); SDLoc dl(N); - unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, TLI.getPointerTy())); + DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -372,7 +400,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { DAG.getValueType(N->getValueType(0).getScalarType())); } -SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -432,7 +460,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { EVT ValueVTs[] = { N->getValueType(0), NVT }; SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), - DAG.getVTList(ValueVTs, 2), Ops, 2); + DAG.getVTList(ValueVTs), Ops); // Modified the sum result - switch anything that used the old sum to use // the new one. @@ -490,7 +518,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy)); + Mask = PromoteTargetBoolean(Mask, OpTy); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, SDLoc(N), @@ -797,7 +825,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; - case ISD::FP16_TO_FP32: + case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; @@ -890,8 +918,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(MVT::Other); - SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other); // The chain (Op#0) and basic block destination (Op#2) are always legal types. return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, @@ -931,7 +958,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i < NumElts; ++i) NewOps.push_back(GetPromotedInteger(N->getOperand(i))); - return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { @@ -984,9 +1011,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ? - OpTy.getScalarType() : OpTy); - Cond = PromoteTargetBoolean(Cond, SVT); + EVT OpVT = N->getOpcode() == ISD::SELECT ? 
OpTy.getScalarType() : OpTy; + Cond = PromoteTargetBoolean(Cond, OpVT); return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), N->getOperand(2)), 0); @@ -1141,6 +1167,26 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { ReplaceValueWith(SDValue(N, 1), Tmp.second); break; } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + AtomicSDNode *AN = cast<AtomicSDNode>(N); + SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other); + SDValue Tmp = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs, + N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), + AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(), + AN->getSynchScope()); + + // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine + // success simply by comparing the loaded value against the ingoing + // comparison. + SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp, + N->getOperand(2), ISD::SETEQ); + + SplitInteger(Tmp, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Success); + ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1)); + break; + } case ISD::AND: case ISD::OR: @@ -1270,6 +1316,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { + assert(Amt && "Expected zero shifts to be already optimized away."); SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; @@ -1296,9 +1343,9 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, // Emit this X << 1 as X+X. SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); SDValue LoOps[2] = { InL, InL }; - Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps); SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; - Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); } else { Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); Hi = DAG.getNode(ISD::OR, DL, NVT, @@ -1372,7 +1419,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); + DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne); // If we don't know anything about the high bits, exit. 
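Once computeKnownBits proves the shift amount is below the part width, a double-width shift can be built from parts with no "amount >= width" branch at all. For a 64-bit left shift from 32-bit halves (sketch; amt is assumed provably in 0..31):

    #include <cstdint>

    uint64_t shl64_parts(uint32_t lo, uint32_t hi, unsigned amt) {
      uint32_t newHi = (hi << amt) | (amt ? lo >> (32 - amt) : 0);
      uint32_t newLo = lo << amt;
      return ((uint64_t)newHi << 32) | newLo;
    }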
if (((KnownZero|KnownOne) & HighBitMask) == 0) @@ -1547,20 +1594,20 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (hasCarry) { SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); if (N->getOpcode() == ISD::ADD) { - Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps); } else { - Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps); } return; } if (N->getOpcode() == ISD::ADD) { - Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, @@ -1572,8 +1619,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, DAG.getConstant(1, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { - Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); @@ -1596,13 +1643,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, SDValue HiOps[3] = { LHSH, RHSH }; if (N->getOpcode() == ISD::ADDC) { - Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps); } else { - Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps); } // Legalized the flag result - switch anything that used the old flag to @@ -1621,9 +1668,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; SDValue HiOps[3] = { LHSH, RHSH }; - Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps); // Legalized the flag result - switch anything that used the old flag to // use the new one. 
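Without ADDC/ADDE, ExpandIntRes_ADDSUB recovers the low-half carry with an unsigned compare (the SETULT above): an unsigned add wrapped if and only if the sum is below an operand. On concrete types:

    #include <cstdint>

    uint64_t add64_parts(uint32_t ll, uint32_t lh, uint32_t rl, uint32_t rh) {
      uint32_t lo = ll + rl;
      uint32_t carry = lo < ll ? 1 : 0;  // SETULT(sum, lhs)
      uint32_t hi = lh + rh + carry;
      return ((uint64_t)hi << 32) | lo;
    }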
@@ -1712,9 +1759,13 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); - const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); - Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT); - Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); + auto Constant = cast<ConstantSDNode>(N); + const APInt &Cst = Constant->getAPIntValue(); + bool IsTarget = Constant->isTargetOpcode(); + bool IsOpaque = Constant->isOpaque(); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT, IsTarget, IsOpaque); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT, IsTarget, + IsOpaque); } void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, @@ -1923,73 +1974,12 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDLoc dl(N); - bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); - bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); - bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); - bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); - if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { - SDValue LL, LH, RL, RH; - GetExpandedInteger(N->getOperand(0), LL, LH); - GetExpandedInteger(N->getOperand(1), RL, RH); - unsigned OuterBitSize = VT.getSizeInBits(); - unsigned InnerBitSize = NVT.getSizeInBits(); - unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); - unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); - - APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); - if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && - DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { - // The inputs are both zero-extended. - if (HasUMUL_LOHI) { - // We can emit a umul_lohi. - Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); - Hi = SDValue(Lo.getNode(), 1); - return; - } - if (HasMULHU) { - // We can emit a mulhu+mul. - Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); - return; - } - } - if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { - // The input values are both sign-extended. - if (HasSMUL_LOHI) { - // We can emit a smul_lohi. - Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); - Hi = SDValue(Lo.getNode(), 1); - return; - } - if (HasMULHS) { - // We can emit a mulhs+mul. - Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); - Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); - return; - } - } - if (HasUMUL_LOHI) { - // Lo,Hi = umul LHS, RHS. - SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, - DAG.getVTList(NVT, NVT), LL, RL); - Lo = UMulLOHI; - Hi = UMulLOHI.getValue(1); - RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); - return; - } - if (HasMULHU) { - Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); - RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); - return; - } - } + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + + if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH)) + return; // If nothing else, we can make a libcall. 
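The inline logic deleted above now lives behind TLI.expandMUL. Roughly, when only half-width multiplies are available it computes the low product at double width and folds in the two cross products, which can only affect the high half and so may wrap freely. On concrete types:

    #include <cstdint>

    uint64_t mul64_parts(uint32_t ll, uint32_t lh, uint32_t rl, uint32_t rh) {
      uint64_t lo = (uint64_t)ll * rl;  // UMUL_LOHI: low and high in one go
      uint32_t hi = (uint32_t)(lo >> 32) + ll * rh + lh * rl;  // mod 2^32
      return ((uint64_t)hi << 32) | (uint32_t)lo;  // SHL + OR recombine
    }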
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -2120,7 +2110,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); SDValue Ops[] = { LHSL, LHSH, ShiftOp }; - Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops); Hi = Lo.getValue(1); return; } @@ -2352,12 +2342,12 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Args.push_back(Entry); SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); - TargetLowering:: - CallLoweringInfo CLI(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Func, Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0) + .setSExtResult(); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); SplitInteger(CallInfo.first, Lo, Hi); @@ -2442,15 +2432,18 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); SDValue Zero = DAG.getConstant(0, VT); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, - N->getOperand(0), - N->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(N)->getMemOperand(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getSynchScope()); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), + N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); - ReplaceValueWith(SDValue(N, 1), Swap.getValue(1)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); } //===----------------------------------------------------------------------===// @@ -2575,15 +2568,20 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? 
B2 : B3) --> (B1 & B2)|(!B1&B3) - TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, + nullptr); SDValue Tmp1, Tmp2; - Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), - LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); + if (TLI.isTypeLegal(LHSLo.getValueType()) && + TLI.isTypeLegal(RHSLo.getValueType())) + Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); if (!Tmp1.getNode()) Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC); - Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); + if (TLI.isTypeLegal(LHSHi.getValueType()) && + TLI.isTypeLegal(RHSHi.getValueType())) + Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); if (!Tmp2.getNode()) Tmp2 = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()), @@ -2624,7 +2622,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2642,7 +2640,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2659,7 +2657,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. 
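The identity quoted at the top of IntegerExpandSetCCOperands, as runnable code:

    // (B1 ? B2 : B3) == (B1 & B2) | (!B1 & B3) for booleans:
    // no SELECT node and no branch needed.
    bool selectBool(bool b1, bool b2, bool b3) {
      return (b1 & b2) | (!b1 & b3);
    }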
- if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); return NewLHS; @@ -2907,7 +2905,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } @@ -2954,7 +2952,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { @@ -3002,7 +3000,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -3058,6 +3056,5 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), - &NewOps[0], NewOps.size()); - } + return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index eb13230..bd7dacf 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -22,6 +22,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + static cl::opt<bool> EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); @@ -159,7 +161,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (Mapped & 128) dbgs() << " WidenedVectors"; dbgs() << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -433,7 +435,7 @@ NodeDone: if (Failed) { I->dump(&DAG); dbgs() << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } #endif @@ -488,7 +490,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // Some operands changed - update the node. if (!NewOps.empty()) { - SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size()); + SDNode *M = DAG.UpdateNodeOperands(N, NewOps); if (M != N) { // The node morphed into a different node. Normally for this to happen // the original node would have to be marked NewNode. However this can @@ -634,7 +636,7 @@ namespace { : SelectionDAG::DAGUpdateListener(dtl.getDAG()), DTL(dtl), NodesToAnalyze(nta) {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && N->getNodeId() != DAGTypeLegalizer::Processed && "Invalid node ID for RAUW deletion!"); @@ -655,7 +657,7 @@ namespace { NodesToAnalyze.insert(E); } - virtual void NodeUpdated(SDNode *N) { + void NodeUpdated(SDNode *N) override { // Node updates can mean pretty much anything. It is possible that an // operand was set to something already processed (f.e.) in which case // this node could become ready. Recompute its flags. 
@@ -736,7 +738,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = PromotedIntegers[Op]; - assert(OpEntry.getNode() == 0 && "Node is already promoted!"); + assert(!OpEntry.getNode() && "Node is already promoted!"); OpEntry = Result; } @@ -747,7 +749,7 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); + assert(!OpEntry.getNode() && "Node is already converted to integer!"); OpEntry = Result; } @@ -761,7 +763,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = ScalarizedVectors[Op]; - assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); + assert(!OpEntry.getNode() && "Node is already scalarized!"); OpEntry = Result; } @@ -787,7 +789,7 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, // Remember that this is the result of the node. std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; - assert(Entry.first.getNode() == 0 && "Node already expanded"); + assert(!Entry.first.getNode() && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; } @@ -814,7 +816,7 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, // Remember that this is the result of the node. std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; - assert(Entry.first.getNode() == 0 && "Node already expanded"); + assert(!Entry.first.getNode() && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; } @@ -843,7 +845,7 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, // Remember that this is the result of the node. 
std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; - assert(Entry.first.getNode() == 0 && "Node already split"); + assert(!Entry.first.getNode() && "Node already split"); Entry.first = Lo; Entry.second = Hi; } @@ -855,7 +857,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = WidenedVectors[Op]; - assert(OpEntry.getNode() == 0 && "Node already widened!"); + assert(!OpEntry.getNode() && "Node already widened!"); OpEntry = Result; } @@ -1007,7 +1009,7 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); @@ -1049,11 +1051,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, SDLoc(Node)); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -1062,11 +1065,14 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. -SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { +/// +/// ValVT is the type of values that produced the boolean. 
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { SDLoc dl(Bool); + EVT BoolVT = getSetCCResultType(ValVT); ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); - return DAG.getNode(ExtendCode, dl, VT, Bool); + TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT)); + return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index ce2ba01..117ff31 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -16,7 +16,6 @@ #ifndef SELECTIONDAG_LEGALIZETYPES_H #define SELECTIONDAG_LEGALIZETYPES_H -#define DEBUG_TYPE "legalize-types" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -168,7 +167,7 @@ private: SDNode *Node, bool isSigned); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); - SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); + SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -221,7 +220,7 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic0(AtomicSDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); - SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo); SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); @@ -238,7 +237,7 @@ private: SDValue PromoteIntRes_CTTZ(SDNode *N); SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); - SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N); + SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); @@ -404,7 +403,7 @@ private: SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); SDValue SoftenFloatRes_FNEG(SDNode *N); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); - SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N); + SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); SDValue SoftenFloatRes_FPOW(SDNode *N); SDValue SoftenFloatRes_FPOWI(SDNode *N); @@ -426,10 +425,10 @@ private: bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); - SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -540,7 +539,9 @@ private: SDValue ScalarizeVecOp_UnaryOp(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_VSELECT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// // Vector 
Splitting Support: LegalizeVectorTypes.cpp @@ -644,6 +645,7 @@ private: bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); + SDValue WidenVecOp_EXTEND(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); @@ -671,13 +673,13 @@ private: LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of - /// stores to store a widen vector into non widen memory + /// stores to store a widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of - /// stores to store a truncate widen vector into non widen memory + /// stores to store a truncate widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c749fde..7e2f7b6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -23,6 +23,8 @@ #include "llvm/IR/DataLayout.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + //===----------------------------------------------------------------------===// // Generic Result Expansion. //===----------------------------------------------------------------------===// @@ -58,12 +60,15 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); + if (TLI.hasBigEndianPartOrdering(InVT) != + TLI.hasBigEndianPartOrdering(OutVT)) + std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -78,9 +83,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); - llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); - if (TLI.isBigEndian()) + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -174,7 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. 
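// Editorial note (illustrative, not part of the patch): with big-endian
// part ordering the Hi half of the expanded value sits at the lower
// address, so the two stack-slot loads built above come back as (Hi, Lo)
// and must be swapped into GetExpandedOp's (Lo, Hi) convention. E.g.
// expanding an i64 on such a target: load @Ptr -> Hi:i32,
// load @Ptr+4 -> Lo:i32, then swap.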
- if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); } @@ -243,7 +248,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT ValueVT = LD->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); unsigned Alignment = LD->getAlignment(); @@ -273,7 +279,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi.getValue(1)); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -293,7 +299,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -352,7 +358,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SmallVector<SDValue, 8> Ops; IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts); + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, + makeArrayRef(Ops.data(), NumElts)); return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } @@ -388,7 +395,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()), - &NewElts[0], NewElts.size()); + NewElts); // Convert the new vector to the old vector type. 
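// Illustrative example (not part of the change): a BUILD_VECTOR v2i64 whose
// i64 elements were expanded yields NewElts = four i32 halves in
// endian-corrected order, so NewVec above is a v4i32 and the BITCAST below
// reinterprets it as the original v2i64.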
return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); @@ -447,7 +454,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); for (unsigned i = 1; i < NumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { @@ -456,8 +463,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDLoc dl(N); StoreSDNode *St = cast<StoreSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), - St->getValue().getValueType()); + EVT ValueVT = St->getValue().getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); unsigned Alignment = St->getAlignment(); @@ -471,7 +478,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(St->getValue(), Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), @@ -518,7 +525,7 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Cond, CL, CH); else - llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl); + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); @@ -540,7 +547,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3fb2d9b..507e7ff 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -37,12 +37,12 @@ class VectorLegalizer { const TargetLowering &TLI; bool Changed; // Keep track of whether anything changed - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. This - /// allows us to avoid legalizing the same thing more than once. + /// For nodes that are of legal width, and that have more than one use, this + /// map indicates what regularized operand to use. This allows us to avoid + /// legalizing the same thing more than once. SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; - // Adds a node to the translation cache + /// \brief Adds a node to the translation cache. void AddLegalizedOperand(SDValue From, SDValue To) { LegalizedNodes.insert(std::make_pair(From, To)); // If someone requests legalization of the new node, return itself. @@ -50,35 +50,81 @@ class VectorLegalizer { LegalizedNodes.insert(std::make_pair(To, To)); } - // Legalizes the given node + /// \brief Legalizes the given node. SDValue LegalizeOp(SDValue Op); - // Assuming the node is legal, "legalize" the results + + /// \brief Assuming the node is legal, "legalize" the results. SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); - // Implements unrolling a VSETCC. + + /// \brief Implements unrolling a VSETCC. 
SDValue UnrollVSETCC(SDValue Op); - // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB - // isn't legal. - // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if - // SINT_TO_FLOAT and SHR on vectors isn't legal. + + /// \brief Implement expand-based legalization of vector operations. + /// + /// This is just a high-level routine to dispatch to specific code paths for + /// operations to legalize them. + SDValue Expand(SDValue Op); + + /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if + /// FSUB isn't legal. + /// + /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if + /// SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); - + // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + + /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); - - // Implement vselect in terms of XOR, AND, OR when blend is not supported - // by the target. + + /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and bitcasts to the proper + /// type. The contents of the bits in the extended part of each element are + /// undef. + SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place, bitcasts to the proper + /// type, then shifts left and arithmetic shifts right to introduce a sign + /// extension. + SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and blends zeros into + /// the remaining lanes, finally bitcasting to the proper type. + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Expand bswap of vectors into a shuffle if legal. + SDValue ExpandBSWAP(SDValue Op); + + /// \brief Implement vselect in terms of XOR, AND, OR when blend is not + /// supported by the target. SDValue ExpandVSELECT(SDValue Op); SDValue ExpandSELECT(SDValue Op); SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); - // Implements vector promotion; this is essentially just bitcasting the - // operands to a different type and bitcasting the result back to the - // original type. - SDValue PromoteVectorOp(SDValue Op); - // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input - // operand to the next size up. - SDValue PromoteVectorOpINT_TO_FP(SDValue Op); - - public: + + /// \brief Implements vector promotion. + /// + /// This is essentially just bitcasting the operands to a different type and + /// bitcasting the result back to the original type. + SDValue Promote(SDValue Op); + + /// \brief Implements [SU]INT_TO_FP vector promotion. + /// + /// This is a [zs]ext of the input operand to the next size up. + SDValue PromoteINT_TO_FP(SDValue Op); + + /// \brief Implements FP_TO_[SU]INT vector promotion of the result type. + /// + /// It is promoted to the next size up integer type. The result is then + /// truncated back to the original type. + SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned); + +public: + /// \brief Begin legalizing the vector operations in the DAG.
bool Run(); VectorLegalizer(SelectionDAG& dag) : DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} @@ -88,7 +134,7 @@ bool VectorLegalizer::Run() { // Before we start legalizing vector nodes, check if there are any vectors. bool HasVectors = false; for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) { + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { // Check if the values of the nodes contain vectors. We don't need to check // the operands because we are going to check their values at some point. for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); @@ -112,7 +158,7 @@ bool VectorLegalizer::Run() { // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) LegalizeOp(SDValue(I, 0)); // Finally, it's possible the root changed. Get the new root. @@ -148,8 +194,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) Ops.push_back(LegalizeOp(Node->getOperand(i))); - SDValue Result = - SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); + SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); @@ -249,6 +294,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_EXTEND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -262,21 +310,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { case TargetLowering::Promote: - switch (Op.getOpcode()) { - default: - // "Promote" the operation by bitcasting - Result = PromoteVectorOp(Op); - Changed = true; - break; - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - // "Promote" the operation by extending the operand. - Result = PromoteVectorOpINT_TO_FP(Op); - Changed = true; - break; - } + Result = Promote(Op); + Changed = true; + break; + case TargetLowering::Legal: break; - case TargetLowering::Legal: break; case TargetLowering::Custom: { SDValue Tmp1 = TLI.LowerOperation(Op, DAG); if (Tmp1.getNode()) { @@ -286,21 +324,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) - Result = ExpandSEXTINREG(Op); - else if (Node->getOpcode() == ISD::VSELECT) - Result = ExpandVSELECT(Op); - else if (Node->getOpcode() == ISD::SELECT) - Result = ExpandSELECT(Op); - else if (Node->getOpcode() == ISD::UINT_TO_FP) - Result = ExpandUINT_TO_FLOAT(Op); - else if (Node->getOpcode() == ISD::FNEG) - Result = ExpandFNEG(Op); - else if (Node->getOpcode() == ISD::SETCC) - Result = UnrollVSETCC(Op); - else - Result = DAG.UnrollVectorOp(Op.getNode()); - break; + Result = Expand(Op); } // Make sure that the generated code is itself legal. @@ -315,10 +339,23 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return Result; } -SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { - // Vector "promotion" is basically just bitcasting and doing the operation - // in a different type. 
For example, x86 promotes ISD::AND on v2i32 to - v1i64. +SDValue VectorLegalizer::Promote(SDValue Op) { + // For a few operations there is a specific concept for promotion based on + // the operand's type. + switch (Op.getOpcode()) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + return PromoteINT_TO_FP(Op); + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + // Promote the operation by extending the operand. + return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); + } + + // The rest of the time, vector "promotion" is basically just bitcasting and + // doing the operation in a different type. For example, x86 promotes + // ISD::AND on v2i32 to v1i64. MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); @@ -333,12 +370,12 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { Operands[j] = Op.getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); return DAG.getNode(ISD::BITCAST, dl, VT, Op); } -SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { +SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // INT_TO_FP operations may require the input operand be promoted even // when the type is otherwise legal. EVT VT = Op.getOperand(0).getValueType(); @@ -352,14 +389,9 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { // // Increase the bitwidth of the element to the next pow-of-two // (which is greater than 8 bits). - unsigned NumElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits()); - assert(EltVT.isSimple() && "Promoting to a non-simple vector type!"); - - // Build a new vector type and check if it is legal. - MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NVT.isSimple() && "Promoting to a non-simple vector type!"); SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); @@ -372,8 +404,36 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { Operands[j] = Op.getOperand(j); } - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0], - Operands.size()); + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); +} + +// For FP_TO_INT we promote the result type to a vector type with wider +// elements and then truncate the result. This is different from the default +// PromoteVector which uses bitcast to promote, thus assuming that the +// promoted vector type has the same overall size.
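// Illustrative example (not part of the original change): on a target where
// FP_TO_SINT is legal for v4i32 but not v4i16, FP_TO_SINT v4f32 -> v4i16
// becomes FP_TO_SINT v4f32 -> v4i32 followed by a TRUNCATE of the result
// down to v4i16.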
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + EVT VT = Op.getValueType(); + + EVT NewVT = VT; + unsigned NewOpc; + while (1) { + NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NewVT.isSimple() && "Promoting to a non-simple vector type!"); + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) { + NewOpc = ISD::FP_TO_SINT; + break; + } + if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) { + NewOpc = ISD::FP_TO_UINT; + break; + } + } + + SDLoc loc(Op); + SDValue promoted = DAG.getNode(NewOpc, loc, NewVT, Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, loc, VT, promoted); } @@ -512,10 +572,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } } - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, - Op.getNode()->getValueType(0), &Vals[0], Vals.size()); + Op.getNode()->getValueType(0), Vals); AddLegalizedOperand(Op.getValue(0), Value); AddLegalizedOperand(Op.getValue(1), NewChain); @@ -569,12 +628,38 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { Stores.push_back(Store); } - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); AddLegalizedOperand(Op, TF); return TF; } +SDValue VectorLegalizer::Expand(SDValue Op) { + switch (Op->getOpcode()) { + case ISD::SIGN_EXTEND_INREG: + return ExpandSEXTINREG(Op); + case ISD::ANY_EXTEND_VECTOR_INREG: + return ExpandANY_EXTEND_VECTOR_INREG(Op); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return ExpandSIGN_EXTEND_VECTOR_INREG(Op); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ExpandZERO_EXTEND_VECTOR_INREG(Op); + case ISD::BSWAP: + return ExpandBSWAP(Op); + case ISD::VSELECT: + return ExpandVSELECT(Op); + case ISD::SELECT: + return ExpandSELECT(Op); + case ISD::UINT_TO_FP: + return ExpandUINT_TO_FLOAT(Op); + case ISD::FNEG: + return ExpandFNEG(Op); + case ISD::SETCC: + return UnrollVSETCC(Op); + default: + return DAG.UnrollVectorOp(Op.getNode()); + } +} + SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it @@ -614,7 +699,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Broadcast the mask so that the entire vector is all-one or all zero. SmallVector<SDValue, 8> Ops(NumElem, Mask); - Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); + Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because @@ -652,6 +737,108 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } +// Generically expand a vector anyext in register to a shuffle of the relevant +// lanes into the appropriate locations, with other lanes left undef. +SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build a base mask of undef shuffles.
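// Worked example (editorial, not part of the patch): any-extending
// v8i16 -> v4i32 gives NumSrcElements = 8, NumElements = 4, so
// ExtLaneScale = 2; on little-endian (EndianOffset = 0) the mask becomes
// {0,-1,1,-1,2,-1,3,-1}, on big-endian (EndianOffset = 1) it becomes
// {-1,0,-1,1,-1,2,-1,3}. The shuffled v8i16 is then bitcast to v4i32 with
// the unchosen half of each lane left undef.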
+ SmallVector<int, 16> ShuffleMask; + ShuffleMask.resize(NumSrcElements, -1); + + // Place the extended lanes into the correct locations. + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = i; + + return DAG.getNode( + ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); +} + +SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // First build an any-extend node which can be legalized above when we + // recurse through it. + Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); + + // Now we need sign extend. Do this by shifting the elements. Even if these + // aren't legal operations, they have a better chance of being legalized + // without full scalarization than the sign extension does. + unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); + unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT); + return DAG.getNode(ISD::SRA, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), + ShiftAmount); +} + +// Generically expand a vector zext in register to a shuffle of the relevant +// lanes into the appropriate locations, a blend of zero into the high bits, +// and a bitcast to the wider element type. +SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build up a zero vector to blend into this one. + EVT SrcScalarVT = SrcVT.getScalarType(); + SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT); + SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); + + // Shuffle the incoming lanes into the correct position, and pull all other + // lanes from the zero vector. + SmallVector<int, 16> ShuffleMask; + ShuffleMask.reserve(NumSrcElements); + for (int i = 0; i < NumSrcElements; ++i) + ShuffleMask.push_back(i); + + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); +} + +SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { + EVT VT = Op.getValueType(); + + // Generate a byte wise shuffle mask for the BSWAP. + SmallVector<int, 16> ShuffleMask; + int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; + for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) + for (int J = ScalarSizeInBytes - 1; J >= 0; --J) + ShuffleMask.push_back((I * ScalarSizeInBytes) + J); + + EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); + + // Only emit a shuffle if the mask is legal. 
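// Worked example (editorial, not part of the patch): for v2i32 the loop
// above produces the v8i8 mask {3,2,1,0,7,6,5,4}, reversing each 4-byte
// scalar in place; targets whose shuffles cannot express this fall back to
// the unrolled scalar BSWAPs below.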
+ if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) + return DAG.UnrollVectorOp(Op.getNode()); + + SDLoc DL(Op); + Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), + ShuffleMask.data()); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -672,9 +859,9 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // FIXME: Sign extend 1 to all ones if that's legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || - TLI.getBooleanContents(true) != - TargetLowering::ZeroOrNegativeOneBooleanContent) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(Op1.getValueType()) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); // If the mask and the type are different sizes, unroll the vector op. This @@ -769,7 +956,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { (EltVT.getSizeInBits()), EltVT), DAG.getConstant(0, EltVT)); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index fb8c602..f77c592 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -26,6 +26,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + //===----------------------------------------------------------------------===// // Result Vector Scalarization: <1 x ty> -> ty. //===----------------------------------------------------------------------===// @@ -65,6 +67,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::ANY_EXTEND: + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: @@ -254,8 +257,26 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { SDValue Cond = GetScalarizedVector(N->getOperand(0)); SDValue LHS = GetScalarizedVector(N->getOperand(1)); - TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); - TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + TargetLowering::BooleanContent ScalarBool = + TLI.getBooleanContents(false, false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false); + + // If integer and float booleans have different contents then we can't + // reliably optimize in all cases. There is a full explanation for this in + // DAGCombiner::visitSELECT() where the same issue affects folding + // (select C, 0, 1) to (xor C, 1). + if (TLI.getBooleanContents(false, false) != + TLI.getBooleanContents(false, true)) { + // At least try the common case where the boolean is generated by a + // comparison.
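// Editorial note (not part of the change): a SETCC condition pins down the
// relevant boolean contents — e.g. a compare of v4i32 operands uses the
// contents reported for v4i32 on the vector side and for i32 on the scalar
// side; any other producer leaves them unknown, hence the conservative
// UndefinedBooleanContent fallback below.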
+ if (Cond->getOpcode() == ISD::SETCC) { + EVT OpVT = Cond->getOperand(0)->getValueType(0); + ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); + VecBool = TLI.getBooleanContents(OpVT); + } else + ScalarBool = TargetLowering::UndefinedBooleanContent; + } + if (ScalarBool != VecBool) { EVT CondVT = Cond.getValueType(); switch (ScalarBool) { @@ -330,19 +351,31 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operand types must be vectors"); - - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue RHS = GetScalarizedVector(N->getOperand(1)); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT OpVT = LHS.getValueType(); EVT NVT = N->getValueType(0).getVectorElementType(); SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + LHS = GetScalarizedVector(LHS); + RHS = GetScalarizedVector(RHS); + } else { + EVT VT = OpVT.getVectorElementType(); + LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getConstant(0, TLI.getVectorIdxTy())); + RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getConstant(0, TLI.getVectorIdxTy())); + } + // Turn it into a scalar SETCC. SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); // Vectors may have different boolean contents from scalars. Promote the // value appropriately. ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(true)); + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); return DAG.getNode(ExtendCode, DL, NVT, Res); } @@ -357,7 +390,7 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"); SDValue Res = SDValue(); - if (Res.getNode() == 0) { + if (!Res.getNode()) { switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -381,9 +414,15 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::VSELECT: + Res = ScalarizeVecOp_VSELECT(N); + break; case ISD::STORE: Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::FP_ROUND: + Res = ScalarizeVecOp_FP_ROUND(N, OpNo); + break; } } @@ -416,13 +455,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexpected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); - SmallVector<SDValue, 1> Ops(1); - Ops[0] = DAG.getNode(N->getOpcode(), SDLoc(N), - N->getValueType(0).getScalarType(), Elt); + SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), + N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect.
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), - &Ops[0], 1); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); } /// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - @@ -431,8 +468,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector<SDValue, 8> Ops(N->getNumOperands()); for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) Ops[i] = GetScalarizedVector(N->getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops); } /// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to @@ -446,6 +482,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { return Res; } + +/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be +/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT +/// (still with vector output type since that was acceptable if we got here). +SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { + SDValue ScalarCond = GetScalarizedVector(N->getOperand(0)); + EVT VT = N->getValueType(0); + + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1), + N->getOperand(2)); +} + /// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be /// scalarized, it must be <1 x ty>. Just store the element. SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ @@ -467,6 +515,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getOriginalAlignment(), N->getTBAAInfo()); } +/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0).getVectorElementType(), Elt, + N->getOperand(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} //===----------------------------------------------------------------------===// // Result Vector Splitting @@ -522,6 +579,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::BSWAP: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: @@ -625,7 +683,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // We know the result is a vector. The input may be either a vector or a // scalar value. 
EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SDLoc dl(N); SDValue InOp = N->getOperand(0); @@ -680,13 +738,13 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); - Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps); SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); - Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps); } void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, @@ -701,13 +759,13 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, } EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); - Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps); SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); - Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps); } void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, @@ -717,7 +775,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); @@ -778,7 +836,7 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, @@ -842,7 +900,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } @@ -852,7 +910,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(LD); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); @@ -866,7 +924,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT LoMemVT, HiMemVT; - llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, 
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, @@ -897,12 +955,12 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc DL(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the input. SDValue LL, LH, RL, RH; - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -913,7 +971,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // Get the dest types - they may not match the input types, e.g. int_to_fp. EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. @@ -921,7 +979,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), Lo, Hi); else - llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); @@ -950,7 +1008,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, EVT SrcVT = N->getOperand(0).getValueType(); EVT DestVT = N->getValueType(0); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); // We can do better than a generic split operation if the extend is doing // more than just doubling the width of the elements and the following are @@ -976,7 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, EVT SplitSrcVT = EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); EVT SplitLoVT, SplitHiVT; - llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { DEBUG(dbgs() << "Split vector extend via incremental extend:"; @@ -985,7 +1043,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue NewSrc = DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); // Get the low and high halves of the new, extended one step, vector. - llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); // Extend those vector halves the rest of the way. Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -1088,7 +1146,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } // Construct the Lo/Hi output using a BUILD_VECTOR. - Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps); } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. 
Output = DAG.getUNDEF(NewVT); @@ -1124,7 +1182,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) return false; - if (Res.getNode() == 0) { + if (!Res.getNode()) { switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -1198,13 +1256,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { "Lo and Hi have differing types"); EVT LoOpVT, HiOpVT; - llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; - llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); - llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); - llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); SDValue LoSelect = DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); @@ -1319,7 +1377,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { GetSplitVector(N->getOperand(1), Lo, Hi); EVT LoMemVT, HiMemVT; - llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; @@ -1366,8 +1424,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), - &Elts[0], Elts.size()); + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); } SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { @@ -1408,7 +1465,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // Extract the halves of the input via extract_subvector. SDValue InLoVec, InHiVec; - llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); + std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. 
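// Illustrative example (not part of the change): splitting
// TRUNCATE v8i32 -> v8i8 truncates each v4i32 half to v4i16, concatenates
// the halves into a v8i16, and emits one final v8i16 -> v8i8 truncate, so
// every step only halves the element size.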
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -1510,7 +1567,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: - case ISD::BSWAP: case ISD::MUL: case ISD::MULHS: case ISD::MULHU: @@ -1557,6 +1613,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: @@ -1724,8 +1781,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { while (SubConcatEnd < OpsToConcat) SubConcatOps[SubConcatEnd++] = undefVec; ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NextVT, &SubConcatOps[0], - OpsToConcat); + NextVT, SubConcatOps); ConcatEnd = SubConcatIdx + 1; } } @@ -1744,7 +1800,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { for (unsigned j = ConcatEnd; j < NumOps; ++j) ConcatOps[j] = UndefVal; } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(ConcatOps.data(), NumOps)); } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -1786,8 +1843,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(InVT); for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, - &Ops[0], NumConcat); + SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); @@ -1822,7 +1878,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { @@ -1946,11 +2002,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SDValue NewVec; if (InVT.isVector()) - NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NewInVT, &Ops[0], NewNumElts); + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); else - NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - NewInVT, &Ops[0], NewNumElts); + NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } @@ -1975,7 +2029,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps); } SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { @@ -1998,7 +2052,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { Ops[i] = N->getOperand(i); for (unsigned i = NumOperands; i != NumConcat; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops); } } else { InputWidened = true; @@ -2044,7 +2098,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) Ops[Idx] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, 
&Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { @@ -2089,7 +2143,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat); + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2122,7 +2176,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { @@ -2161,7 +2215,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -2189,8 +2243,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { if (LdChain.size() == 1) NewChain = LdChain[0]; else - NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, - &LdChain[0], LdChain.size()); + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); // Modified the chain - switch anything that used the old chain to use // the new one. @@ -2300,7 +2353,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { SDValue InOp1 = N->getOperand(0); EVT InVT = InOp1.getValueType(); - assert(InVT.isVector() && "can not widen non vector type"); + assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); @@ -2346,15 +2399,18 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = WidenVecOp_EXTEND(N); + break; + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: Res = WidenVecOp_Convert(N); break; } @@ -2375,6 +2431,68 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { return false; } +SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue InOp = N->getOperand(0); + // If some legalization strategy other than widening is used on the operand, + // we can't safely assume that just extending the low lanes is the correct + // transformation. + if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) + return WidenVecOp_Convert(N); + InOp = GetWidenedVector(InOp); + assert(VT.getVectorNumElements() < + InOp.getValueType().getVectorNumElements() && + "Input wasn't widened!"); + + // We may need to further widen the operand until it has the same total + // vector size as the result. 
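// Illustrative example (editorial, not part of the change): zero-extending
// v4i8 -> v4i32 on a target whose type legalizer widened the operand only
// to v8i8 (64 bits): the result is 128 bits, so the loop below searches the
// MVT vector types for a legal 128-bit type with i8 elements (e.g. v16i8)
// and INSERT_SUBVECTORs the v8i8 into its low half; an over-wide operand
// such as v32i8 would instead be trimmed with EXTRACT_SUBVECTOR.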
+ EVT InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) { + EVT InEltVT = InVT.getVectorElementType(); + for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { + EVT FixedVT = (MVT::SimpleValueType)i; + EVT FixedEltVT = FixedVT.getVectorElementType(); + if (TLI.isTypeLegal(FixedVT) && + FixedVT.getSizeInBits() == VT.getSizeInBits() && + FixedEltVT == InEltVT) { + assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && + "Not enough elements in the fixed type for the operand!"); + assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && + "We can't have the same type as we started with!"); + if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + else + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + break; + } + } + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) + // We couldn't find a legal vector type that was a widening of the input + // and could be extended in-register to the result type, so we have to + // scalarize. + return WidenVecOp_Convert(N); + } + + // Use special DAG nodes to represent the operation of extending the + // low lanes. + switch (N->getOpcode()) { + default: + llvm_unreachable("Extend legalization on an extend operation!"); + case ISD::ANY_EXTEND: + return DAG.getAnyExtendVectorInReg(InOp, DL, VT); + case ISD::SIGN_EXTEND: + return DAG.getSignExtendVectorInReg(InOp, DL, VT); + case ISD::ZERO_EXTEND: + return DAG.getZeroExtendVectorInReg(InOp, DL, VT); + } +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert @@ -2396,7 +2514,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, DAG.getConstant(i, TLI.getVectorIdxTy()))); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { @@ -2445,7 +2563,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(j, TLI.getVectorIdxTy())); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { @@ -2474,8 +2592,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { if (StChain.size() == 1) return StChain[0]; else - return DAG.getNode(ISD::TokenFactor, SDLoc(ST), - MVT::Other,&StChain[0],StChain.size()); + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { @@ -2650,8 +2767,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, ConcatOps[0] = LdOp; for (unsigned i = 1; i != NumConcat; ++i) ConcatOps[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], - NumConcat); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); } // Load vector by using multiple loads from largest vector to scalar @@ -2685,8 +2801,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
Loads.push_back(DAG.getUNDEF(L->getValueType(0))); size += L->getValueSizeInBits(0); } - L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), - &Loads[0], Loads.size()); + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads); } } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, @@ -2730,7 +2845,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, if (NewLdTy != LdTy) { // Create a larger vector ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, - &ConcatOps[Idx], End - Idx); + makeArrayRef(&ConcatOps[Idx], End - Idx)); Idx = End - 1; LdTy = NewLdTy; } @@ -2739,7 +2854,7 @@ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - &ConcatOps[Idx], End - Idx); + makeArrayRef(&ConcatOps[Idx], End - Idx)); // We need to fill the rest with undefs to build the vector unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); @@ -2752,7 +2867,7 @@ for (; i != NumOps; ++i) WidenOps[i] = UndefVal; } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps); } SDValue @@ -2803,7 +2918,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, for (; i != WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } @@ -2949,7 +3064,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); } if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) @@ -2968,5 +3083,5 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) Ops[Idx] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 1dd2128..624003f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -31,6 +30,8 @@ using namespace llvm; +#define DEBUG_TYPE "scheduler" + static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable use of DFA during scheduling")); @@ -49,7 +50,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : TLI = IS->getTargetLowering(); const TargetMachine &tm = (*IS->MF).getTarget(); - ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); + ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,nullptr); // This hard requirement could be relaxed, but for now // do not let it proceed.
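// Aside (simplified sketch, assuming the DFAPacketizer interface of this
// LLVM version): the resources model created above is what the DFA-based
// priority computation later consults, roughly as
//   if (ResourcesModel->canReserveResources(&TII->get(Opcode)))
//     ResourcesModel->reserveResources(&TII->get(Opcode));
// which is why a null model is rejected outright by the assert below.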
assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); @@ -214,7 +215,7 @@ bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// of SU, return it, otherwise return null. SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = 0; + SUnit *OnlyAvailablePred = nullptr; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { SUnit &Pred = *I->getSUnit(); @@ -222,7 +223,7 @@ SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... otherwise give up. if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return 0; + return nullptr; OnlyAvailablePred = &Pred; } } @@ -441,7 +442,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { ResCount -= (regPressureDelta(SU) * ScaleTwo); } - // These are platform specific things. + // These are platform-specific things. // Will need to go into the back end // and be accessed from here via a hook. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { @@ -581,7 +582,7 @@ void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { if (SU->isAvailable) return; // All preds scheduled. SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); - if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) + if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) return; // Okay, we found a single predecessor that is available, but not scheduled. @@ -598,12 +599,12 @@ /// to be placed in scheduling sequence. SUnit *ResourcePriorityQueue::pop() { if (empty()) - return 0; + return nullptr; std::vector<SUnit *>::iterator Best = Queue.begin(); if (!DisableDFASched) { signed BestCost = SUSchedulingCost(*Best); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { if (SUSchedulingCost(*I) > BestCost) { @@ -614,14 +615,14 @@ } // Use default TD scheduling mechanism.
else { - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; } SUnit *V = *Best; - if (Best != prior(Queue.end())) + if (Best != std::prev(Queue.end())) std::swap(*Best, Queue.back()); Queue.pop_back(); @@ -633,7 +634,7 @@ SUnit *ResourcePriorityQueue::pop() { void ResourcePriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 4af7172..ee54292 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -15,8 +15,8 @@ #define LLVM_CODEGEN_SDNODEDBGVALUE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/DebugLoc.h" namespace llvm { @@ -45,14 +45,17 @@ private: unsigned FrameIx; // valid for stack objects } u; MDNode *mdPtr; + bool IsIndirect; uint64_t Offset; DebugLoc DL; unsigned Order; bool Invalid; public: // Constructor for non-constants. - SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl, - unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O), + SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, + bool indir, uint64_t off, DebugLoc dl, + unsigned O) : mdPtr(mdP), IsIndirect(indir), + Offset(off), DL(dl), Order(O), Invalid(false) { kind = SDNODE; u.s.Node = N; @@ -62,14 +65,16 @@ public: // Constructor for constants. SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = CONST; u.Const = C; } // Constructor for frame indices. SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = FRAMEIX; u.FrameIx = FI; } @@ -92,6 +97,9 @@ public: // Returns the FrameIx for a stack object unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } + // Returns whether this is an indirect value. + bool isIndirect() { return IsIndirect; } + // Returns the offset. 
uint64_t getOffset() { return Offset; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 6c5e0ab..4d8c2c7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/SchedulerRegistry.h" #include "InstrEmitter.h" #include "ScheduleDAGSDNodes.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); STATISTIC(NumPRCopies, "Number of physical copies"); @@ -54,7 +55,7 @@ namespace { } SUnit *pop() { - if (empty()) return NULL; + if (empty()) return nullptr; SUnit *V = Queue.back(); Queue.pop_back(); return V; @@ -80,7 +81,7 @@ public: ScheduleDAGFast(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} - void Schedule(); + void Schedule() override; /// AddPred - adds a predecessor edge to SUnit SU. /// This returns true if this is a new predecessor. @@ -107,7 +108,7 @@ private: void ListScheduleBottomUp(); /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. - bool forceUnitLatencies() const { return true; } + bool forceUnitLatencies() const override { return true; } }; } // end anonymous namespace @@ -117,11 +118,11 @@ void ScheduleDAGFast::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegDefs.resize(TRI->getNumRegs(), nullptr); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. - BuildSchedGraph(NULL); + BuildSchedGraph(nullptr); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -144,7 +145,7 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --PredSU->NumSuccsLeft; @@ -198,7 +199,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { assert(LiveRegDefs[I->getReg()] == SU && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; + LiveRegDefs[I->getReg()] = nullptr; LiveRegCycles[I->getReg()] = 0; } } @@ -211,18 +212,18 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { /// successors to the newly created node. 
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (SU->getNode()->getGluedNode()) - return NULL; + return nullptr; SDNode *N = SU->getNode(); if (!N) - return NULL; + return nullptr; SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue) - return NULL; + return nullptr; else if (VT == MVT::Other) TryUnfold = true; } @@ -230,13 +231,13 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Glue) - return NULL; + return nullptr; } if (TryUnfold) { SmallVector<SDNode*, 2> NewNodes; if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) - return NULL; + return nullptr; DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -388,11 +389,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVectorImpl<SUnit*> &Copies) { - SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -583,7 +584,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // and it is expensive. // If cross copy register class is null, then it's not possible to copy // the value at all. - SUnit *NewDef = 0; + SUnit *NewDef = nullptr; if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); if (!DestRC && !NewDef) @@ -646,9 +647,10 @@ class ScheduleDAGLinearize : public ScheduleDAGSDNodes { public: ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} - void Schedule(); + void Schedule() override; - MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + MachineBasicBlock * + EmitSchedule(MachineBasicBlock::iterator &InsertPos) override; private: std::vector<SDNode*> Sequence; @@ -660,7 +662,7 @@ private: void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { if (N->getNodeId() != 0) - llvm_unreachable(0); + llvm_unreachable(nullptr); if (!N->isMachineOpcode() && (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) @@ -673,7 +675,7 @@ void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { unsigned NumOps = N->getNumOperands(); if (unsigned NumLeft = NumOps) { - SDNode *GluedOpN = 0; + SDNode *GluedOpN = nullptr; do { const SDValue &Op = N->getOperand(NumLeft-1); SDNode *OpN = Op.getNode(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 1a562d7..dedca41 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/STLExtras.h" @@ -36,6 +35,8 @@ #include <climits> using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); @@ -163,13 +164,14 @@ public: 
CodeGenOpt::Level OptLevel) : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), - Topo(SUnits, NULL) { + Topo(SUnits, nullptr) { const TargetMachine &tm = mf.getTarget(); if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); else - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( + tm.getSubtargetImpl(), this); } ~ScheduleDAGRRList() { @@ -177,7 +179,7 @@ public: delete AvailableQueue; } - void Schedule(); + void Schedule() override; ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } @@ -261,7 +263,7 @@ private: /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. - bool forceUnitLatencies() const { + bool forceUnitLatencies() const override { return !NeedLatency; } }; @@ -327,13 +329,13 @@ void ScheduleDAGRRList::Schedule() { NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence. - LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); - LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr); + LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr); CallSeqEndForStart.clear(); assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); // Build the scheduling graph. - BuildSchedGraph(NULL); + BuildSchedGraph(nullptr); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -369,7 +371,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --PredSU->NumSuccsLeft; @@ -461,7 +463,7 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, // to get to the CALLSEQ_BEGIN, but we need to find the path with the // most nesting in order to ensure that we find the corresponding match. 
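// Worked example (illustrative, not from this patch): with nested call
// sequences
//   t1 = CALLSEQ_BEGIN(outer) ... t2 = CALLSEQ_BEGIN(inner)
//   t3 = CALLSEQ_END(inner)   ... t4 = CALLSEQ_END(outer)
// a TokenFactor reached while walking up from t4 may offer several chain
// operands; following the one with the deepest nesting guarantees t4 is
// paired with the outer CALLSEQ_BEGIN rather than the inner one.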
if (N->getOpcode() == ISD::TokenFactor) { - SDNode *Best = 0; + SDNode *Best = nullptr; unsigned BestMaxNest = MaxNest; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { unsigned MyNestLevel = NestLevel; @@ -497,10 +499,10 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, N = N->getOperand(i).getNode(); goto found_chain_operand; } - return 0; + return nullptr; found_chain_operand:; if (N->getOpcode() == ISD::EntryToken) - return 0; + return nullptr; } } @@ -742,8 +744,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; - LiveRegGens[I->getReg()] = NULL; + LiveRegDefs[I->getReg()] = nullptr; + LiveRegGens[I->getReg()] = nullptr; releaseInterferences(I->getReg()); } } @@ -757,8 +759,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[CallResource] = NULL; - LiveRegGens[CallResource] = NULL; + LiveRegDefs[CallResource] = nullptr; + LiveRegGens[CallResource] = nullptr; releaseInterferences(CallResource); } } @@ -813,8 +815,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; - LiveRegGens[I->getReg()] = NULL; + LiveRegDefs[I->getReg()] = nullptr; + LiveRegGens[I->getReg()] = nullptr; releaseInterferences(I->getReg()); } } @@ -841,8 +843,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[CallResource] = NULL; - LiveRegGens[CallResource] = NULL; + LiveRegDefs[CallResource] = nullptr; + LiveRegGens[CallResource] = nullptr; releaseInterferences(CallResource); } } @@ -855,7 +857,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { // This becomes the nearest def. Note that an earlier def may still be // pending if this is a two-address node. 
LiveRegDefs[I->getReg()] = SU; - if (LiveRegGens[I->getReg()] == NULL || + if (LiveRegGens[I->getReg()] == nullptr || I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) LiveRegGens[I->getReg()] = I->getSUnit(); } @@ -936,17 +938,17 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) { SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SDNode *N = SU->getNode(); if (!N) - return NULL; + return nullptr; if (SU->getNode()->getGluedNode()) - return NULL; + return nullptr; SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue) - return NULL; + return nullptr; else if (VT == MVT::Other) TryUnfold = true; } @@ -954,18 +956,18 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Glue) - return NULL; + return nullptr; } if (TryUnfold) { SmallVector<SDNode*, 2> NewNodes; if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) - return NULL; + return nullptr; // unfolding an x86 DEC64m operation results in store, dec, load which // can't be handled here so quit if (NewNodes.size() == 3) - return NULL; + return nullptr; DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -1136,11 +1138,11 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVectorImpl<SUnit*> &Copies) { - SUnit *CopyFromSU = CreateNewSUnit(NULL); + SUnit *CopyFromSU = CreateNewSUnit(nullptr); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = CreateNewSUnit(NULL); + SUnit *CopyToSU = CreateNewSUnit(nullptr); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -1244,7 +1246,7 @@ static const uint32_t *getNodeRegMask(const SDNode *N) { if (const RegisterMaskSDNode *Op = dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) return Op->getRegMask(); - return NULL; + return nullptr; } /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay @@ -1355,7 +1357,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { /// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { - SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop(); + SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); while (CurSU) { SmallVector<unsigned, 4> LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) @@ -1371,7 +1373,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { Interferences.push_back(CurSU); } else { - assert(CurSU->isPending && "Intereferences are pending"); + assert(CurSU->isPending && "Interferences are pending"); // Update the interference with current live regs. LRegsPair.first->second = LRegs; } @@ -1389,7 +1391,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Try unscheduling up to the point where it's safe to schedule // this node. - SUnit *BtSU = NULL; + SUnit *BtSU = nullptr; unsigned LiveCycle = UINT_MAX; for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { unsigned Reg = LRegs[j]; @@ -1449,7 +1451,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // expensive. // If cross copy register class is null, then it's not possible to copy // the value at all. 
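// Illustrative note for the recovery path below (hypothetical target): if
// the interfering value lives in a class RC that is too expensive to copy
// directly, the scheduler first tries to duplicate the defining node
// (CopyAndMoveSuccessors); failing that, it bounces the value through the
// cross-copy class DestRC with a pair of copies, and a null DestRC means
// even that escape hatch is unavailable.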
- SUnit *NewDef = 0; + SUnit *NewDef = nullptr; if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); if (!DestRC && !NewDef) @@ -1539,7 +1541,6 @@ template<class SF> struct reverse_sort : public queue_sort { SF &SortFunc; reverse_sort(SF &sf) : SortFunc(sf) {} - reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} bool operator()(SUnit* left, SUnit* right) const { // reverse left/right rather than simply !SortFunc(left, right) @@ -1559,7 +1560,6 @@ struct bu_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1574,8 +1574,6 @@ struct src_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - src_ls_rr_sort(const src_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1590,8 +1588,6 @@ struct hybrid_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1609,8 +1605,6 @@ struct ilp_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1654,7 +1648,7 @@ public: const TargetLowering *tli) : SchedulingPriorityQueue(hasReadyFilter), CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), - MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); RegLimit.resize(NumRC); @@ -1675,14 +1669,14 @@ public: return scheduleDAG->getHazardRec(); } - void initNodes(std::vector<SUnit> &sunits); + void initNodes(std::vector<SUnit> &sunits) override; - void addNode(const SUnit *SU); + void addNode(const SUnit *SU) override; - void updateNode(const SUnit *SU); + void updateNode(const SUnit *SU) override; - void releaseState() { - SUnits = 0; + void releaseState() override { + SUnits = nullptr; SethiUllmanNumbers.clear(); std::fill(RegPressure.begin(), RegPressure.end(), 0); } @@ -1695,26 +1689,26 @@ public: return SU->getNode()->getIROrder(); } - bool empty() const { return Queue.empty(); } + bool empty() const override { return Queue.empty(); } - void push(SUnit *U) { + void push(SUnit *U) override { assert(!U->NodeQueueId && "Node in the queue already"); U->NodeQueueId = ++CurQueueId; Queue.push_back(U); } - void remove(SUnit *SU) { + void remove(SUnit *SU) override { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && "Not in queue!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); SU->NodeQueueId = 0; } - bool tracksRegPressure() const { return TracksRegPressure; } + bool tracksRegPressure() const override { return TracksRegPressure; } void dumpRegPressure() const; @@ -1724,9 +1718,9 @@ public: int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void scheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU) override; - void unscheduledNode(SUnit *SU); + void unscheduledNode(SUnit *SU) override; protected: bool canClobber(const SUnit *SU, const SUnit 
*Op); @@ -1738,12 +1732,12 @@ protected: template<class SF> static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Q.begin()), E = Q.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; SUnit *V = *Best; - if (Best != prior(Q.end())) + if (Best != std::prev(Q.end())) std::swap(*Best, Q.back()); Q.pop_back(); return V; @@ -1776,14 +1770,14 @@ public: tii, tri, tli), Picker(this) {} - bool isBottomUp() const { return SF::IsBottomUp; } + bool isBottomUp() const override { return SF::IsBottomUp; } - bool isReady(SUnit *U) const { + bool isReady(SUnit *U) const override { return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); } - SUnit *pop() { - if (Queue.empty()) return NULL; + SUnit *pop() override { + if (Queue.empty()) return nullptr; SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); V->NodeQueueId = 0; @@ -1791,7 +1785,7 @@ } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void dump(ScheduleDAG *DAG) const { + void dump(ScheduleDAG *DAG) const override { // Emulate pop() without clobbering NodeQueueIds. std::vector<SUnit*> DumpQueue = Queue; SF DumpPicker = Picker; @@ -2832,7 +2826,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { continue; // Locate the single data predecessor. - SUnit *PredSU = 0; + SUnit *PredSU = nullptr; for (SUnit::const_pred_iterator II = SU->Preds.begin(), EE = SU->Preds.end(); II != EE; ++II) if (!II->isCtrl()) { @@ -2988,7 +2982,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = - new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -3002,7 +2996,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = - new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index c1893c9..de910b7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "SDNodeDbgValue.h" @@ -35,6 +34,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(LoadsClustered, "Number of loads clustered together"); // This allows a latency-based scheduler to notice high-latency instructions @@ -46,7 +47,7 @@ static cl::opt<int> HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), BB(0), DAG(0), + : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform
scheduling. @@ -67,12 +68,12 @@ void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { /// SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { #ifndef NDEBUG - const SUnit *Addr = 0; + const SUnit *Addr = nullptr; if (!SUnits.empty()) Addr = &SUnits[0]; #endif SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); - assert((Addr == 0 || Addr == &SUnits[0]) && + assert((Addr == nullptr || Addr == &SUnits[0]) && "SUnits std::vector reallocated on the fly!"); SUnits.back().OrigNode = &SUnits.back(); SUnit *SU = &SUnits.back(); @@ -142,8 +143,8 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, if (ExtraOper.getNode()) Ops.push_back(ExtraOper); - SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); - MachineSDNode::mmo_iterator Begin = 0, End = 0; + SDVTList VTList = DAG->getVTList(VTs); + MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr; MachineSDNode *MN = dyn_cast<MachineSDNode>(N); // Store memory references. @@ -152,7 +153,7 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, End = MN->memoperands_end(); } - DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); + DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops); // Reset the memory references if (MN) @@ -205,7 +206,7 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { - SDNode *Chain = 0; + SDNode *Chain = nullptr; unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) Chain = Node->getOperand(NumOps-1).getNode(); @@ -271,7 +272,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { // Cluster loads by adding MVT::Glue outputs and inputs. This also // ensure they are scheduled in order of increasing addresses. SDNode *Lead = Loads[0]; - SDValue InGlue = SDValue(0, 0); + SDValue InGlue = SDValue(nullptr, 0); if (AddGlue(Lead, InGlue, true, DAG)) InGlue = SDValue(Lead, Lead->getNumValues() - 1); for (unsigned I = 1, E = Loads.size(); I != E; ++I) { @@ -572,7 +573,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { return; // Found a normal regdef. } Node = Node->getGluedNode(); - if (Node == NULL) { + if (!Node) { return; // No values left to visit. } InitNodeNumDefs(); @@ -743,13 +744,13 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || // Fast-isel may have inserted some instructions, in which case the // BB->back().isPHI() test will not fire when we want it to. - prior(Emitter.getInsertPos())->isPHI()) { + std::prev(Emitter.getInsertPos())->isPHI()) { // Did not insert any instruction. 
- Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); + Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr)); return; } - Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos()))); ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2ff37e0..39ebadf 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -117,13 +117,13 @@ namespace llvm { virtual MachineBasicBlock* EmitSchedule(MachineBasicBlock::iterator &InsertPos); - virtual void dumpNode(const SUnit *SU) const; + void dumpNode(const SUnit *SU) const override; void dumpSchedule() const; - virtual std::string getGraphNodeLabel(const SUnit *SU) const; + std::string getGraphNodeLabel(const SUnit *SU) const override; - virtual std::string getDAGName() const; + std::string getDAGName() const override; virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; @@ -139,7 +139,7 @@ namespace llvm { public: RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); - bool IsValid() const { return Node != NULL; } + bool IsValid() const { return Node != nullptr; } MVT GetValue() const { assert(IsValid() && "bad iterator"); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 58aa1fe..4589b0c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/Statistic.h" @@ -35,6 +34,8 @@ #include <climits> using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(NumNoops , "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); @@ -72,7 +73,8 @@ public: : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetMachine &tm = mf.getTarget(); - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( + tm.getSubtargetImpl(), this); } ~ScheduleDAGVLIW() { @@ -80,7 +82,7 @@ public: delete AvailableQueue; } - void Schedule(); + void Schedule() override; private: void releaseSucc(SUnit *SU, const SDep &D); @@ -120,7 +122,7 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { dbgs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif assert(!D.isWeak() && "unexpected artificial DAG edge"); @@ -204,12 +206,12 @@ void ScheduleDAGVLIW::listScheduleTopDown() { // don't advance the hazard recognizer. if (AvailableQueue->empty()) { // Reset DFA state. - AvailableQueue->scheduledNode(0); + AvailableQueue->scheduledNode(nullptr); ++CurCycle; continue; } - SUnit *FoundSUnit = 0; + SUnit *FoundSUnit = nullptr; bool HasNoopHazards = false; while (!AvailableQueue->empty()) { @@ -256,7 +258,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { // processors without pipeline interlocks and other cases. 
DEBUG(dbgs() << "*** Emitting noop\n"); HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop + Sequence.push_back(nullptr); // NULL here means noop ++NumNoops; ++CurCycle; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 00ffe00..16f7349 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -18,17 +18,15 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" @@ -50,6 +48,7 @@ #include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> #include <cmath> + using namespace llvm; /// makeVTList - Return an instance of the SDVTList struct initialized with the @@ -149,33 +148,50 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; - unsigned i = 0, e = N->getNumOperands(); - - // Skip over all of the undef values. - while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) - ++i; + bool IsAllUndef = true; + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + IsAllUndef = false; + // Do not accept build_vectors that aren't all constants or which have non-0 + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target + // and a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all zeros, not whether the individual + // constants are. + SDValue Zero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { + if (CN->getAPIntValue().countTrailingZeros() < EltSize) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize) + return false; + } else + return false; + } // Do not accept an all-undef vector. - if (i == e) return false; + if (IsAllUndef) + return false; + return true; +} - // Do not accept build_vectors that aren't all constants or which have non-0 - // elements. - SDValue Zero = N->getOperand(i); - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { - if (!CN->isNullValue()) - return false; - } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { - if (!CFPN->getValueAPF().isPosZero()) - return false; - } else +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantSDNode or undef. 
+bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; - // Okay, we have at least one 0 value, check to see if the rest match or are - // undefs. - for (++i; i != e; ++i) - if (N->getOperand(i) != Zero && - N->getOperand(i).getOpcode() != ISD::UNDEF) + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantSDNode>(Op)) return false; + } return true; } @@ -217,6 +233,21 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } +ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) { + switch (ExtType) { + case ISD::EXTLOAD: + return ISD::ANY_EXTEND; + case ISD::SEXTLOAD: + return ISD::SIGN_EXTEND; + case ISD::ZEXTLOAD: + return ISD::ZERO_EXTEND; + default: + break; + } + + llvm_unreachable("Invalid LoadExtType"); +} + /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) /// when given the operation for (X op Y). ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { @@ -335,29 +366,42 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. /// static void AddNodeIDOperands(FoldingSetNodeID &ID, - const SDValue *Ops, unsigned NumOps) { - for (; NumOps; --NumOps, ++Ops) { - ID.AddPointer(Ops->getNode()); - ID.AddInteger(Ops->getResNo()); + ArrayRef<SDValue> Ops) { + for (auto& Op : Ops) { + ID.AddPointer(Op.getNode()); + ID.AddInteger(Op.getResNo()); } } /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. /// static void AddNodeIDOperands(FoldingSetNodeID &ID, - const SDUse *Ops, unsigned NumOps) { - for (; NumOps; --NumOps, ++Ops) { - ID.AddPointer(Ops->getNode()); - ID.AddInteger(Ops->getResNo()); + ArrayRef<SDUse> Ops) { + for (auto& Op : Ops) { + ID.AddPointer(Op.getNode()); + ID.AddInteger(Op.getResNo()); } } -static void AddNodeIDNode(FoldingSetNodeID &ID, - unsigned short OpC, SDVTList VTList, - const SDValue *OpList, unsigned N) { +static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw, + bool exact) { + ID.AddBoolean(nuw); + ID.AddBoolean(nsw); + ID.AddBoolean(exact); +} + +/// AddBinaryNodeIDCustom - Add a BinarySDNode's special info +static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode, + bool nuw, bool nsw, bool exact) { + if (isBinOpWithFlags(Opcode)) + AddBinaryNodeIDCustom(ID, nuw, nsw, exact); +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, + SDVTList VTList, ArrayRef<SDValue> OpList) { AddNodeIDOpcode(ID, OpC); AddNodeIDValueTypes(ID, VTList); - AddNodeIDOperands(ID, OpList, N); + AddNodeIDOperands(ID, OpList); } /// AddNodeIDCustom - If this is an SDNode with special info, add this info to @@ -369,9 +413,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { llvm_unreachable("Should only be used on nodes with operands"); default: break; // Normal nodes don't need extra info.
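// Worked example (illustrative, not from this patch): once the binary
// opcode cases below mix nuw/nsw/exact into the FoldingSet ID via
// AddBinaryNodeIDCustom, an (add nsw X, Y) and a plain (add X, Y) produce
// different IDs even though opcode, value types and operands all match,
// so CSE keeps them as distinct nodes instead of silently dropping the
// flags of one of them.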
case ISD::TargetConstant: - case ISD::Constant: - ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); + case ISD::Constant: { + const ConstantSDNode *C = cast<ConstantSDNode>(N); + ID.AddPointer(C->getConstantIntValue()); + ID.AddBoolean(C->isOpaque()); break; + } case ISD::TargetConstantFP: case ISD::ConstantFP: { ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); @@ -442,7 +489,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::SDIV: + case ISD::UDIV: + case ISD::SRA: + case ISD::SRL: + case ISD::MUL: + case ISD::ADD: + case ISD::SUB: + case ISD::SHL: { + const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N); + AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(), + BinNode->hasNoSignedWrap(), BinNode->isExact()); + break; + } case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: @@ -496,7 +557,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { // Add the return value info. AddNodeIDValueTypes(ID, N->getVTList()); // Add the operand info. - AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands()); + AddNodeIDOperands(ID, N->ops()); // Handle SDNode leafs with special info. AddNodeIDCustom(ID, N); @@ -574,7 +635,7 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) { SDNode *N = DeadNodes.pop_back_val(); for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) - DUL->NodeDeleted(N, 0); + DUL->NodeDeleted(N, nullptr); // Take the node out of the appropriate CSE map. RemoveNodeFromCSEMaps(N); @@ -660,8 +721,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { case ISD::CONDCODE: assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && "Cond code doesn't exist!"); - Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0; - CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0; + Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr; + CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr; break; case ISD::ExternalSymbol: Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); @@ -678,8 +739,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { if (VT.isExtended()) { Erased = ExtendedValueTypeNodes.erase(VT); } else { - Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0; - ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0; + Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr; + ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr; } break; } @@ -741,11 +802,11 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, void *&InsertPos) { if (doNotCSE(N)) - return 0; + return nullptr; SDValue Ops[] = { Op }; FoldingSetNodeID ID; - AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); return Node; @@ -759,11 +820,11 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op1, SDValue Op2, void *&InsertPos) { if (doNotCSE(N)) - return 0; + return nullptr; SDValue Ops[] = { Op1, Op2 }; FoldingSetNodeID ID; - AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); return Node; @@ -774,14 
+835,13 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, /// were replaced with those specified. If this node is never memoized, /// return null, otherwise return a pointer to the slot it would take. If a /// node already exists with these operands, the slot will be non-null. -SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, - const SDValue *Ops,unsigned NumOps, +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, void *&InsertPos) { if (doNotCSE(N)) - return 0; + return nullptr; FoldingSetNodeID ID; - AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); return Node; @@ -877,18 +937,16 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL), + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(nullptr), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), - UpdateListeners(0) { + UpdateListeners(nullptr) { AllNodes.push_back(&EntryNode); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti, - const TargetLowering *tli) { +void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) { MF = &mf; - TTI = tti; TLI = tli; Context = &mf.getFunction()->getContext(); } @@ -906,6 +964,25 @@ void SelectionDAG::allnodes_clear() { DeallocateNode(AllNodes.begin()); } +BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, + SDVTList VTs, SDValue N1, + SDValue N2, bool nuw, bool nsw, + bool exact) { + if (isBinOpWithFlags(Opcode)) { + BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( + Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + FN->setHasNoUnsignedWrap(nuw); + FN->setHasNoSignedWrap(nsw); + FN->setIsExact(exact); + + return FN; + } + + BinarySDNode *N = new (NodeAllocator) + BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + return N; +} + void SelectionDAG::clear() { allnodes_clear(); OperandAllocator.Reset(); @@ -915,11 +992,11 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), - static_cast<CondCodeSDNode*>(0)); + static_cast<CondCodeSDNode*>(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), - static_cast<SDNode*>(0)); + static_cast<SDNode*>(nullptr)); - EntryNode.UseList = 0; + EntryNode.UseList = nullptr; AllNodes.push_back(&EntryNode); Root = getEntryNode(); DbgInfo->clear(); @@ -943,6 +1020,15 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { getNode(ISD::TRUNCATE, DL, VT, Op); } +SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, + EVT OpVT) { + if (VT.bitsLE(Op.getValueType())) + return getNode(ISD::TRUNCATE, SL, VT, Op); + + TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT); + return getNode(TLI->getExtendForContent(BType), SL, VT, Op); +} + SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " @@ -955,6 +1041,36 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { getConstant(Imm, 
Op.getValueType())); } +SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { @@ -964,19 +1080,37 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, NegOne); } -SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { +SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.getScalarType(); + SDValue TrueValue; + switch (TLI->getBooleanContents(VT)) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + TrueValue = getConstant(1, VT); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + VT); + break; + } + return getNode(ISD::XOR, DL, VT, Val, TrueValue); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); - return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO); } -SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { - return getConstant(*ConstantInt::get(*Context, Val), VT, isT); +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO) +{ + return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, + bool isO) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); @@ -1018,7 +1152,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT 
VT, bool isT) {
     for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
       EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
                                      .trunc(ViaEltSizeInBits),
-                                     ViaEltVT, isT));
+                                     ViaEltVT, isT, isO));
     }

     // EltParts is currently in little endian order. If we actually want
@@ -1039,7 +1173,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
     SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT,
                              getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT,
-                                     &Ops[0], Ops.size()));
+                                     Ops));
     return Result;
   }
@@ -1047,16 +1181,17 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
          "APInt size does not match type size!");
   unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+  AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
   ID.AddPointer(Elt);
-  void *IP = 0;
-  SDNode *N = NULL;
+  ID.AddBoolean(isO);
+  void *IP = nullptr;
+  SDNode *N = nullptr;
   if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
     if (!VT.isVector())
       return SDValue(N, 0);

   if (!N) {
-    N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT);
+    N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT);
     CSEMap.InsertNode(N, IP);
     AllNodes.push_back(N);
   }
@@ -1065,7 +1200,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
   if (VT.isVector()) {
     SmallVector<SDValue, 8> Ops;
     Ops.assign(VT.getVectorNumElements(), Result);
-    Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size());
+    Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops);
   }
   return Result;
 }
@@ -1089,10 +1224,10 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
   // we don't have issues with SNANs.
   unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+  AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
   ID.AddPointer(&V);
-  void *IP = 0;
-  SDNode *N = NULL;
+  void *IP = nullptr;
+  SDNode *N = nullptr;
   if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
     if (!VT.isVector())
       return SDValue(N, 0);
@@ -1108,7 +1243,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
     SmallVector<SDValue, 8> Ops;
     Ops.assign(VT.getVectorNumElements(), Result);
     // FIXME SDLoc info might be appropriate here
-    Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size());
+    Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops);
   }
   return Result;
 }
@@ -1143,26 +1278,19 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
   if (BitWidth < 64)
     Offset = SignExtend64(Offset, BitWidth);

-  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
-  if (!GVar) {
-    // If GV is an alias then use the aliasee for determining thread-localness.
-    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-      GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
-  }
-
   unsigned Opc;
-  if (GVar && GVar->isThreadLocal())
+  if (GV->isThreadLocal())
     Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
   else
     Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;

   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  AddNodeIDNode(ID, Opc, getVTList(VT), None);
   ID.AddPointer(GV);
   ID.AddInteger(Offset);
   ID.AddInteger(TargetFlags);
   ID.AddInteger(GV->getType()->getAddressSpace());
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1177,9 +1305,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,

 SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
   unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  AddNodeIDNode(ID, Opc, getVTList(VT), None);
   ID.AddInteger(FI);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1195,10 +1323,10 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
          "Cannot set target flags on target-independent jump tables");
   unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  AddNodeIDNode(ID, Opc, getVTList(VT), None);
   ID.AddInteger(JTI);
   ID.AddInteger(TargetFlags);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1220,12 +1348,12 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
       TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  AddNodeIDNode(ID, Opc, getVTList(VT), None);
   ID.AddInteger(Alignment);
   ID.AddInteger(Offset);
   ID.AddPointer(C);
   ID.AddInteger(TargetFlags);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1248,12 +1376,12 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
       TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  AddNodeIDNode(ID, Opc, getVTList(VT), None);
   ID.AddInteger(Alignment);
   ID.AddInteger(Offset);
   C->addSelectionDAGCSEId(ID);
   ID.AddInteger(TargetFlags);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1267,11 +1395,11 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,

 SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
                                      unsigned char TargetFlags) {
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0);
+  AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
   ID.AddInteger(Index);
   ID.AddInteger(Offset);
   ID.AddInteger(TargetFlags);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1284,9 +1412,9 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,

 SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+  AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);
   ID.AddPointer(MBB);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1333,7 +1461,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
   if ((unsigned)Cond >= CondCodeNodes.size())
     CondCodeNodes.resize(Cond+1);

-  if (CondCodeNodes[Cond] == 0) {
+  if (!CondCodeNodes[Cond]) {
     CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
     CondCodeNodes[Cond] = N;
     AllNodes.push_back(N);
@@ -1407,6 +1535,11 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
     N1 = getUNDEF(VT);
     commuteShuffle(N1, N2, MaskVec);
   }
+  // Reset our undef status after accounting for the mask.
+  N2Undef = N2.getOpcode() == ISD::UNDEF;
+  // Re-check whether both sides ended up undef.
+  if (N1.getOpcode() == ISD::UNDEF && N2Undef)
+    return getUNDEF(VT);

   // If Identity shuffle return that node.
   bool Identity = true;
@@ -1416,13 +1549,45 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
   if (Identity && NElts)
     return N1;

+  // Shuffling a constant splat doesn't change the result.
+  if (N2Undef) {
+    SDValue V = N1;
+
+    // Look through any bitcasts. We check that these don't change the number
+    // (and size) of elements and just changes their types.
+    while (V.getOpcode() == ISD::BITCAST)
+      V = V->getOperand(0);
+
+    // A splat should always show up as a build vector node.
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+      BitVector UndefElements;
+      SDValue Splat = BV->getSplatValue(&UndefElements);
+      // If this is a splat of an undef, shuffling it is also undef.
+      if (Splat && Splat.getOpcode() == ISD::UNDEF)
+        return getUNDEF(VT);
+
+      // We only have a splat which can skip shuffles if there is a splatted
+      // value and no undef lanes rearranged by the shuffle.
+      if (Splat && UndefElements.none()) {
+        // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+        // number of elements match or the value splatted is a zero constant.
+        if (V.getValueType().getVectorNumElements() ==
+            VT.getVectorNumElements())
+          return N1;
+        if (auto *C = dyn_cast<ConstantSDNode>(Splat))
+          if (C->isNullValue())
+            return N1;
+      }
+    }
+  }
+
   FoldingSetNodeID ID;
   SDValue Ops[2] = { N1, N2 };
-  AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+  AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
   for (unsigned i = 0; i != NElts; ++i)
     ID.AddInteger(MaskVec[i]);

-  void* IP = 0;
+  void* IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1441,6 +1606,27 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
   return SDValue(N, 0);
 }

+SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
+  MVT VT = SV.getSimpleValueType(0);
+  unsigned NumElems = VT.getVectorNumElements();
+  SmallVector<int, 8> MaskVec;
+
+  for (unsigned i = 0; i != NumElems; ++i) {
+    int Idx = SV.getMaskElt(i);
+    if (Idx >= 0) {
+      if (Idx < (int)NumElems)
+        Idx += NumElems;
+      else
+        Idx -= NumElems;
+    }
+    MaskVec.push_back(Idx);
+  }
+
+  SDValue Op0 = SV.getOperand(0);
+  SDValue Op1 = SV.getOperand(1);
+  return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, &MaskVec[0]);
+}
+
 SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
                                        SDValue Val, SDValue DTy,
                                        SDValue STy, SDValue Rnd, SDValue Sat,
@@ -1453,14 +1639,14 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,

   FoldingSetNodeID ID;
   SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
-  AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
-  void* IP = 0;
+  AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops);
+  void* IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);

   CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(),
                                                            dl.getDebugLoc(),
-                                                           Ops, 5, Code);
+                                                           Ops, Code);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1468,9 +1654,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,

 SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+  AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
   ID.AddInteger(RegNo);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1482,9 +1668,9 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {

 SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0);
+  AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);
   ID.AddPointer(RegMask);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1497,9 +1683,9 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
 SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {
   FoldingSetNodeID ID;
   SDValue Ops[] = { Root };
-  AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1);
+  AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
   ID.AddPointer(Label);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1518,11 +1704,11 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
   unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;

   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+  AddNodeIDNode(ID, Opc, getVTList(VT), None);
   ID.AddPointer(BA);
   ID.AddInteger(Offset);
   ID.AddInteger(TargetFlags);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1538,10 +1724,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
          "SrcValue is not a pointer?");

   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+  AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
   ID.AddPointer(V);

-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1554,10 +1740,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
 /// getMDNode - Return an MDNodeSDNode which holds an MDNode.
 SDValue SelectionDAG::getMDNode(const MDNode *MD) {
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
+  AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None);
   ID.AddPointer(MD);

-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1572,11 +1758,11 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
                                        unsigned SrcAS, unsigned DestAS) {
   SDValue Ops[] = {Ptr};
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1);
+  AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops);
   ID.AddInteger(SrcAS);
   ID.AddInteger(DestAS);

-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -1640,7 +1826,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
   case ISD::SETTRUE:
   case ISD::SETTRUE2: {
     const TargetLowering *TLI = TM.getTargetLowering();
-    TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector());
+    TargetLowering::BooleanContent Cnt =
+        TLI->getBooleanContents(N1->getValueType(0));
     return getConstant(
         Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
   }
@@ -1755,17 +1942,14 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
 bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
                                      unsigned Depth) const {
   APInt KnownZero, KnownOne;
-  ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
-  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+  computeKnownBits(Op, KnownZero, KnownOne, Depth);
   return (KnownZero & Mask) == Mask;
 }

-/// ComputeMaskedBits - Determine which of the bits specified in Mask are
-/// known to be either zero or one and return them in the KnownZero/KnownOne
-/// bitsets.  This code only analyzes bits in Mask, in order to short-circuit
-/// processing.
-void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
-                                     APInt &KnownOne, unsigned Depth) const {
+/// Determine which bits of Op are known to be either zero or one and return
+/// them in the KnownZero/KnownOne bitsets.
+void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
+                                    APInt &KnownOne, unsigned Depth) const {
   const TargetLowering *TLI = TM.getTargetLowering();
   unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
@@ -1780,48 +1964,40 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
     // We know all of the bits for a constant!
     KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
     KnownZero = ~KnownOne;
-    return;
+    break;
   case ISD::AND:
     // If either the LHS or the RHS are Zero, the result is zero.
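A note on the getCommutedVectorShuffle addition above: it builds the mirror-image mask by moving every non-undef index across the operand boundary, so the same shuffle can be expressed with its two inputs swapped. A minimal standalone sketch of that index arithmetic (plain C++, illustrative only, not LLVM API):

    #include <vector>

    // Mirrors the mask rewrite in getCommutedVectorShuffle: indices below
    // NumElems select from operand 0 and must now point into operand 1,
    // and vice versa. Negative indices mark undef lanes and stay as-is.
    std::vector<int> commuteMask(const std::vector<int> &Mask, int NumElems) {
      std::vector<int> Out;
      for (int Idx : Mask) {
        if (Idx >= 0)
          Idx = (Idx < NumElems) ? Idx + NumElems : Idx - NumElems;
        Out.push_back(Idx);
      }
      return Out;
    }

    // With NumElems = 4, the mask <0, 5, -1, 2> becomes <4, 1, -1, 6>.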
-    ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);

     // Output known-1 bits are only known if set in both the LHS & RHS.
     KnownOne &= KnownOne2;
     // Output known-0 are known to be clear if zero in either the LHS | RHS.
     KnownZero |= KnownZero2;
-    return;
+    break;
   case ISD::OR:
-    ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);

     // Output known-0 bits are only known if clear in both the LHS & RHS.
     KnownZero &= KnownZero2;
     // Output known-1 are known to be set if set in either the LHS | RHS.
     KnownOne |= KnownOne2;
-    return;
+    break;
   case ISD::XOR: {
-    ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);

     // Output known-0 bits are known if clear or set in both the LHS & RHS.
     APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
     // Output known-1 are known to be set if set in only one of the LHS, RHS.
     KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
     KnownZero = KnownZeroOut;
-    return;
+    break;
   }
   case ISD::MUL: {
-    ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);

     // If low bits are zero in either operand, output low known-0 bits.
     // Also compute a conserative estimate for high known-0 bits.
@@ -1838,46 +2014,42 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
     LeadZ = std::min(LeadZ, BitWidth);
     KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
                 APInt::getHighBitsSet(BitWidth, LeadZ);
-    return;
+    break;
   }
   case ISD::UDIV: {
     // For the purposes of computing leading zeros we can conservatively
     // treat a udiv as a logical right shift by the power of 2 known to
     // be less than the denominator.
-    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
     unsigned LeadZ = KnownZero2.countLeadingOnes();

     KnownOne2.clearAllBits();
     KnownZero2.clearAllBits();
-    ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
     unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
     if (RHSUnknownLeadingOnes != BitWidth)
       LeadZ = std::min(BitWidth,
                        LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);

     KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
-    return;
+    break;
   }
   case ISD::SELECT:
-    ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);

     // Only known if known in both the LHS and RHS.
     KnownOne &= KnownOne2;
     KnownZero &= KnownZero2;
-    return;
+    break;
   case ISD::SELECT_CC:
-    ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);

     // Only known if known in both the LHS and RHS.
     KnownOne &= KnownOne2;
     KnownZero &= KnownZero2;
-    return;
+    break;
   case ISD::SADDO:
   case ISD::UADDO:
   case ISD::SSUBO:
@@ -1885,14 +2057,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
   case ISD::SMULO:
   case ISD::UMULO:
     if (Op.getResNo() != 1)
-      return;
-    // The boolean result conforms to getBooleanContents.  Fall through.
+      break;
+    // The boolean result conforms to getBooleanContents.
+    // If we know the result of a setcc has the top bits zero, use this info.
+    // We know that we have an integer-based boolean since these operations
+    // are only available for integer.
+    if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+            TargetLowering::ZeroOrOneBooleanContent &&
+        BitWidth > 1)
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    break;
   case ISD::SETCC:
     // If we know the result of a setcc has the top bits zero, use this info.
-    if (TLI->getBooleanContents(Op.getValueType().isVector()) ==
-        TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
+    if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+            TargetLowering::ZeroOrOneBooleanContent &&
+        BitWidth > 1)
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
-    return;
+    break;
   case ISD::SHL:
     // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
     if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -1900,16 +2081,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
       unsigned ShAmt = SA->getZExtValue();

       // If the shift count is an invalid immediate, don't do anything.
       if (ShAmt >= BitWidth)
-        return;
+        break;

-      ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
       KnownZero <<= ShAmt;
       KnownOne  <<= ShAmt;
       // low bits known zero.
       KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
     }
-    return;
+    break;
   case ISD::SRL:
     // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
     if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -1917,31 +2097,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
       unsigned ShAmt = SA->getZExtValue();

       // If the shift count is an invalid immediate, don't do anything.
       if (ShAmt >= BitWidth)
-        return;
+        break;

-      ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
       KnownZero = KnownZero.lshr(ShAmt);
       KnownOne  = KnownOne.lshr(ShAmt);

       APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
       KnownZero |= HighBits;  // High bits known zero.
     }
-    return;
+    break;
   case ISD::SRA:
     if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       unsigned ShAmt = SA->getZExtValue();

       // If the shift count is an invalid immediate, don't do anything.
       if (ShAmt >= BitWidth)
-        return;
+        break;

       // If any of the demanded bits are produced by the sign extension, we also
       // demand the input sign bit.
       APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);

-      ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
       KnownZero = KnownZero.lshr(ShAmt);
       KnownOne  = KnownOne.lshr(ShAmt);
@@ -1955,7 +2133,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
         KnownOne |= HighBits;  // New bits are known one.
       }
     }
-    return;
+    break;
   case ISD::SIGN_EXTEND_INREG: {
     EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
     unsigned EBits = EVT.getScalarType().getSizeInBits();
@@ -1973,10 +2151,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
     if (NewBits.getBoolValue())
       InputDemandedBits |= InSignBit;

-    ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
     KnownOne &= InputDemandedBits;
     KnownZero &= InputDemandedBits;
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");

     // If the sign bit of the input is known set or clear, then we know the
     // top bits of the result.
@@ -1990,7 +2167,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
       KnownZero &= ~NewBits;
       KnownOne  &= ~NewBits;
     }
-    return;
+    break;
   }
   case ISD::CTTZ:
   case ISD::CTTZ_ZERO_UNDEF:
@@ -2000,7 +2177,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
     unsigned LowBits = Log2_32(BitWidth)+1;
     KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
     KnownOne.clearAllBits();
-    return;
+    break;
   }
   case ISD::LOAD: {
     LoadSDNode *LD = cast<LoadSDNode>(Op);
@@ -2010,9 +2187,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
       unsigned MemBits = VT.getScalarType().getSizeInBits();
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
     } else if (const MDNode *Ranges = LD->getRanges()) {
-      computeMaskedBitsLoad(*Ranges, KnownZero);
+      computeKnownBitsFromRangeMetadata(*Ranges, KnownZero);
     }
-    return;
+    break;
   }
   case ISD::ZERO_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
@@ -2020,11 +2197,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
     APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
     KnownZero = KnownZero.trunc(InBits);
     KnownOne = KnownOne.trunc(InBits);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
     KnownZero = KnownZero.zext(BitWidth);
     KnownOne = KnownOne.zext(BitWidth);
     KnownZero |= NewBits;
-    return;
+    break;
   }
   case ISD::SIGN_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
@@ -2033,13 +2210,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,

     KnownZero = KnownZero.trunc(InBits);
     KnownOne = KnownOne.trunc(InBits);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);

     // Note if the sign bit is known to be zero or one.
     bool SignBitKnownZero = KnownZero.isNegative();
     bool SignBitKnownOne  = KnownOne.isNegative();
-    assert(!(SignBitKnownZero && SignBitKnownOne) &&
-           "Sign bit can't be known to be both zero and one!");

     KnownZero = KnownZero.zext(BitWidth);
     KnownOne = KnownOne.zext(BitWidth);
@@ -2049,25 +2224,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
       KnownZero |= NewBits;
     else if (SignBitKnownOne)
       KnownOne  |= NewBits;
-    return;
+    break;
   }
   case ISD::ANY_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarType().getSizeInBits();
     KnownZero = KnownZero.trunc(InBits);
     KnownOne = KnownOne.trunc(InBits);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
     KnownZero = KnownZero.zext(BitWidth);
     KnownOne = KnownOne.zext(BitWidth);
-    return;
+    break;
   }
   case ISD::TRUNCATE: {
     EVT InVT = Op.getOperand(0).getValueType();
     unsigned InBits = InVT.getScalarType().getSizeInBits();
     KnownZero = KnownZero.zext(InBits);
     KnownOne = KnownOne.zext(InBits);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
     KnownZero = KnownZero.trunc(BitWidth);
     KnownOne = KnownOne.trunc(BitWidth);
     break;
@@ -2075,15 +2249,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
   case ISD::AssertZext: {
     EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
     APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
-    ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
     KnownZero |= (~InMask);
     KnownOne  &= (~KnownZero);
-    return;
+    break;
   }
   case ISD::FGETSIGN:
     // All bits are zero except the low bit.
     KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
-    return;
+    break;

   case ISD::SUB: {
     if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
@@ -2094,7 +2268,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
         unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
         // NLZ can't be BitWidth with no sign bit
         APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
-        ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+        computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);

         // If all of the MaskV bits are known to be zero, then we know the
         // output top bits are zero, because we now know that the output is
@@ -2113,18 +2287,16 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
     // Output known-0 bits are known if clear or set in both the low clear bits
     // common to both LHS & RHS.  For example, 8+(X<<3) is known to have the
     // low 3 bits clear.
-    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
     unsigned KnownZeroOut = KnownZero2.countTrailingOnes();

-    ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
     KnownZeroOut = std::min(KnownZeroOut,
                             KnownZero2.countTrailingOnes());

     if (Op.getOpcode() == ISD::ADD) {
       KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
-      return;
+      break;
     }

     // With ADDE, a carry bit may be added in, so we can only use this
@@ -2133,14 +2305,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
     // are known zero.
     if (KnownZeroOut >= 2) // ADDE
       KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
-    return;
+    break;
   }
   case ISD::SREM:
     if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       const APInt &RA = Rem->getAPIntValue().abs();
       if (RA.isPowerOf2()) {
         APInt LowBits = RA - 1;
-        ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
+        computeKnownBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);

         // The low bits of the first operand are unchanged by the srem.
         KnownZero = KnownZero2 & LowBits;
@@ -2158,36 +2330,38 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
         assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
       }
     }
-    return;
+    break;
   case ISD::UREM: {
     if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       const APInt &RA = Rem->getAPIntValue();
       if (RA.isPowerOf2()) {
         APInt LowBits = (RA - 1);
-        KnownZero |= ~LowBits;
-        ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
-        assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+        computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+
+        // The upper bits are all zero, the lower ones are unchanged.
+        KnownZero = KnownZero2 | ~LowBits;
+        KnownOne = KnownOne2 & LowBits;
         break;
       }
     }

     // Since the result is less than or equal to either operand, any leading
     // zero bits in either operand must also exist in the result.
-    ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);

     uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
                                 KnownZero2.countLeadingOnes());
     KnownOne.clearAllBits();
     KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
-    return;
+    break;
   }
   case ISD::FrameIndex:
   case ISD::TargetFrameIndex:
     if (unsigned Align = InferPtrAlignment(Op)) {
       // The low bits are known zero if the pointer is aligned.
       KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
-      return;
+      break;
     }
     break;

@@ -2199,9 +2373,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
   case ISD::INTRINSIC_W_CHAIN:
   case ISD::INTRINSIC_VOID:
     // Allow the target to implement this method for its nodes.
-    TLI->computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
-    return;
+    TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
+    break;
   }
+
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
 }

 /// ComputeNumSignBits - Return the number of times the sign bit of the
@@ -2275,7 +2451,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
       FirstAnswer = std::min(Tmp, Tmp2);
       // We computed what we know about the sign bits as our first
       // answer. Now proceed to the generic code that uses
-      // ComputeMaskedBits, and pick whichever answer is better.
+      // computeKnownBits, and pick whichever answer is better.
     }
     break;

@@ -2294,9 +2470,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
     if (Op.getResNo() != 1)
       break;
     // The boolean result conforms to getBooleanContents.  Fall through.
+    // If setcc returns 0/-1, all bits are sign bits.
+    // We know that we have an integer-based boolean since these operations
+    // are only available for integer.
+    if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+      return VTBits;
+    break;
   case ISD::SETCC:
     // If setcc returns 0/-1, all bits are sign bits.
-    if (TLI->getBooleanContents(Op.getValueType().isVector()) ==
+    if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
         TargetLowering::ZeroOrNegativeOneBooleanContent)
       return VTBits;
     break;
@@ -2325,7 +2508,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
     if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
       if (CRHS->isAllOnesValue()) {
         APInt KnownZero, KnownOne;
-        ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+        computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);

         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
@@ -2350,7 +2533,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
     if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
       if (CLHS->isNullValue()) {
         APInt KnownZero, KnownOne;
-        ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+        computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
         if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
@@ -2397,14 +2580,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) {
-    unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, Depth);
+    unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(FirstAnswer, NumBits);
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
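The computeKnownBits rename above also moves the per-case consistency asserts into a single check at the end of the switch (hence the return-to-break rewrites). The invariant is that no bit may be simultaneously known-zero and known-one. A hedged sketch of the AND/OR transfer functions on plain 8-bit masks, just to illustrate the bookkeeping (illustrative only; LLVM uses APInt):

    #include <cassert>
    #include <cstdint>

    struct Known { uint8_t Zero, One; };

    Known knownAnd(Known A, Known B) {
      // Result bit is 1 only if set in both; 0 if clear in either side.
      return {uint8_t(A.Zero | B.Zero), uint8_t(A.One & B.One)};
    }

    Known knownOr(Known A, Known B) {
      // Result bit is 0 only if clear in both; 1 if set in either side.
      return {uint8_t(A.Zero & B.Zero), uint8_t(A.One | B.One)};
    }

    int main() {
      Known A{0xF0, 0x0F};            // low nibble known one, high known zero
      Known B{0x0F, 0x00};            // low nibble known zero, high unknown
      Known R = knownAnd(A, B);
      assert((R.Zero & R.One) == 0);  // the invariant asserted once above
      assert(R.Zero == 0xFF);         // AND against known-zero bits: all zero
    }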
   APInt KnownZero, KnownOne;
-  ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+  computeKnownBits(Op, KnownZero, KnownOne, Depth);

   APInt Mask;
   if (KnownZero.isNegative()) {        // sign bit is 0
@@ -2492,8 +2675,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
 ///
 SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
-  void *IP = 0;
+  AddNodeIDNode(ID, Opcode, getVTList(VT), None);
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);

@@ -2510,17 +2693,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {

 SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
                               EVT VT, SDValue Operand) {
-  // Constant fold unary operations with an integer constant operand.
+  // Constant fold unary operations with an integer constant operand. Even
+  // opaque constant will be folded, because the folding of unary operations
+  // doesn't create new constants with different values. Nevertheless, the
+  // opaque flag is preserved during folding to prevent future folding with
+  // other constants.
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
     const APInt &Val = C->getAPIntValue();
     switch (Opcode) {
     default: break;
     case ISD::SIGN_EXTEND:
-      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
+      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT,
+                         C->isTargetOpcode(), C->isOpaque());
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::TRUNCATE:
-      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
+      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT,
+                         C->isTargetOpcode(), C->isOpaque());
     case ISD::UINT_TO_FP:
     case ISD::SINT_TO_FP: {
       APFloat apf(EVTToAPFloatSemantics(VT),
@@ -2537,15 +2726,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
       return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
       break;
     case ISD::BSWAP:
-      return getConstant(Val.byteSwap(), VT);
+      return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(),
+                         C->isOpaque());
     case ISD::CTPOP:
-      return getConstant(Val.countPopulation(), VT);
+      return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(),
+                         C->isOpaque());
     case ISD::CTLZ:
     case ISD::CTLZ_ZERO_UNDEF:
-      return getConstant(Val.countLeadingZeros(), VT);
+      return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(),
+                         C->isOpaque());
     case ISD::CTTZ:
     case ISD::CTTZ_ZERO_UNDEF:
-      return getConstant(Val.countTrailingZeros(), VT);
+      return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(),
+                         C->isOpaque());
     }
   }
@@ -2608,6 +2801,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
     }
   }

+  // Constant fold unary operations with a vector integer operand.
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
+    if (BV->isConstant()) {
+      switch (Opcode) {
+      default:
+        // FIXME: Entirely reasonable to perform folding of other unary
+        // operations here as the need arises.
+        break;
+      case ISD::UINT_TO_FP:
+      case ISD::SINT_TO_FP: {
+        SmallVector<SDValue, 8> Ops;
+        for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+          SDValue OpN = BV->getOperand(i);
+          // Let the above scalar folding handle the conversion of each
+          // element.
+          OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(),
+                        OpN);
+          Ops.push_back(OpN);
+        }
+        return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+      }
+      }
+    }
+  }
+
   unsigned OpOpcode = Operand.getNode()->getOpcode();
   switch (Opcode) {
   case ISD::TokenFactor:
@@ -2754,8 +2972,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
   if (VT != MVT::Glue) { // Don't CSE flag producing nodes
     FoldingSetNodeID ID;
     SDValue Ops[1] = { Operand };
-    AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
-    void *IP = 0;
+    AddNodeIDNode(ID, Opcode, VTs, Ops);
+    void *IP = nullptr;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);

@@ -2776,16 +2994,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,

 SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
                                              SDNode *Cst1, SDNode *Cst2) {
+  // If the opcode is a target-specific ISD node, there's nothing we can
+  // do here and the operand rules may not line up with the below, so
+  // bail early.
+  if (Opcode >= ISD::BUILTIN_OP_END)
+    return SDValue();
+
   SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
   SmallVector<SDValue, 4> Outputs;
   EVT SVT = VT.getScalarType();

   ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
   ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
-  if (Scalar1 && Scalar2) {
+  if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque()))
+    return SDValue();
+
+  if (Scalar1 && Scalar2)
     // Scalar instruction.
     Inputs.push_back(std::make_pair(Scalar1, Scalar2));
-  } else {
+  else {
     // For vectors extract each constant element into Inputs so we can constant
     // fold them individually.
     BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
@@ -2801,6 +3028,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
       if (!V1 || !V2) // Not a constant, bail.
         return SDValue();

+      if (V1->isOpaque() || V2->isOpaque())
+        return SDValue();
+
       // Avoid BUILD_VECTOR nodes that perform implicit truncation.
       // FIXME: This is valid and could be handled by truncating the APInts.
       if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
@@ -2874,17 +3104,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
     }
   }

+  assert((Scalar1 && Scalar2) || (VT.getVectorNumElements() == Outputs.size() &&
+                                  "Expected a scalar or vector!"));
+
   // Handle the scalar case first.
-  if (Scalar1 && Scalar2)
+  if (!VT.isVector())
     return Outputs.back();

-  // Otherwise build a big vector out of the scalar elements we generated.
-  return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs.data(),
-                 Outputs.size());
+  // We may have a vector type but a scalar result. Create a splat.
+  Outputs.resize(VT.getVectorNumElements(), Outputs.back());
+
+  // Build a big vector out of the scalar elements we generated.
+  return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs);
 }

 SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
-                              SDValue N2) {
+                              SDValue N2, bool nuw, bool nsw, bool exact) {
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   switch (Opcode) {
@@ -2910,7 +3145,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
     SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
                                   N1.getNode()->op_end());
     Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
-    return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+    return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
   }
   break;
   case ISD::AND:
@@ -3324,22 +3559,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
   }

   // Memoize this node if possible.
-  SDNode *N;
+  BinarySDNode *N;
   SDVTList VTs = getVTList(VT);
+  const bool BinOpHasFlags = isBinOpWithFlags(Opcode);
   if (VT != MVT::Glue) {
-    SDValue Ops[] = { N1, N2 };
+    SDValue Ops[] = {N1, N2};
     FoldingSetNodeID ID;
-    AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
-    void *IP = 0;
+    AddNodeIDNode(ID, Opcode, VTs, Ops);
+    if (BinOpHasFlags)
+      AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact);
+    void *IP = nullptr;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);

-    N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
-                                         DL.getDebugLoc(), VTs, N1, N2);
+    N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
+
     CSEMap.InsertNode(N, IP);
   } else {
-    N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
-                                         DL.getDebugLoc(), VTs, N1, N2);
+
+    N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
   }

   AllNodes.push_back(N);
@@ -3379,7 +3617,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
                                   N1.getNode()->op_end());
     Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
     Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
-    return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+    return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
   }
   break;
   case ISD::SETCC: {
@@ -3436,8 +3674,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
   if (VT != MVT::Glue) {
     SDValue Ops[] = { N1, N2, N3 };
     FoldingSetNodeID ID;
-    AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
-    void *IP = 0;
+    AddNodeIDNode(ID, Opcode, VTs, Ops);
+    void *IP = nullptr;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);

@@ -3460,14 +3698,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
                               SDValue N1, SDValue N2, SDValue N3,
                               SDValue N4) {
   SDValue Ops[] = { N1, N2, N3, N4 };
-  return getNode(Opcode, DL, VT, Ops, 4);
+  return getNode(Opcode, DL, VT, Ops);
 }

 SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
                               SDValue N1, SDValue N2, SDValue N3,
                               SDValue N4, SDValue N5) {
   SDValue Ops[] = { N1, N2, N3, N4, N5 };
-  return getNode(Opcode, DL, VT, Ops, 5);
+  return getNode(Opcode, DL, VT, Ops);
 }

 /// getStackArgumentTokenFactor - Compute a TokenFactor to force all
@@ -3489,8 +3727,7 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
       ArgChains.push_back(SDValue(L, 1));

   // Build a tokenfactor for all the chains.
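The FoldConstantArithmetic hunks above bail out when either input is an opaque constant: the value is known, but the node has asked not to be recombined (e.g. so that constant-hoisting decisions are not undone). An illustrative guard in the same shape, with hypothetical names rather than the LLVM API:

    // Sketch only; ConstantSDNode carries this as the isOpaque() bit.
    struct Konst { long long Val; bool Opaque; };

    bool tryFoldAdd(const Konst &A, const Konst &B, Konst &Out) {
      if (A.Opaque || B.Opaque)
        return false;            // respect the do-not-fold request
      Out = {A.Val + B.Val, false};
      return true;
    }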
-  return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other,
-                 &ArgChains[0], ArgChains.size());
+  return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
 }

 /// getMemsetValue - Vectorized representation of the memset value
@@ -3528,7 +3765,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
   if (Str.empty()) {
     if (VT.isInteger())
       return DAG.getConstant(0, VT);
-    else if (VT == MVT::f32 || VT == MVT::f64)
+    else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
       return DAG.getConstantFP(0.0, VT);
     else if (VT.isVector()) {
       unsigned NumElts = VT.getVectorNumElements();
@@ -3554,12 +3791,12 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
       Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
   }

-  // If the "cost" of materializing the integer immediate is 1 or free, then
-  // it is cost effective to turn the load into the immediate.
-  const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();
-  if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)
+  // If the "cost" of materializing the integer immediate is less than the cost
+  // of a load, then it is cost effective to turn the load into the immediate.
+  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+  if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
     return DAG.getConstant(Val, VT);
-  return SDValue(0, 0);
+  return SDValue(nullptr, 0);
 }

 /// getMemBasePlusOffset - Returns base and offset node for the
@@ -3575,7 +3812,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl,
 ///
 static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
   unsigned SrcDelta = 0;
-  GlobalAddressSDNode *G = NULL;
+  GlobalAddressSDNode *G = nullptr;
   if (Src.getOpcode() == ISD::GlobalAddress)
     G = cast<GlobalAddressSDNode>(Src);
   else if (Src.getOpcode() == ISD::ADD &&
@@ -3617,8 +3854,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                          DAG.getMachineFunction());

   if (VT == MVT::Other) {
-    if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
-        TLI.allowsUnalignedMemoryAccesses(VT)) {
+    unsigned AS = 0;
+    if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
+        TLI.allowsUnalignedMemoryAccesses(VT, AS)) {
       VT = TLI.getPointerTy();
     } else {
       switch (DstAlign & 7) {
@@ -3675,9 +3913,10 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
       // FIXME: Only does this for 64-bit or more since we don't have proper
       // cost model for unaligned load / store.
       bool Fast;
+      unsigned AS = 0;
       if (NumMemOps && AllowOverlap &&
           VTSize >= 8 && NewVTSize < Size &&
-          TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
+          TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast)
         VTSize = Size;
       else {
         VT = NewVT;
@@ -3809,8 +4048,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
     Size -= VTSize;
   }

-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                     &OutChains[0], OutChains.size());
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
 }

 static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
@@ -3875,8 +4113,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
     LoadChains.push_back(Value.getValue(1));
     SrcOff += VTSize;
   }
-  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                      &LoadChains[0], LoadChains.size());
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
   OutChains.clear();
   for (unsigned i = 0; i < NumMemOps; i++) {
     EVT VT = MemOps[i];
@@ -3890,8 +4127,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
     DstOff += VTSize;
   }

-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                     &OutChains[0], OutChains.size());
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
 }

 /// \brief Lower the call to 'memset' intrinsic function into a series of store
@@ -3992,8 +4228,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
     Size -= VTSize;
   }

-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                     &OutChains[0], OutChains.size());
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
 }

 SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
@@ -4052,15 +4287,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
   Entry.Node = Src; Args.push_back(Entry);
   Entry.Node = Size; Args.push_back(Entry);
   // FIXME: pass in SDLoc
-  TargetLowering::
-  CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
-                       false, false, false, false, 0,
-                       TLI->getLibcallCallingConv(RTLIB::MEMCPY),
-                       /*isTailCall=*/false,
-                       /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
-                       getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
-                                         TLI->getPointerTy()),
-                       Args, *this, dl);
+  TargetLowering::CallLoweringInfo CLI(*this);
+  CLI.setDebugLoc(dl).setChain(Chain)
+    .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+               Type::getVoidTy(*getContext()),
+               getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
+                                 TLI->getPointerTy()), std::move(Args), 0)
+    .setDiscardResult();
   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);

   return CallResult.second;
@@ -4110,15 +4343,13 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
   Entry.Node = Src; Args.push_back(Entry);
   Entry.Node = Size; Args.push_back(Entry);
   // FIXME: pass in SDLoc
-  TargetLowering::
-  CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
-                       false, false, false, false, 0,
-                       TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
-                       /*isTailCall=*/false,
-                       /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
-                       getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
-                                         TLI->getPointerTy()),
-                       Args, *this, dl);
+  TargetLowering::CallLoweringInfo CLI(*this);
+  CLI.setDebugLoc(dl).setChain(Chain)
+    .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
+               Type::getVoidTy(*getContext()),
+               getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
+                                 TLI->getPointerTy()), std::move(Args), 0)
+    .setDiscardResult();
   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);

   return CallResult.second;
@@ -4174,31 +4405,31 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
   Entry.Ty = IntPtrTy;
   Entry.isSExt = false;
   Args.push_back(Entry);
+
   // FIXME: pass in SDLoc
-  TargetLowering::
-  CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
-                       false, false, false, false, 0,
-                       TLI->getLibcallCallingConv(RTLIB::MEMSET),
-                       /*isTailCall=*/false,
-                       /*doesNotReturn*/false, /*isReturnValueUsed=*/false,
-                       getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
-                                         TLI->getPointerTy()),
-                       Args, *this, dl);
-  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+  TargetLowering::CallLoweringInfo CLI(*this);
+  CLI.setDebugLoc(dl).setChain(Chain)
+    .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+               Type::getVoidTy(*getContext()),
+               getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+                                 TLI->getPointerTy()), std::move(Args), 0)
+    .setDiscardResult();

+  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
   return CallResult.second;
 }

 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
-                                SDVTList VTList, SDValue* Ops, unsigned NumOps,
+                                SDVTList VTList, ArrayRef<SDValue> Ops,
                                 MachineMemOperand *MMO,
-                                AtomicOrdering Ordering,
+                                AtomicOrdering SuccessOrdering,
+                                AtomicOrdering FailureOrdering,
                                 SynchronizationScope SynchScope) {
   FoldingSetNodeID ID;
   ID.AddInteger(MemVT.getRawBits());
-  AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+  AddNodeIDNode(ID, Opcode, VTList, Ops);
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-  void* IP = 0;
+  void* IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<AtomicSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
@@ -4209,59 +4440,70 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
   // the node is deallocated, but recovered when the allocator is released.
   // If the number of operands is less than 5 we use AtomicSDNode's internal
   // storage.
-  SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) : 0;
+  unsigned NumOps = Ops.size();
+  SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps)
+                             : nullptr;

   SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
                                                dl.getDebugLoc(), VTList, MemVT,
-                                               Ops, DynOps, NumOps, MMO,
-                                               Ordering, SynchScope);
+                                               Ops.data(), DynOps, NumOps, MMO,
+                                               SuccessOrdering, FailureOrdering,
+                                               SynchScope);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }

 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
-                                SDValue Chain, SDValue Ptr, SDValue Cmp,
-                                SDValue Swp, MachinePointerInfo PtrInfo,
-                                unsigned Alignment,
+                                SDVTList VTList, ArrayRef<SDValue> Ops,
+                                MachineMemOperand *MMO,
                                 AtomicOrdering Ordering,
                                 SynchronizationScope SynchScope) {
+  return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering,
+                   Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomicCmpSwap(
+    unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain,
+    SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
+    unsigned Alignment, AtomicOrdering SuccessOrdering,
+    AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) {
+  assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+         Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+  assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(MemVT);

   MachineFunction &MF = getMachineFunction();

-  // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE.
-  // For now, atomics are considered to be volatile always.
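The getMemcpy/getMemmove/getMemset hunks above replace the long positional CallLoweringInfo constructor with chained setters, which keeps each property named at the call site. A generic sketch of that builder style (hypothetical types, not the LLVM class):

    #include <string>
    #include <utility>

    struct CallDesc {
      std::string Callee;
      bool DiscardResult = false;
      CallDesc &setCallee(std::string C) { Callee = std::move(C); return *this; }
      CallDesc &setDiscardResult() { DiscardResult = true; return *this; }
    };

    void demo() {
      CallDesc CD;
      CD.setCallee("memset").setDiscardResult();  // reads like the new code
    }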
   // FIXME: Volatile isn't really correct; we should keep track of atomic
   // orderings in the memoperand.
   unsigned Flags = MachineMemOperand::MOVolatile;
-  if (Opcode != ISD::ATOMIC_STORE)
-    Flags |= MachineMemOperand::MOLoad;
-  if (Opcode != ISD::ATOMIC_LOAD)
-    Flags |= MachineMemOperand::MOStore;
+  Flags |= MachineMemOperand::MOLoad;
+  Flags |= MachineMemOperand::MOStore;

   MachineMemOperand *MMO =
     MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);

-  return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
-                   Ordering, SynchScope);
+  return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO,
+                          SuccessOrdering, FailureOrdering, SynchScope);
 }

-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
-                                SDValue Chain,
-                                SDValue Ptr, SDValue Cmp,
-                                SDValue Swp, MachineMemOperand *MMO,
-                                AtomicOrdering Ordering,
-                                SynchronizationScope SynchScope) {
-  assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT,
+                                       SDVTList VTs, SDValue Chain, SDValue Ptr,
+                                       SDValue Cmp, SDValue Swp,
+                                       MachineMemOperand *MMO,
+                                       AtomicOrdering SuccessOrdering,
+                                       AtomicOrdering FailureOrdering,
+                                       SynchronizationScope SynchScope) {
+  assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+         Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
   assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");

-  EVT VT = Cmp.getValueType();
-
-  SDVTList VTs = getVTList(VT, MVT::Other);
   SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
-  return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope);
+  return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO,
+                   SuccessOrdering, FailureOrdering, SynchScope);
 }

 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -4320,38 +4562,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
   SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
                                                getVTList(VT, MVT::Other);
   SDValue Ops[] = {Chain, Ptr, Val};
-  return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope);
-}
-
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
-                                EVT VT, SDValue Chain,
-                                SDValue Ptr,
-                                const Value* PtrVal,
-                                unsigned Alignment,
-                                AtomicOrdering Ordering,
-                                SynchronizationScope SynchScope) {
-  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
-    Alignment = getEVTAlignment(MemVT);
-
-  MachineFunction &MF = getMachineFunction();
-  // An atomic store does not load. An atomic load does not store.
-  // (An atomicrmw obviously both loads and stores.)
-  // For now, atomics are considered to be volatile always, and they are
-  // chained as such.
-  // FIXME: Volatile isn't really correct; we should keep track of atomic
-  // orderings in the memoperand.
-  unsigned Flags = MachineMemOperand::MOVolatile;
-  if (Opcode != ISD::ATOMIC_STORE)
-    Flags |= MachineMemOperand::MOLoad;
-  if (Opcode != ISD::ATOMIC_LOAD)
-    Flags |= MachineMemOperand::MOStore;
-
-  MachineMemOperand *MMO =
-    MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
-                            MemVT.getStoreSize(), Alignment);
-
-  return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO,
-                   Ordering, SynchScope);
+  return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
 }

 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -4364,38 +4575,24 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,

   SDVTList VTs = getVTList(VT, MVT::Other);
   SDValue Ops[] = {Chain, Ptr};
-  return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope);
+  return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
 }

 /// getMergeValues - Create a MERGE_VALUES node from the given operands.
-SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
-                                     SDLoc dl) {
-  if (NumOps == 1)
+SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) {
+  if (Ops.size() == 1)
     return Ops[0];

   SmallVector<EVT, 4> VTs;
-  VTs.reserve(NumOps);
-  for (unsigned i = 0; i < NumOps; ++i)
+  VTs.reserve(Ops.size());
+  for (unsigned i = 0; i < Ops.size(); ++i)
     VTs.push_back(Ops[i].getValueType());
-  return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
-                 Ops, NumOps);
-}
-
-SDValue
-SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl,
-                                  const EVT *VTs, unsigned NumVTs,
-                                  const SDValue *Ops, unsigned NumOps,
-                                  EVT MemVT, MachinePointerInfo PtrInfo,
-                                  unsigned Align, bool Vol,
-                                  bool ReadMem, bool WriteMem) {
-  return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
-                             MemVT, PtrInfo, Align, Vol,
-                             ReadMem, WriteMem);
+  return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops);
 }

 SDValue
 SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
-                                  const SDValue *Ops, unsigned NumOps,
+                                  ArrayRef<SDValue> Ops,
                                   EVT MemVT, MachinePointerInfo PtrInfo,
                                   unsigned Align, bool Vol,
                                   bool ReadMem, bool WriteMem) {
@@ -4413,13 +4610,13 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);

-  return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+  return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
 }

 SDValue
 SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
-                                  const SDValue *Ops, unsigned NumOps,
-                                  EVT MemVT, MachineMemOperand *MMO) {
+                                  ArrayRef<SDValue> Ops, EVT MemVT,
+                                  MachineMemOperand *MMO) {
   assert((Opcode == ISD::INTRINSIC_VOID ||
           Opcode == ISD::INTRINSIC_W_CHAIN ||
           Opcode == ISD::PREFETCH ||
@@ -4433,9 +4630,9 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
   MemIntrinsicSDNode *N;
   if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
     FoldingSetNodeID ID;
-    AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+    AddNodeIDNode(ID, Opcode, VTList, Ops);
     ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-    void *IP = 0;
+    void *IP = nullptr;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
       cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
       return SDValue(E, 0);
@@ -4443,12 +4640,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
     N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
                                                dl.getDebugLoc(), VTList, Ops,
-                                               NumOps, MemVT, MMO);
+                                               MemVT, MMO);
     CSEMap.InsertNode(N, IP);
   } else {
     N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
                                                dl.getDebugLoc(), VTList, Ops,
-                                               NumOps, MemVT, MMO);
+                                               MemVT, MMO);
   }
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -4511,7 +4708,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,

   // If we don't have a PtrInfo, infer the trivial frame index case to simplify
   // clients.
-  if (PtrInfo.V == 0)
+  if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(Ptr, Offset);

   MachineFunction &MF = getMachineFunction();
@@ -4551,13 +4748,13 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
     getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
   SDValue Ops[] = { Chain, Ptr, Offset };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
   ID.AddInteger(MemVT.getRawBits());
   ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
                                      MMO->isNonTemporal(),
                                      MMO->isInvariant()));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<LoadSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
@@ -4638,7 +4835,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
   if (isNonTemporal)
     Flags |= MachineMemOperand::MONonTemporal;

-  if (PtrInfo.V == 0)
+  if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(Ptr);

   MachineFunction &MF = getMachineFunction();
@@ -4659,12 +4856,12 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
   SDValue Undef = getUNDEF(Ptr.getValueType());
   SDValue Ops[] = { Chain, Val, Ptr, Undef };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
   ID.AddInteger(VT.getRawBits());
   ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
                                      MMO->isNonTemporal(), MMO->isInvariant()));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<StoreSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
@@ -4693,7 +4890,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
   if (isNonTemporal)
     Flags |= MachineMemOperand::MONonTemporal;

-  if (PtrInfo.V == 0)
+  if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(Ptr);

   MachineFunction &MF = getMachineFunction();
@@ -4728,12 +4925,12 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
   SDValue Undef = getUNDEF(Ptr.getValueType());
   SDValue Ops[] = { Chain, Val, Ptr, Undef };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
   ID.AddInteger(SVT.getRawBits());
   ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
                                      MMO->isNonTemporal(), MMO->isInvariant()));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<StoreSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
@@ -4755,11 +4952,11 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
   SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
   SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
   ID.AddInteger(ST->getMemoryVT().getRawBits());
   ID.AddInteger(ST->getRawSubclassData());
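The getAtomicCmpSwap changes in the hunks above thread two orderings through the node, one for the success path and one for the failure path, mirroring the C++11/IR cmpxchg model. An illustrative use of the same distinction with std::atomic:

    #include <atomic>

    // Success uses acquire; failure (value mismatch, no store happens) may
    // be weaker, here relaxed. The DAG node now records both orderings.
    bool tryLock(std::atomic<int> &Flag) {
      int Expected = 0;
      return Flag.compare_exchange_strong(Expected, 1,
                                          std::memory_order_acquire,
                                          std::memory_order_relaxed);
    }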
ID.AddInteger(ST->getPointerInfo().getAddrSpace()); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -4778,14 +4975,14 @@ SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue SV, unsigned Align) { SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; - return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - const SDUse *Ops, unsigned NumOps) { - switch (NumOps) { + ArrayRef<SDUse> Ops) { + switch (Ops.size()) { case 0: return getNode(Opcode, DL, VT); - case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0])); case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; @@ -4793,12 +4990,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, // Copy from an SDUse array into an SDValue array for use with // the regular getNode logic. - SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps); - return getNode(Opcode, DL, VT, &NewOps[0], NumOps); + SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end()); + return getNode(Opcode, DL, VT, NewOps); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { + unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); case 1: return getNode(Opcode, DL, VT, Ops[0]); @@ -4833,18 +5031,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, if (VT != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops, NumOps); + VTs, Ops); CSEMap.InsertNode(N, IP); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops, NumOps); + VTs, Ops); } AllNodes.push_back(N); @@ -4855,24 +5053,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, - ArrayRef<EVT> ResultTys, - const SDValue *Ops, unsigned NumOps) { - return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), - Ops, NumOps); -} - -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, - const EVT *VTs, unsigned NumVTs, - const SDValue *Ops, unsigned NumOps) { - if (NumVTs == 1) - return getNode(Opcode, DL, VTs[0], Ops, NumOps); - return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps); + ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { + return getNode(Opcode, DL, getVTList(ResultTys), Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { if (VTList.NumVTs == 1) - return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); + return getNode(Opcode, DL, VTList.VTs[0], Ops); #if 0 switch (Opcode) { @@ -4899,10 +5087,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, // Memoize the node unless it returns a flag. 
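// (Nodes whose final result type is MVT::Glue are deliberately kept out of
// the CSE map in the code below: glue ties a node to one specific consumer,
// so two glue-producing nodes are never interchangeable.)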
SDNode *N; + unsigned NumOps = Ops.size(); if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTList, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -4919,7 +5108,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, Ops[1], Ops[2]); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops, NumOps); + VTList, Ops); } CSEMap.InsertNode(N, IP); } else { @@ -4936,7 +5125,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, Ops[1], Ops[2]); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops, NumOps); + VTList, Ops); } } AllNodes.push_back(N); @@ -4947,39 +5136,39 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { - return getNode(Opcode, DL, VTList, 0, 0); + return getNode(Opcode, DL, VTList, ArrayRef<SDValue>()); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1) { SDValue Ops[] = { N1 }; - return getNode(Opcode, DL, VTList, Ops, 1); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2) { SDValue Ops[] = { N1, N2 }; - return getNode(Opcode, DL, VTList, Ops, 2); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3) { SDValue Ops[] = { N1, N2, N3 }; - return getNode(Opcode, DL, VTList, Ops, 3); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; - return getNode(Opcode, DL, VTList, Ops, 4); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; - return getNode(Opcode, DL, VTList, Ops, 5); + return getNode(Opcode, DL, VTList, Ops); } SDVTList SelectionDAG::getVTList(EVT VT) { @@ -4992,9 +5181,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { ID.AddInteger(VT1.getRawBits()); ID.AddInteger(VT2.getRawBits()); - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate<EVT>(2); Array[0] = VT1; Array[1] = VT2; @@ -5011,9 +5200,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { ID.AddInteger(VT2.getRawBits()); ID.AddInteger(VT3.getRawBits()); - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate<EVT>(3); Array[0] = VT1; Array[1] = VT2; @@ -5032,9 +5221,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { ID.AddInteger(VT3.getRawBits()); ID.AddInteger(VT4.getRawBits()); - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate<EVT>(4); Array[0] = VT1; Array[1] = VT2; @@ -5046,18 +5235,19 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { return 
Result->getSDVTList(); } -SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { +SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) { + unsigned NumVTs = VTs.size(); FoldingSetNodeID ID; ID.AddInteger(NumVTs); for (unsigned index = 0; index < NumVTs; index++) { ID.AddInteger(VTs[index].getRawBits()); } - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate<EVT>(NumVTs); - std::copy(VTs, VTs + NumVTs, Array); + std::copy(VTs.begin(), VTs.end(), Array); Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); VTListMap.InsertNode(Result, IP); } @@ -5078,14 +5268,14 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { if (Op == N->getOperand(0)) return N; // See if the modified node already exists. - void *InsertPos = 0; + void *InsertPos = nullptr; if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) if (!RemoveNodeFromCSEMaps(N)) - InsertPos = 0; + InsertPos = nullptr; // Now we update the operands. N->OperandList[0].set(Op); @@ -5103,14 +5293,14 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { return N; // No operands changed, just return the input node. // See if the modified node already exists. - void *InsertPos = 0; + void *InsertPos = nullptr; if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) if (!RemoveNodeFromCSEMaps(N)) - InsertPos = 0; + InsertPos = nullptr; // Now we update the operands. if (N->OperandList[0] != Op1) @@ -5126,25 +5316,26 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { SDNode *SelectionDAG:: UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { SDValue Ops[] = { Op1, Op2, Op3 }; - return UpdateNodeOperands(N, Ops, 3); + return UpdateNodeOperands(N, Ops); } SDNode *SelectionDAG:: UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4) { SDValue Ops[] = { Op1, Op2, Op3, Op4 }; - return UpdateNodeOperands(N, Ops, 4); + return UpdateNodeOperands(N, Ops); } SDNode *SelectionDAG:: UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4, SDValue Op5) { SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; - return UpdateNodeOperands(N, Ops, 5); + return UpdateNodeOperands(N, Ops); } SDNode *SelectionDAG:: -UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { +UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { + unsigned NumOps = Ops.size(); assert(N->getNumOperands() == NumOps && "Update with wrong number of operands"); @@ -5161,14 +5352,14 @@ UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { if (!AnyChange) return N; // See if the modified node already exists. - void *InsertPos = 0; - if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) + void *InsertPos = nullptr; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos)) return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) if (!RemoveNodeFromCSEMaps(N)) - InsertPos = 0; + InsertPos = nullptr; // Now we update the operands. 
for (unsigned i = 0; i != NumOps; ++i) @@ -5197,14 +5388,14 @@ void SDNode::DropOperands() { SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT) { SDVTList VTs = getVTList(VT); - return SelectNodeTo(N, MachineOpc, VTs, 0, 0); + return SelectNodeTo(N, MachineOpc, VTs, None); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5212,7 +5403,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5220,41 +5411,39 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT, const SDValue *Ops, - unsigned NumOps) { + EVT VT, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, const SDValue *Ops, - unsigned NumOps) { + EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); + return SelectNodeTo(N, MachineOpc, VTs, None); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, EVT VT3, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, EVT VT3, EVT VT4, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5262,7 +5451,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5270,7 +5459,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5279,7 +5468,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op3) { 
SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5288,13 +5477,12 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - SDVTList VTs, const SDValue *Ops, - unsigned NumOps) { - N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); + SDVTList VTs, ArrayRef<SDValue> Ops) { + N = MorphNodeTo(N, ~MachineOpc, VTs, Ops); // Reset the NodeID to -1. N->setNodeId(-1); return N; } @@ -5331,19 +5519,19 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { /// the node's users. /// SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - SDVTList VTs, const SDValue *Ops, - unsigned NumOps) { + SDVTList VTs, ArrayRef<SDValue> Ops) { + unsigned NumOps = Ops.size(); // If an identical node already exists, use it. - void *IP = 0; + void *IP = nullptr; if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); + AddNodeIDNode(ID, Opc, VTs, Ops); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } if (!RemoveNodeFromCSEMaps(N)) - IP = 0; + IP = nullptr; // Start the morphing. N->NodeType = Opc; @@ -5363,7 +5551,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) { // Initialize the memory references information. - MN->setMemRefs(0, 0); + MN->setMemRefs(nullptr, nullptr); // If NumOps is larger than the # of operands we can have in a // MachineSDNode, reallocate the operand list. if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { @@ -5374,22 +5562,22 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // remainder of the current SelectionDAG iteration, so we can allocate // the operands directly out of a pool with no recycling metadata. MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), - Ops, NumOps); + Ops.data(), NumOps); else - MN->InitOperands(MN->LocalOperands, Ops, NumOps); + MN->InitOperands(MN->LocalOperands, Ops.data(), NumOps); MN->OperandsNeedDelete = false; } else - MN->InitOperands(MN->OperandList, Ops, NumOps); + MN->InitOperands(MN->OperandList, Ops.data(), NumOps); } else { // If NumOps is larger than the # of operands we currently have, reallocate // the operand list.
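// (Otherwise the existing storage is reused in place. In both branches,
// Ops.data() bridges the new ArrayRef interface to the legacy
// pointer-plus-count InitOperands call.)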
if (NumOps > N->NumOperands) { if (N->OperandsNeedDelete) delete[] N->OperandList; - N->InitOperands(new SDUse[NumOps], Ops, NumOps); + N->InitOperands(new SDUse[NumOps], Ops.data(), NumOps); N->OperandsNeedDelete = true; } else - N->InitOperands(N->OperandList, Ops, NumOps); + N->InitOperands(N->OperandList, Ops.data(), NumOps); } // Delete any nodes that are still dead after adding the uses for the @@ -5528,7 +5716,7 @@ MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { - SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); + SDVTList VTs = getVTList(ResultTys); return getMachineNode(Opcode, dl, VTs, Ops); } @@ -5537,14 +5725,14 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, ArrayRef<SDValue> OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; - void *IP = 0; + void *IP = nullptr; const SDValue *Ops = OpsArray.data(); unsigned NumOps = OpsArray.size(); if (DoCSE) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); - IP = 0; + AddNodeIDNode(ID, ~Opcode, VTs, OpsArray); + IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); } @@ -5600,34 +5788,42 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - const SDValue *Ops, unsigned NumOps) { - if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + ArrayRef<SDValue> Ops, bool nuw, bool nsw, + bool exact) { + if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTList, Ops); + if (isBinOpWithFlags(Opcode)) + AddBinaryNodeIDCustom(ID, nuw, nsw, exact); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return E; } - return NULL; + return nullptr; } /// getDbgValue - Creates a SDDbgValue node. /// +/// SDNode SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off, +SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, + bool IsIndirect, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O); + return new (Allocator) SDDbgValue(MDPtr, N, R, IsIndirect, Off, DL, O); } +/// Constant SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off, - DebugLoc DL, unsigned O) { +SelectionDAG::getConstantDbgValue(MDNode *MDPtr, const Value *C, + uint64_t Off, + DebugLoc DL, unsigned O) { return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); } +/// FrameIndex SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, - DebugLoc DL, unsigned O) { +SelectionDAG::getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O); } @@ -5641,7 +5837,7 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { SDNode::use_iterator &UI; SDNode::use_iterator &UE; - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { // Increment the iterator as needed. while (UI != UE && N == *UI) ++UI; @@ -5945,7 +6141,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // count of outstanding operands. 
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { SDNode *N = I++; - checkForCycles(N); + checkForCycles(N, this); unsigned Degree = N->getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. @@ -5965,7 +6161,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // such that by the time the end is reached all nodes will be sorted. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { SDNode *N = I; - checkForCycles(N); + checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); @@ -5990,9 +6186,11 @@ unsigned SelectionDAG::AssignTopologicalOrder() { #ifndef NDEBUG SDNode *S = ++I; dbgs() << "Overran sorted position:\n"; - S->dumprFull(); + S->dumprFull(this); dbgs() << "\n"; + dbgs() << "Checking if this is due to cycles\n"; + checkForCycles(this, true); #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -6033,6 +6231,7 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { SDDbgValue *Dbg = *I; if (Dbg->getKind() == SDDbgValue::SDNODE) { SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), + Dbg->isIndirect(), Dbg->getOffset(), Dbg->getDebugLoc(), Dbg->getOrder()); ClonedDVs.push_back(Clone); @@ -6076,9 +6275,8 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, } MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - const SDValue *Ops, unsigned NumOps, EVT memvt, - MachineMemOperand *mmo) - : SDNode(Opc, Order, dl, VTs, Ops, NumOps), + ArrayRef<SDValue> Ops, EVT memvt, MachineMemOperand *mmo) + : SDNode(Opc, Order, dl, VTs, Ops), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); @@ -6297,12 +6495,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { switch (N->getOpcode()) { default: - Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, - &Operands[0], Operands.size())); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands)); break; case ISD::VSELECT: - Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, - &Operands[0], Operands.size())); + Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); break; case ISD::SHL: case ISD::SRA: @@ -6327,8 +6523,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Scalars.push_back(getUNDEF(EltVT)); return getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*getContext(), EltVT, ResNE), - &Scalars[0], Scalars.size()); + EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars); } @@ -6362,8 +6557,8 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) return true; - const GlobalValue *GV1 = NULL; - const GlobalValue *GV2 = NULL; + const GlobalValue *GV1 = nullptr; + const GlobalValue *GV2 = nullptr; int64_t Offset1 = 0; int64_t Offset2 = 0; const TargetLowering *TLI = TM.getTargetLowering(); @@ -6385,8 +6580,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI->getDataLayout()); + llvm::computeKnownBits(const_cast<GlobalValue*>(GV), 
KnownZero, KnownOne, + TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6448,6 +6643,22 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, return std::make_pair(Lo, Hi); } +void SelectionDAG::ExtractVectorElements(SDValue Op, + SmallVectorImpl<SDValue> &Args, + unsigned Start, unsigned Count) { + EVT VT = Op.getValueType(); + if (Count == 0) + Count = VT.getVectorNumElements(); + + EVT EltVT = VT.getVectorElementType(); + EVT IdxTy = TLI->getVectorIdxTy(); + SDLoc SL(Op); + for (unsigned i = Start, e = Start + Count; i != e; ++i) { + Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, + Op, getConstant(i, IdxTy))); + } +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); @@ -6465,7 +6676,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits, - bool isBigEndian) { + bool isBigEndian) const { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); @@ -6526,6 +6737,54 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, return true; } +SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { + if (UndefElements) { + UndefElements->clear(); + UndefElements->resize(getNumOperands()); + } + SDValue Splatted; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + SDValue Op = getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) { + if (UndefElements) + (*UndefElements)[i] = true; + } else if (!Splatted) { + Splatted = Op; + } else if (Splatted != Op) { + return SDValue(); + } + } + + if (!Splatted) { + assert(getOperand(0).getOpcode() == ISD::UNDEF && + "Can only have a splat without a constant for all undefs."); + return getOperand(0); + } + + return Splatted; +} + +ConstantSDNode * +BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantSDNode>( + getSplatValue(UndefElements).getNode()); +} + +ConstantFPSDNode * +BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantFPSDNode>( + getSplatValue(UndefElements).getNode()); +} + +bool BuildVectorSDNode::isConstant() const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + unsigned Opc = getOperand(i).getOpcode(); + if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP) + return false; + } + return true; +} + bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { // Find the first non-undef value in the shuffle mask. unsigned i, e; @@ -6542,10 +6801,11 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } -#ifdef XDEBUG +#ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSet<const SDNode*, 32> &Visited, - SmallPtrSet<const SDNode*, 32> &Checked) { + SmallPtrSet<const SDNode*, 32> &Checked, + const llvm::SelectionDAG *DAG) { // If this node has already been checked, don't check it again. if (Checked.count(N)) return; @@ -6553,29 +6813,37 @@ static void checkForCyclesHelper(const SDNode *N, // If a node has already been visited on this depth-first walk, reject it as // a cycle. 
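// (With the SmallPtrSet interface used here, insert(N) returns false when N
// was already present; that failed insertion is the back-edge test below.)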
if (!Visited.insert(N)) { - dbgs() << "Offending node:\n"; - N->dumprFull(); errs() << "Detected cycle in SelectionDAG\n"; + dbgs() << "Offending node:\n"; + N->dumprFull(DAG); dbgs() << "\n"; abort(); } for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); + checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked, DAG); Checked.insert(N); Visited.erase(N); } #endif -void llvm::checkForCycles(const llvm::SDNode *N) { +void llvm::checkForCycles(const llvm::SDNode *N, + const llvm::SelectionDAG *DAG, + bool force) { +#ifndef NDEBUG + bool check = force; #ifdef XDEBUG - assert(N && "Checking nonexistent SDNode"); - SmallPtrSet<const SDNode*, 32> visited; - SmallPtrSet<const SDNode*, 32> checked; - checkForCyclesHelper(N, visited, checked); -#endif + check = true; +#endif // XDEBUG + if (check) { + assert(N && "Checking nonexistent SDNode"); + SmallPtrSet<const SDNode*, 32> visited; + SmallPtrSet<const SDNode*, 32> checked; + checkForCyclesHelper(N, visited, checked, DAG); + } +#endif // !NDEBUG } -void llvm::checkForCycles(const llvm::SelectionDAG *DAG) { - checkForCycles(DAG->getRoot().getNode()); +void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) { + checkForCycles(DAG->getRoot().getNode(), DAG, force); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 41662a9..c07b5e6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "isel" #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" @@ -34,10 +33,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -62,6 +61,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "isel" + /// LimitFloatPrecision - Generate low-precision inline sequences for /// some float libcalls (6, 8 or 12 bits). 
static unsigned LimitFloatPrecision; @@ -168,7 +169,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { @@ -214,6 +215,20 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, llvm_unreachable("Unknown mismatch!"); } +static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, + const Twine &ErrMsg) { + const Instruction *I = dyn_cast_or_null<Instruction>(V); + if (!V) + return Ctx.emitError(ErrMsg); + + const char *AsmError = ", possible invalid constraint for vector type"; + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + return Ctx.emitError(I, ErrMsg + AsmError); + + return Ctx.emitError(I, ErrMsg); +} + /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. If the parts combine to a /// type larger than ValueVT then AssertOp can be used to specify whether the @@ -262,9 +277,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. - Val = DAG.getNode(IntermediateVT.isVector() ? - ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, - ValueVT, &Ops[0], NumIntermediates); + Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS + : ISD::BUILD_VECTOR, + DL, ValueVT, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. @@ -306,16 +321,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("non-trivial scalar-to-vector conversion"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "non-trivial scalar-to-vector conversion"); return DAG.getUNDEF(ValueVT); } @@ -397,18 +404,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - if (PartEVT != ValueVT) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("scalar-to-vector conversion failed"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } - } + if (PartEVT != ValueVT) + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "scalar-to-vector conversion failed"); Parts[0] = Val; return; @@ -498,7 +496,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); // FIXME: Use CONCAT for 2x -> 4x.
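The dominant mechanical change across both files is the retirement of (const SDValue *Ops, unsigned NumOps) pointer/length pairs in favor of ArrayRef<SDValue>. A minimal before/after sketch of a call site (hypothetical code mirroring the hunks above, not part of the commit; it assumes a SelectionDAG &DAG and an SDLoc dl in scope):

    SmallVector<SDValue, 8> Chains;
    // ... populate Chains with chain values ...
    // Old interface: raw pointer plus a separately maintained element count.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                             &Chains[0], Chains.size());
    // New interface: ArrayRef<SDValue> is built implicitly from the
    // SmallVector (or from a C array such as SDValue Ops[] = { ... }), so the
    // count travels with the data and can no longer disagree with it; empty
    // operand lists are spelled None.
    SDValue TF2 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);

The same implicit conversion is what lets the fixed-size operand arrays throughout the diff drop their trailing count arguments.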
@@ -627,16 +625,6 @@ namespace { } } - /// areValueTypesLegal - Return true if types of all the values are legal. - bool areValueTypesLegal(const TargetLowering &TLI) { - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT RegisterVT = RegVTs[Value]; - if (!TLI.isTypeLegal(RegisterVT)) - return false; - } - return true; - } - /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); @@ -651,7 +639,7 @@ namespace { SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, SDLoc dl, SDValue &Chain, SDValue *Flag, - const Value *V = 0) const; + const Value *V = nullptr) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses @@ -697,7 +685,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue P; - if (Flag == 0) { + if (!Flag) { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); } else { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); @@ -765,9 +753,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, Parts.clear(); } - return DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); + return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the @@ -798,7 +784,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SmallVector<SDValue, 8> Chains(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue Part; - if (Flag == 0) { + if (!Flag) { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); } else { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); @@ -821,7 +807,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, // = op c3, ..., f2 Chain = Chains[NumRegs-1]; else - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } /// AddInlineAsmOperands - Add this value to the specified inlineasm node @@ -874,7 +860,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - TD = DAG.getTarget().getDataLayout(); + DL = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -890,8 +876,9 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - CurInst = NULL; + CurInst = nullptr; HasTailCall = false; + SDNodeOrder = LowestSDNodeOrder; } /// clearDanglingDebugInfo - Clear the dangling debug information @@ -922,7 +909,7 @@ SDValue SelectionDAGBuilder::getRoot() { // Otherwise, we have to make a token factor node. 
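// (A TokenFactor takes an arbitrary number of chain operands and produces a
// single chain that depends on all of them: the pending loads stay unordered
// with respect to one another, while anything chained after the TokenFactor
// is ordered after the whole group.)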
SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - &PendingLoads[0], PendingLoads.size()); + PendingLoads); PendingLoads.clear(); DAG.setRoot(Root); return Root; @@ -952,8 +939,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { } Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - &PendingExports[0], - PendingExports.size()); + PendingExports); PendingExports.clear(); DAG.setRoot(Root); return Root; @@ -973,7 +959,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) { if (!isa<TerminatorInst>(&I) && !HasTailCall) CopyToExportRegsIfNeeded(&I); - CurInst = NULL; + CurInst = nullptr; } void SelectionDAGBuilder::visitPHI(const PHINode &) { @@ -1003,11 +989,14 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); MDNode *Variable = DI->getVariable(); uint64_t Offset = DI->getOffset(); + // A dbg.value for an alloca is always indirect. + bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) { SDV = DAG.getDbgValue(Variable, Val.getNode(), - Val.getResNo(), Offset, dl, DbgSDNodeOrder); + Val.getResNo(), IsIndirect, + Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1032,7 +1021,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1103,8 +1092,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Constants.push_back(SDValue(Val, i)); } - return DAG.getMergeValues(&Constants[0], Constants.size(), - getCurSDLoc()); + return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const ConstantDataSequential *CDS = @@ -1119,9 +1107,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } if (isa<ArrayType>(CDS->getType())) - return DAG.getMergeValues(&Ops[0], Ops.size(), getCurSDLoc()); + return DAG.getMergeValues(Ops, getCurSDLoc()); return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, &Ops[0], Ops.size()); + VT, Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { @@ -1144,8 +1132,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Constants[i] = DAG.getConstant(0, EltVT); } - return DAG.getMergeValues(&Constants[0], NumElts, - getCurSDLoc()); + return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) @@ -1173,8 +1160,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. 
- return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, &Ops[0], Ops.size()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); } // If this is a static alloca, generate it as the frameindex instead of @@ -1191,7 +1177,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } llvm_unreachable("Can't get register for value!"); @@ -1235,7 +1221,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], NumValues); + MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); @@ -1392,7 +1378,9 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock *SwitchBB, + uint32_t TWeight, + uint32_t FWeight) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1416,8 +1404,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, llvm_unreachable("Unknown compare instruction"); } - CaseBlock CB(Condition, BOp->getOperand(0), - BOp->getOperand(1), NULL, TBB, FBB, CurBB); + CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, + TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); return; } @@ -1425,17 +1413,26 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - NULL, TBB, FBB, CurBB); + nullptr, TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); } +/// Scale down both weights to fit into uint32_t. +static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { + uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; + uint32_t Scale = (NewMax / UINT32_MAX) + 1; + NewTrue = NewTrue / Scale; + NewFalse = NewFalse / Scale; +} + /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc) { + unsigned Opc, uint32_t TWeight, + uint32_t FWeight) { // If this node is not part of the or/and tree, emit it as a branch. 
const Instruction *BOp = dyn_cast<Instruction>(Cond); if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || @@ -1443,7 +1440,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { - EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, + TWeight, FWeight); return; } @@ -1455,6 +1453,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, if (Opc == Instruction::Or) { // Codegen X | Y as: + // BB1: // jmp_if_X TBB // jmp TmpBB // TmpBB: @@ -1462,14 +1461,34 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // jmp_if_Y TBB // jmp FBB // + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's + // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice + // assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. + // Another choice is to assume TrueProb for BB1 equals TrueProb for + // TmpBB, but the math is more complicated. + + uint64_t NewTrueWeight = TWeight; + uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = TWeight; + NewFalseWeight = 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: + // BB1: // jmp_if_X TmpBB // jmp FBB // TmpBB: @@ -1478,11 +1497,28 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // jmp_if_Y TBB // jmp FBB // // This requires creation of TmpBB after CurBB. + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's + // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice + // assumes that + // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. + + uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; + uint64_t NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = 2 * (uint64_t)TWeight; + NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB.
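// (Illustrative example, not in the commit: with original weights
// TWeight = 3 and FWeight = 5, the LHS call above received
// (2*3+5, 5) = (11, 5) and this RHS call receives (2*3, 5) = (6, 5);
// ScaleWeights divides both values only when the larger one reaches
// UINT32_MAX.)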
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } } @@ -1524,7 +1560,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = BrMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1533,8 +1569,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); - // If this is not a fall-through branch, emit the branch. - if (Succ0MBB != NextBlock) + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1569,7 +1606,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode()); + BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), + getEdgeWeight(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. @@ -1599,7 +1637,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - NULL, Succ0MBB, Succ1MBB, BrMBB); + nullptr, Succ0MBB, Succ1MBB, BrMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1615,7 +1653,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, SDLoc dl = getCurSDLoc(); // Build the setcc now. - if (CB.CmpMHS == NULL) { + if (!CB.CmpMHS) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && @@ -1656,7 +1694,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1734,7 +1772,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
- MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) @@ -1817,8 +1855,8 @@ void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering *TLI = TM.getTargetLowering(); SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, - MVT::isVoid, 0, 0, false, getCurSDLoc(), - false, false).second; + MVT::isVoid, nullptr, 0, false, + getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1865,7 +1903,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1939,7 +1977,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -2019,8 +2057,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // Merge into one. SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Ops[0], 2); + DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } @@ -2041,7 +2078,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = CR.CaseBB; if (++BBI != FuncInfo.MF->end()) @@ -2152,7 +2189,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (I->High == I->Low) { // This is just a small case range :) containing exactly 1 case CC = ISD::SETEQ; - LHS = SV; RHS = I->High; MHS = NULL; + LHS = SV; RHS = I->High; MHS = nullptr; } else { CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2359,7 +2396,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, volatile double RDensity = (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); - double Metric = Range.logBase2()*(LDensity+RDensity); + volatile double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place DEBUG(dbgs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' @@ -2387,7 +2424,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); const Constant *C = Pivot->Low; - MachineBasicBlock *FalseBB = 0, *TrueBB = 0; + MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; // We know that we branch to the LHS if the Value being switched on is // less than the Pivot value, C. We use this to optimize our binary @@ -2429,7 +2466,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to RHS.
- CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == SwitchBB) visitSwitchCase(CB, SwitchBB); @@ -2598,7 +2635,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); @@ -2642,7 +2679,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; // If there is only the default destination, branch to it if it is not the @@ -2676,7 +2713,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Push the initial CaseRec onto the worklist CaseRecVector WorkList; - WorkList.push_back(CaseRec(SwitchMBB,0,0, + WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, CaseRange(Cases.begin(),Cases.end()))); while (!WorkList.empty()) { @@ -2725,6 +2762,11 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { getValue(I.getAddress()))); } +void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { + if (DAG.getTarget().Options.TrapUnreachable) + DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); +} + void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg Type *Ty = I.getType(); @@ -2742,8 +2784,22 @@ void SelectionDAGBuilder::visitFSub(const User &I) { void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(OpCode, getCurSDLoc(), - Op1.getValueType(), Op1, Op2)); + + bool nuw = false; + bool nsw = false; + bool exact = false; + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + + SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), + Op1, Op2, nuw, nsw, exact); + setValue(&I, BinNodeValue); } void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { @@ -2774,8 +2830,25 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), - Op1.getValueType(), Op1, Op2)); + bool nuw = false; + bool nsw = false; + bool exact = false; + + if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { + + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + } + + SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, + nuw, nsw, exact); + setValue(&I, Res); } void 
SelectionDAGBuilder::visitSDiv(const User &I) { @@ -2847,8 +2920,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { FalseVal.getResNo() + i)); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues)); + DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitTrunc(const User &I) { @@ -2944,6 +3016,13 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { if (DestVT != N.getValueType()) setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), DestVT, N)); // convert types. + // Check if the original LLVM IR Operand was a ConstantInt, because getValue() + // might fold any kind of constant expression to an integer constant and that + // is not what we are looking for. Only recognize a bitcast of a genuine + // constant integer as an opaque constant. + else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) + setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + /*isOpaque*/true)); else setValue(&I, N); // noop cast. } @@ -3050,11 +3129,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps2[0] = Src2; Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, - &MOps1[0], NumConcat); + getCurSDLoc(), VT, MOps1); Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, - &MOps2[0], NumConcat); + getCurSDLoc(), VT, MOps2); // Readjust mask for new input vector length. SmallVector<int, 8> MappedOps; @@ -3172,8 +3249,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, &Ops[0], Ops.size())); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3215,8 +3291,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&AggValueVTs[0], NumAggValues), - &Values[0], NumAggValues)); + DAG.getVTList(AggValueVTs), Values)); } void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { @@ -3250,8 +3325,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValValueVTs[0], NumValValues), - &Values[0], NumValValues)); + DAG.getVTList(ValValueVTs), Values)); } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { @@ -3269,7 +3343,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, DAG.getConstant(Offset, N.getValueType())); } @@ -3283,7 +3357,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = - TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); @@ -3300,7 +3374,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I)
{ // N = N + Idx * ElementSize; APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), - TD->getTypeAllocSize(Ty)); + DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3373,8 +3447,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), - VTs, Ops, 3); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -3391,8 +3464,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != 0; - bool isInvariant = I.getMetadata("invariant.load") != 0; + bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; + bool isInvariant = I.getMetadata("invariant.load") != nullptr; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); @@ -3406,7 +3479,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile() || NumValues > MaxParallelChains) + if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory( @@ -3419,6 +3492,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + const TargetLowering *TLI = TM.getTargetLowering(); + if (isVolatile) + Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), NumValues)); @@ -3433,8 +3510,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). 
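The MaxParallelChains scheme described in the comment above folds each batch of independent load chains into one token before starting the next batch. A minimal sketch of that merge step, assuming this tree's ArrayRef-based getNode() overload; mergeChains is an illustrative name, not part of the patch:

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  static SDValue mergeChains(SelectionDAG &DAG, SDLoc DL,
                             ArrayRef<SDValue> Chains) {
    if (Chains.size() == 1)
      return Chains[0]; // a single chain needs no token
    // TokenFactor orders after all of its chain operands, so the loads in a
    // batch may execute in parallel while later users still see one chain.
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  }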
if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -3451,8 +3528,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); else @@ -3460,8 +3537,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues)); + DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitStore(const StoreInst &I) { @@ -3489,7 +3565,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != 0; + bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); @@ -3497,8 +3573,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -3511,8 +3587,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } @@ -3525,48 +3601,48 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, if (Before) { if (Order == AcquireRelease || Order == SequentiallyConsistent) Order = Release; - else if (Order == Acquire || Order == Monotonic) + else if (Order == Acquire || Order == Monotonic || Order == Unordered) return Chain; } else { if (Order == AcquireRelease) Order = Acquire; - else if (Order == Release || Order == Monotonic) + else if (Order == Release || Order == Monotonic || Order == Unordered) return Chain; } SDValue Ops[3]; Ops[0] = Chain; Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); - return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3); + return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops); } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); - AtomicOrdering Order = I.getOrdering(); + AtomicOrdering SuccessOrder = I.getSuccessOrdering(); + AtomicOrdering FailureOrder = I.getFailureOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, DAG, *TLI); - 
SDValue L = - DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - getValue(I.getCompareOperand()).getSimpleValueType(), - InChain, - getValue(I.getPointerOperand()), - getValue(I.getCompareOperand()), - getValue(I.getNewValOperand()), - MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); + MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); + SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); + SDValue L = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, + getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), + 0 /* Alignment */, + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, + TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); - SDValue OutChain = L.getValue(1); + SDValue OutChain = L.getValue(2); if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, DAG, *TLI); setValue(&I, L); @@ -3627,7 +3703,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { Ops[0] = getRoot(); Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); - DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); + DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { @@ -3643,12 +3719,21 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + MachineMemOperand::MOVolatile | + MachineMemOperand::MOLoad, + VT.getStoreSize(), + I.getAlignment() ? I.getAlignment() : + DAG.getEVTAlignment(VT)); + + InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = - DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, - getValue(I.getPointerOperand()), - I.getPointerOperand(), I.getAlignment(), - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, + getValue(I.getPointerOperand()), MMO, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, + Scope); SDValue OutChain = L.getValue(1); @@ -3734,27 +3819,23 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (HasChain) ValueVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + SDVTList VTs = DAG.getVTList(ValueVTs); // Create the node. 
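The cmpxchg hunk above is where the instruction's separate success and failure orderings first reach the DAG: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS carries both, and each is independently collapsed to Monotonic when the target asks for explicit fences instead. A hedged sketch of producing such a pair of orderings from C++, assuming this tree's IRBuilder (emitCmpXchg and the chosen orderings are illustrative only):

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  static Value *emitCmpXchg(IRBuilder<> &B, Value *Ptr, Value *Cmp,
                            Value *New) {
    // Success publishes with seq_cst; a failed exchange only needs acquire.
    return B.CreateAtomicCmpXchg(Ptr, Cmp, New,
                                 SequentiallyConsistent, // success ordering
                                 Acquire);               // failure ordering
  }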
SDValue Result; if (IsTgtIntrinsic) { // This is a target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), - VTs, &Ops[0], Ops.size(), - Info.memVT, + VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) { - Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), - VTs, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { - Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), - VTs, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); } else { - Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), - VTs, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); } if (HasChain) { @@ -4476,7 +4557,7 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// At the end of instruction selection, they will be inserted into the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, + int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4528,8 +4609,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (!Op) return false; - // FIXME: This does not handle register-indirect values at offset 0. - bool IsIndirect = Offset != 0; if (Op->isReg()) FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), @@ -4565,18 +4644,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { default: // By default, turn this into a target intrinsic node. visitTargetIntrinsic(I, Intrinsic); - return 0; - case Intrinsic::vastart: visitVAStart(I); return 0; - case Intrinsic::vaend: visitVAEnd(I); return 0; - case Intrinsic::vacopy: visitVACopy(I); return 0; + return nullptr; + case Intrinsic::vastart: visitVAStart(I); return nullptr; + case Intrinsic::vaend: visitVAEnd(I); return nullptr; + case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; + case Intrinsic::read_register: { + Value *Reg = I.getArgOperand(0); + SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); + EVT VT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); + return nullptr; + } + case Intrinsic::write_register: { + Value *Reg = I.getArgOperand(0); + Value *RegValue = I.getArgOperand(1); + SDValue Chain = getValue(RegValue).getOperand(0); + SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); + DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, + RegName, getValue(RegValue))); + return nullptr; + } case Intrinsic::setjmp: return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; case Intrinsic::longjmp: @@ -4599,7 +4694,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)))); - return 0; + return nullptr; } case Intrinsic::memset: { // Assert for address < 256 since
we support only user defined address @@ -4616,7 +4711,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::memmove: { // Assert for address < 256 since we support only user defined address @@ -4636,7 +4731,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)))); - return 0; + return nullptr; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); @@ -4647,14 +4742,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { "Variable in DbgDeclareInst should be either null or a DIVariable."); if (!Address || !DIVar) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - return 0; + return nullptr; } // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - return 0; + return nullptr; } SDValue &N = NodeMap[Address]; @@ -4676,29 +4771,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); if (FINode) // Byval parameter. We have a frame index at this point. - SDV = DAG.getDbgValue(Variable, FINode->getIndex(), - 0, dl, SDNodeOrder); + SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(), + 0, dl, SDNodeOrder); else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, 0, N); - return 0; + EmitFuncArgumentDbgValue(Address, Variable, 0, false, N); + return nullptr; } } else if (AI) SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), - 0, dl, SDNodeOrder); + true, 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); DEBUG(Address->dump()); - return 0; + return nullptr; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { + if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) { // If variable is pinned by an alloca in dominating bb then // use StaticAllocaMap.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { @@ -4706,17 +4801,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { - SDV = DAG.getDbgValue(Variable, SI->second, - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); - return 0; + SDV = DAG.getFrameIndexDbgValue(Variable, SI->second, + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + return nullptr; } } } DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } - return 0; + return nullptr; } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); @@ -4724,18 +4819,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { assert((!DIVar || DIVar.isVariable()) && "Variable in DbgValueInst should be either null or a DIVariable."); if (!DIVar) - return 0; + return nullptr; MDNode *Variable = DI.getVariable(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) - return 0; + return nullptr; SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { - SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); } else { // Do not use getValue() in here; we don't want to generate code at // this point if it hasn't been done yet. @@ -4744,9 +4839,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) { + // A dbg.value for an alloca is always indirect. + bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) { SDV = DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), Offset, dl, SDNodeOrder); + N.getResNo(), IsIndirect, + Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4769,18 +4867,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!AI) { DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); - return 0; + return nullptr; } DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) - return 0; // VLAs. - int FI = SI->second; - - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) - MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); - return 0; + return nullptr; // VLAs. 
+ return nullptr; } case Intrinsic::eh_typeid_for: { @@ -4789,7 +4882,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::eh_return_i32: @@ -4800,10 +4893,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getControlRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return 0; + return nullptr; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); - return 0; + return nullptr; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, TLI->getPointerTy()); @@ -4817,7 +4910,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getConstant(0, TLI->getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); @@ -4826,7 +4919,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); MMI.setCurrentCallSite(CI->getZExtValue()); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. @@ -4835,23 +4928,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; MFI->setFunctionContextIndex(FI); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_setjmp: { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, - DAG.getVTList(MVT::i32, MVT::Other), - Ops, 2); + DAG.getVTList(MVT::i32, MVT::Other), Ops); setValue(&I, Op.getValue(0)); DAG.setRoot(Op.getValue(1)); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::x86_mmx_pslli_w: @@ -4865,7 +4957,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShAmt = getValue(I.getArgOperand(1)); if (isa<ConstantSDNode>(ShAmt)) { visitTargetIntrinsic(I, Intrinsic); - return 0; + return nullptr; } unsigned NewIntrinsic = 0; EVT ShAmtVT = MVT::v2i32; @@ -4904,14 +4996,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI->getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::x86_avx_vinsertf128_pd_256: case Intrinsic::x86_avx_vinsertf128_ps_256: @@ -4926,7 +5018,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); - return 0; + return nullptr; } case 
Intrinsic::x86_avx_vextractf128_pd_256: case Intrinsic::x86_avx_vextractf128_ps_256: @@ -4939,7 +5031,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)), DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::convertff: case Intrinsic::convertfsi: @@ -4972,31 +5064,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)), Code); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); - return 0; + return nullptr; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::log2: setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::log10: setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::exp: setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::exp2: setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::sqrt: case Intrinsic::fabs: case Intrinsic::sin: @@ -5025,21 +5117,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return 0; + return nullptr; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); - return 0; + return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI->getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -5060,42 +5152,45 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2))); setValue(&I, Add); } - return 0; + return nullptr; } case Intrinsic::convert_to_fp16: - setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl, - MVT::i16, getValue(I.getArgOperand(0)))); - return 0; + setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, + DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, + getValue(I.getArgOperand(0)), + DAG.getTargetConstant(0, MVT::i32)))); + return nullptr; case Intrinsic::convert_from_fp16: - setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl, - MVT::f32, getValue(I.getArgOperand(0)))); - return 0; + setValue(&I, + DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()), + DAG.getNode(ISD::BITCAST, sdl, MVT::f16, + getValue(I.getArgOperand(0))))); + return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); - return 0; + return nullptr; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, - DAG.getVTList(MVT::i64, MVT::Other), - &Op, 1); + 
DAG.getVTList(MVT::i64, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return 0; + return nullptr; } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, sdl, Ty, Arg)); - return 0; + return nullptr; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); @@ -5103,26 +5198,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, sdl, Ty, Arg)); - return 0; + return nullptr; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); - return 0; + return nullptr; } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1); + DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return 0; + return nullptr; } case Intrinsic::stackrestore: { Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); - return 0; + return nullptr; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. @@ -5144,7 +5239,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { true, false, 0); setValue(&I, Res); DAG.setRoot(Res); - return 0; + return nullptr; } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. 
@@ -5161,16 +5256,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getConstant(0, Ty); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::annotation: case Intrinsic::ptr_annotation: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); - return 0; + return nullptr; case Intrinsic::var_annotation: // Discard annotate attributes - return 0; + return nullptr; case Intrinsic::init_trampoline: { const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); @@ -5183,16 +5278,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); DAG.setRoot(Res); - return 0; + return nullptr; } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::gcroot: if (GFI) { @@ -5202,18 +5297,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); } - return 0; + return nullptr; case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); - return 0; + return nullptr; case Intrinsic::expect: { // Just replace __builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); - return 0; + return nullptr; } case Intrinsic::debugtrap: @@ -5223,20 +5318,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
ISD::TRAP : ISD::DEBUGTRAP; DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); - return 0; + return nullptr; } TargetLowering::ArgListTy Args; - TargetLowering:: - CallLoweringInfo CLI(getRoot(), I.getType(), - false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol(TrapFuncName.data(), - TLI->getPointerTy()), - Args, DAG, sdl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()) + .setCallee(CallingConv::C, I.getType(), + DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()), + std::move(Args), 0); + std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); - return 0; + return nullptr; } case Intrinsic::uadd_with_overflow: @@ -5260,7 +5354,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); - return 0; + return nullptr; } case Intrinsic::prefetch: { SDValue Ops[5]; @@ -5271,25 +5365,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, - DAG.getVTList(MVT::Other), - &Ops[0], 5, + DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ false, /* volatile */ rw==0, /* read */ rw==1)); /* write */ - return 0; + return nullptr; } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); // Stack coloring is not enabled in O0, discard region information. if (TM.getOptLevel() == CodeGenOpt::None) - return 0; + return nullptr; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5306,18 +5399,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); - Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2); + Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); DAG.setRoot(Res); } - return 0; + return nullptr; } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); - return 0; + return nullptr; case Intrinsic::invariant_end: // Discard region information. - return 0; + return nullptr; case Intrinsic::stackprotectorcheck: { // Do not actually emit anything for this basic block. Instead we initialize // the stack protector descriptor and export the guard variable so we can @@ -5328,19 +5421,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Flush our exports since we are going to process a terminator. 
(void)getControlRoot(); - return 0; + return nullptr; } + case Intrinsic::clear_cache: + return TLI->getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore - return 0; + return nullptr; case Intrinsic::experimental_stackmap: { visitStackmap(I); - return 0; + return nullptr; } case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: { visitPatchpoint(I); - return 0; + return nullptr; } } } @@ -5348,53 +5443,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { + const TargetLowering *TLI = TM.getTargetLowering(); PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); FunctionType *FTy = cast<FunctionType>(PT->getElementType()); Type *RetTy = FTy->getReturnType(); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - MCSymbol *BeginLabel = 0; + MCSymbol *BeginLabel = nullptr; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); - // Check whether the function can return without sret-demotion. - SmallVector<ISD::OutputArg, 4> Outs; - const TargetLowering *TLI = TM.getTargetLowering(); - GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - - bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); - - SDValue DemoteStackSlot; - int DemoteStackIdx = -100; - - if (!CanLowerReturn) { - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( - FTy->getReturnType()); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( - FTy->getReturnType()); - MachineFunction &MF = DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); - Entry.Node = DemoteStackSlot; - Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.isReturned = false; - Entry.Alignment = Align; - Args.push_back(Entry); - RetTy = Type::getVoidTy(FTy->getContext()); - } - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5435,58 +5494,20 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. 
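The hunk below also retires the old positional CallLoweringInfo constructor in favor of the fluent builder this change introduces. For reference, a minimal sketch of the new interface in isolation; lowerVoidCall is a hypothetical helper, and LowerCallTo returns a {result, chain} pair:

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/IR/Type.h"
  #include "llvm/Target/TargetLowering.h"
  using namespace llvm;

  static SDValue lowerVoidCall(const TargetLowering &TLI, SelectionDAG &DAG,
                               SDLoc DL, SDValue Chain, SDValue Callee,
                               TargetLowering::ArgListTy Args) {
    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(DL).setChain(Chain)
       .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                  Callee, std::move(Args), /*FixedArgs=*/0);
    return TLI.LowerCallTo(CLI).second; // a void call yields only a chain
  }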
- if (isTailCall && !isInTailCallPosition(CS, *TLI)) + if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) isTailCall = false; - TargetLowering:: - CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, - getCurSDLoc(), CS); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall); + std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); - if (Result.first.getNode()) { + if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); - } else if (!CanLowerReturn && Result.second.getNode()) { - // The instruction result is the result of loading from the - // hidden sret parameter. - SmallVector<EVT, 1> PVTs; - Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - - ComputeValueVTs(*TLI, PtrRetTy, PVTs); - assert(PVTs.size() == 1 && "Pointers should fit in one register"); - EVT PtrVT = PVTs[0]; - - SmallVector<EVT, 4> RetTys; - SmallVector<uint64_t, 4> Offsets; - RetTy = FTy->getReturnType(); - ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); - - unsigned NumValues = RetTys.size(); - SmallVector<SDValue, 4> Values(NumValues); - SmallVector<SDValue, 4> Chains(NumValues); - - for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, - DemoteStackSlot, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, false, 1); - Values[i] = L; - Chains[i] = L.getValue(1); - } - - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], NumValues); - PendingLoads.push_back(Chain); - - setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&RetTys[0], RetTys.size()), - &Values[0], Values.size())); - } if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted @@ -5514,9 +5535,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + for (const User *U : V->users()) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) if (IC->isEquality()) if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) @@ -5540,7 +5560,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.TD)) + Builder.DL)) return Builder.getValue(LoadCst); } @@ -5626,7 +5646,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; - LoadTy = 0; + LoadTy = nullptr; ActuallyDoIt = false; break; case 2: @@ -5659,9 +5679,13 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // bloat the code. 
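The legality test in the next hunk becomes address-space aware: the widened memcmp load must be a legal type, and unaligned accesses must be permitted in both operands' address spaces. Factored out as a sketch (canWidenMemcmp is an illustrative name, not from the patch):

  #include "llvm/Target/TargetLowering.h"
  using namespace llvm;

  static bool canWidenMemcmp(const TargetLowering &TLI, MVT LoadVT,
                             unsigned SrcAS, unsigned DstAS) {
    // Reject widening when the type is illegal or either address space
    // cannot tolerate an unaligned load of that width.
    return TLI.isTypeLegal(LoadVT) &&
           TLI.allowsUnalignedMemoryAccesses(LoadVT, SrcAS) &&
           TLI.allowsUnalignedMemoryAccesses(LoadVT, DstAS);
  }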
const TargetLowering *TLI = TM.getTargetLowering(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } @@ -5849,7 +5873,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ComputeUsesVAFloatArgument(I, &MMI); - const char *RenameFn = 0; + const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { @@ -6024,7 +6048,7 @@ public: RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) - : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { + : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { } /// getCallOperandValEVT - Return the EVT of the Value* that this operand @@ -6032,8 +6056,8 @@ public: /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout *TD) const { - if (CallOperandVal == 0) return MVT::Other; + const DataLayout *DL) const { + if (!CallOperandVal) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) return TLI.getPointerTy(); @@ -6058,7 +6082,7 @@ public: // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = TD->getTypeSizeInBits(OpTy); + unsigned BitSize = DL->getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -6114,7 +6138,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, // types are identical size, use a bitcast to convert (e.g. two differing // vector types). MVT RegVT = *PhysReg.second->vt_begin(); - if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; @@ -6247,7 +6271,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL). getSimpleVT(); } @@ -6354,7 +6378,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // There is no longer a Value* corresponding to this operand. - OpInfo.CallOperandVal = 0; + OpInfo.CallOperandVal = nullptr; // It is now an indirect operand. 
OpInfo.isIndirect = true; @@ -6643,8 +6667,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), - DAG.getVTList(MVT::Other, MVT::Glue), - &AsmNodeOperands[0], AsmNodeOperands.size()); + DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); Flag = Chain.getValue(1); // If this asm returns a register value, copy the result from that register @@ -6707,8 +6730,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } if (!OutChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - &OutChains[0], OutChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); DAG.setRoot(Chain); } @@ -6722,11 +6744,11 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering *TLI = TM.getTargetLowering(); - const DataLayout &TD = *TLI->getDataLayout(); + const DataLayout &DL = *TLI->getDataLayout(); SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), - TD.getABITypeAlignment(I.getType())); + DL.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -6778,15 +6800,51 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, } Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); - TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false, - /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs, - CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, - /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc()); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setDiscardResult(!CI.use_empty()); const TargetLowering *TLI = TM.getTargetLowering(); return TLI->LowerCallTo(CLI); } +/// \brief Add a stack map intrinsic call's live variable operands to a stackmap +/// or patchpoint target node's operand list. +/// +/// Constants are converted to TargetConstants purely as an optimization to +/// avoid constant materialization and register allocation. +/// +/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not +/// generate address computation nodes, and so ExpandISelPseudo can convert the +/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids +/// address materialization and register allocation, but may also be required +/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an +/// alloca in the entry block, then the runtime may assume that the alloca's +/// StackMap location can be read immediately after compilation and that the +/// location is valid at any point during execution (this is similar to the +/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were +/// only available in a register, then the runtime would need to trap when +/// execution reaches the StackMap in order to read the alloca's location.
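For reference, the intrinsic call whose live operands the helper defined just below encodes can be produced from C++ roughly as follows; emitStackMap and the id/shadow-byte values are illustrative, not from the patch:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  static CallInst *emitStackMap(IRBuilder<> &B, Module &M, Value *LiveVal) {
    Function *StackMap =
        Intrinsic::getDeclaration(&M, Intrinsic::experimental_stackmap);
    Value *Args[] = {
      B.getInt64(42), // <id>: opaque tag recorded in the stackmap section
      B.getInt32(0),  // <numShadowBytes>: nop padding after the map point
      LiveVal         // a live value the runtime wants to locate later
    };
    return B.CreateCall(StackMap, Args);
  }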
+static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx, + SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder &Builder) { + for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = Builder.getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { + Ops.push_back( + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { + const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); + Ops.push_back( + Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + } else + Ops.push_back(OpVal); + } +} + /// \brief Lower llvm.experimental.stackmap directly to its target opcode. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, @@ -6794,61 +6852,64 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); - SDValue Callee = getValue(CI.getCalledValue()); + SDValue Chain, InFlag, Callee, NullPtr; + SmallVector<SDValue, 32> Ops; - // Lower into a call sequence with no args and no return value. - std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee); - // Set the root to the target-lowered call chain. - SDValue Chain = Result.second; - DAG.setRoot(Chain); + SDLoc DL = getCurSDLoc(); + Callee = getValue(CI.getCalledValue()); + NullPtr = DAG.getIntPtrConstant(0, true); - /// Get a call instruction from the call sequence chain. /// Tail calls are not allowed. - SDNode *CallEnd = Chain.getNode(); - assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && - "Expected a callseq node."); - SDNode *Call = CallEnd->getOperand(0).getNode(); - bool hasGlue = Call->getGluedNode(); + // The stackmap intrinsic only records the live variables (the arguments + // passed to it) and emits NOPs (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target specific lowering code. + // Instead we perform the call lowering right here. + // + // chain, flag = CALLSEQ_START(chain, 0) + // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) + // chain, flag = CALLSEQ_END(chain, 0, 0, flag) + // + Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); + InFlag = Chain.getValue(1); - // Replace the target specific call node with the stackmap intrinsic. - SmallVector<SDValue, 8> Ops; + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); - // Add the <id> and <numShadowBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } // Push live variables for the stack map.
- for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) - Ops.push_back(getValue(CI.getArgOperand(i))); + addStackMapLiveVars(CI, 2, Ops, *this); - // Push the chain (this is originally the first operand of the call, but - // becomes now the last or second to last operand). - Ops.push_back(*(Call->op_begin())); + // We are not pushing any register mask info here on the operands list, + // because the stackmap doesn't clobber anything. - // Push the glue flag (last operand). - if (hasGlue) - Ops.push_back(*(Call->op_end()-1)); + // Push the chain and the glue flag. + Ops.push_back(Chain); + Ops.push_back(InFlag); + // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); + Chain = SDValue(SM, 0); + InFlag = Chain.getValue(1); - // Replace the target specific call node with a STACKMAP node. - MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), - NodeTys, Ops); + Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); - // StackMap generates no value, so nothing goes in the NodeMap. + // Stackmaps don't generate values, so nothing goes into the NodeMap. - // Fixup the consumers of the intrinsic. The chain and glue may be used in the - // call sequence. - DAG.ReplaceAllUsesWith(Call, MN); + // Set the root to the target-lowered call chain. + DAG.setRoot(Chain); - DAG.DeleteNode(Call); + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo()->setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { - // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>, + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, // i32 <numArgs>, @@ -6861,17 +6922,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { SDValue Callee = getValue(CI.getOperand(2)); // <target> // Get the real number of arguments participating in the call <numArgs> - unsigned NumArgs = - cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue(); + SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> - assert(CI.getNumArgOperands() >= NumArgs + 4 && + // Intrinsics include all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; std::pair<SDValue, SDValue> Result = - LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC); // Set the root to the target-lowered call chain. SDValue Chain = Result.second; @@ -6891,13 +6954,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Replace the target specific call node with the patchable intrinsic. SmallVector<SDValue, 8> Ops; - // Add the <id> and <numNopBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } + // Add the <id> and <numBytes> constants. 
+ SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + // Assume that the Callee is a constant address. + // FIXME: handle function symbols in the future. Ops.push_back( DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), /*isTarget=*/true)); @@ -6915,25 +6981,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Add the arguments we omitted previously. The register allocator should // place these in any free register. if (isAnyRegCC) - for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CI.getArgOperand(i))); - // Push the arguments from the call instruction. + // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) Ops.push_back(*i); // Push live variables for the stack map. - for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { - SDValue OpVal = getValue(CI.getArgOperand(i)); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { - Ops.push_back( - DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back( - DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); - } else - Ops.push_back(OpVal); - } + addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this); // Push the register mask info. if (hasGlue) @@ -6960,7 +7017,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // There is always a chain and a glue type at the end ValueVTs.push_back(MVT::Other); ValueVTs.push_back(MVT::Glue); - NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + NodeTys = DAG.getVTList(ValueVTs); } else NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -6987,6 +7044,24 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { } else DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); +} + +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. +static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { + SmallVector<Attribute::AttrKind, 2> Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -6997,43 +7072,85 @@ std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. 
CLI.Ins.clear(); + Type *OrigRetTy = CLI.RetTy; SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.ArgVT = VT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); + + bool CanLowerReturn = + this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), + CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + SDValue DemoteStackSlot; + int DemoteStackIdx = -100; + if (!CanLowerReturn) { + // FIXME: equivalent assert? + // assert(!CS.hasInAllocaArgument() && + // "sret demotion is incompatible with inalloca"); + uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); + unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); + + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.isReturned = false; + Entry.Alignment = Align; + CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + } else { + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } } } // Handle all of the outgoing arguments. 
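The demotion path above is easier to follow with the source-level picture in mind: when the return type cannot be lowered in registers, the caller passes a hidden pointer to a stack temporary and the call's return type becomes void. A rough sketch of the rewrite, with illustrative names only:

  struct Big { long v[8]; };          // too wide for the return registers
  // Original form:  Big b = f();  with  Big f();
  // After sret demotion the call behaves as if written:
  inline void f_sret(Big *ret) { ret->v[0] = 0; } // stand-in callee body
  void caller() {
    Big b;      // plays the role of DemoteStackSlot, owned by the caller
    f_sret(&b); // callee stores the result through the hidden pointer
    // LowerCallTo then re-loads b piece by piece (RetTys/Offsets) and merges
    // the load chains with a TokenFactor, as the code further down shows.
  }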
CLI.Outs.clear(); CLI.OutVals.clear(); - ArgListTy &Args = CLI.Args; + ArgListTy &Args = CLI.getArgs(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { + Type *FinalType = Args[i].Ty; + if (Args[i].isByVal) + FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); + bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; + ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = - getDataLayout()->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -7043,8 +7160,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); - if (Args[i].isByVal) { + if (Args[i].isByVal) + Flags.setByVal(); + if (Args[i].isInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. Flags.setByVal(); + } + if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); @@ -7059,6 +7186,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7091,8 +7220,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setReturned(); } - getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, - PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, + CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -7104,6 +7233,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (j != 0) MyFlags.Flags.setOrigAlign(1); + // Only mark the end at the last register of the last value. + if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } @@ -7137,35 +7270,62 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { "LowerCall emitted a value with the wrong type!"); }); - // Collect the legal value parts into potentially illegal values - // that correspond to the original function's return values. 
- ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (CLI.RetSExt) - AssertOp = ISD::AssertSext; - else if (CLI.RetZExt) - AssertOp = ISD::AssertZext; SmallVector<SDValue, 4> ReturnValues; - unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + if (!CanLowerReturn) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector<EVT, 1> PVTs; + Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); - ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, NULL, - AssertOp)); - CurReg += NumRegs; - } + ComputeValueVTs(*this, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + + unsigned NumValues = RetTys.size(); + ReturnValues.resize(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, + CLI.DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = CLI.DAG.getLoad( + RetTys[i], CLI.DL, CLI.Chain, Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, + false, false, 1); + ReturnValues[i] = L; + Chains[i] = L.getValue(1); + } + + CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); + } else { + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (CLI.RetSExt) + AssertOp = ISD::AssertSext; + else if (CLI.RetZExt) + AssertOp = ISD::AssertZext; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + + ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], + NumRegs, RegisterVT, VT, nullptr, + AssertOp)); + CurReg += NumRegs; + } - // For a function returning void, there is no return value. We can't create - // such a node, so we just return a null return value in that case. In - // that case, nothing will actually look at the value. - if (ReturnValues.empty()) - return std::make_pair(SDValue(), CLI.Chain); + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actually look at the value. 
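[Editor's note] In the !CanLowerReturn path above, the call's results are reloaded from the demoted slot, one load per value at the offset ComputeValueVTs reported earlier. A standalone sketch of that reload loop, with byte copies standing in for CLI.DAG.getLoad:

  #include <vector>

  void reloadReturnValues(const char *DemoteSlot,
                          const std::vector<unsigned> &Offsets,
                          const std::vector<unsigned> &Sizes,
                          std::vector<std::vector<char>> &ReturnValues) {
    ReturnValues.resize(Offsets.size());
    for (size_t i = 0; i < Offsets.size(); ++i)
      ReturnValues[i].assign(DemoteSlot + Offsets[i],
                             DemoteSlot + Offsets[i] + Sizes[i]);
    // In the DAG each load also yields an output chain; the hunk above
    // joins them with one TokenFactor so later nodes depend on every reload.
  }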
+ if (ReturnValues.empty()) + return std::make_pair(SDValue(), CLI.Chain); + } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, - CLI.DAG.getVTList(&RetTys[0], RetTys.size()), - &ReturnValues[0], ReturnValues.size()); + CLI.DAG.getVTList(RetTys), ReturnValues); return std::make_pair(Res, CLI.Chain); } @@ -7192,7 +7352,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { const TargetLowering *TLI = TM.getTargetLowering(); RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V); + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V); PendingExports.push_back(Chain); } @@ -7208,12 +7368,10 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); - for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) { - const User *U = *UI; + for (const User *U : A->users()) if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) return false; // Use not in entry block. - } + return true; } @@ -7221,7 +7379,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); const TargetLowering *TLI = getTargetLowering(); - const DataLayout *TD = TLI->getDataLayout(); + const DataLayout *DL = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { @@ -7247,13 +7405,17 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ComputeValueVTs(*TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); unsigned PartBase = 0; + Type *FinalType = I->getType(); + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + FinalType = cast<PointerType>(FinalType)->getElementType(); + bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( + FinalType, F.getCallingConv(), F.isVarArg()); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = - TD->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7263,11 +7425,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. 
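[Editor's note] A recurring mechanical change in these hunks (getVTList(RetTys), getMergeValues(makeArrayRef(...)), and more below) is replacing (pointer, count) parameter pairs with array views. A standalone illustration using a minimal view type; the real one is llvm::ArrayRef:

  #include <cassert>
  #include <cstddef>
  #include <vector>

  template <typename T> struct ArrayRefLike {
    const T *Data; size_t Size;
    ArrayRefLike(const std::vector<T> &V) : Data(V.data()), Size(V.size()) {}
  };

  size_t countValues(ArrayRefLike<int> Vals) { return Vals.Size; }

  int main() {
    std::vector<int> RetTys{1, 2, 3};
    // Before: f(&RetTys[0], RetTys.size());   After: f(RetTys);
    assert(countValues(RetTys) == 3);
  }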
unsigned FrameAlign; @@ -7279,6 +7451,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); @@ -7291,6 +7465,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // if it isn't first piece, alignment must be 1 else if (i > 0) MyFlags.Flags.setOrigAlign(1); + + // Only mark the end at the last register of the last value. + if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + Ins.push_back(MyFlags); } PartBase += VT.getStoreSize(); @@ -7332,7 +7511,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, - RegVT, VT, NULL, AssertOp); + RegVT, VT, nullptr, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -7379,7 +7558,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, - NULL, AssertOp)); + nullptr, AssertOp)); } i += NumParts; @@ -7394,7 +7573,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); - SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, + SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); SDB->setValue(I, Res); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 835f643..84679f9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,9 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> @@ -57,6 +56,7 @@ class MachineBasicBlock; class MachineInstr; class MachineRegisterInfo; class MDNode; +class MVT; class PHINode; class PtrToIntInst; class ReturnInst; @@ -96,7 +96,7 @@ class SelectionDAGBuilder { DebugLoc dl; unsigned SDNodeOrder; public: - DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { } + DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { } DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) : DI(di), dl(DL), SDNodeOrder(SDNO) { } const DbgValueInst* getDI() { return DI; } @@ -135,7 +135,7 @@ private: MachineBasicBlock* BB; uint32_t ExtraWeight; - Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } + Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { } Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } @@ -320,7 +320,7 @@ private: /// 1. Preserve the architecture independence of stack protector generation. /// /// 2. 
Preserve the normal IR level stack protector check for platforms like - /// OpenBSD for which we support platform specific stack protector + /// OpenBSD for which we support platform-specific stack protector /// generation. /// /// The main problem that guided the present solution is that one can not @@ -338,7 +338,7 @@ private: /// basic block (where the return inst is placed) and then move it back /// later at SelectionDAG/MI time before the stack protector check if the /// tail call optimization failed. The MI level option was nixed - /// immediately since it would require platform specific pattern + /// immediately since it would require platform-specific pattern /// matching. The SelectionDAG level option was nixed because /// SelectionDAG only processes one IR level basic block at a time /// implying one could not create a DAG Combine to move the callinst. @@ -396,8 +396,8 @@ private: /// the same function, use the same failure basic block). class StackProtectorDescriptor { public: - StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0), - Guard(0) { } + StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), + FailureMBB(nullptr), Guard(nullptr) { } ~StackProtectorDescriptor() { } /// Returns true if all fields of the stack protector descriptor are @@ -432,8 +432,8 @@ private: /// parent mbb after we create the stack protector check (SuccessMBB). This /// BB is visited only on stack protector check success. void resetPerBBState() { - ParentMBB = 0; - SuccessMBB = 0; + ParentMBB = nullptr; + SuccessMBB = nullptr; } /// Reset state that only changes when we switch functions. @@ -446,8 +446,8 @@ private: /// 2.The guard variable since the guard variable we are checking against is /// always the same. void resetPerFunctionState() { - FailureMBB = 0; - Guard = 0; + FailureMBB = nullptr; + Guard = nullptr; } MachineBasicBlock *getParentMBB() { return ParentMBB; } @@ -482,14 +482,18 @@ private: /// block will be created. MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, MachineBasicBlock *ParentMBB, - MachineBasicBlock *SuccMBB = 0); + MachineBasicBlock *SuccMBB = nullptr); }; private: const TargetMachine &TM; public: + /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling + /// nodes without a corresponding SDNode. 
+ static const unsigned LowestSDNodeOrder = 1; + SelectionDAG &DAG; - const DataLayout *TD; + const DataLayout *DL; AliasAnalysis *AA; const TargetLibraryInfo *LibInfo; @@ -534,7 +538,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()), + : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } @@ -596,29 +600,31 @@ public: void setValue(const Value *V, SDValue NewN) { SDValue &N = NodeMap[V]; - assert(N.getNode() == 0 && "Already set a value for this node!"); + assert(!N.getNode() && "Already set a value for this node!"); N = NewN; } void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; - assert(N.getNode() == 0 && "Already set a value for this node!"); + assert(!N.getNode() && "Already set a value for this node!"); N = NewN; } void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB, unsigned Opc); + MachineBasicBlock *SwitchBB, unsigned Opc, + uint32_t TW, uint32_t FW); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB); + MachineBasicBlock *SwitchBB, + uint32_t TW, uint32_t FW); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, - MachineBasicBlock *LandingPad = NULL); + MachineBasicBlock *LandingPad = nullptr); std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI, unsigned ArgIdx, @@ -627,7 +633,7 @@ public: bool useVoidTy = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the - /// references that ned to refer to the last resulting block. + /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); private: @@ -636,7 +642,7 @@ private: void visitBr(const BranchInst &I); void visitSwitch(const SwitchInst &I); void visitIndirectBr(const IndirectBrInst &I); - void visitUnreachable(const UnreachableInst &I) { /* noop */ } + void visitUnreachable(const UnreachableInst &I); // Helpers for visitSwitch bool handleSmallSwitchRange(CaseRec& CR, @@ -779,7 +785,8 @@ private: /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. 
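[Editor's note] FindMergedConditions and EmitBranchForMergedCondition now carry true/false weights (TW, FW) because splitting `if (a && b)` into two chained branches must conserve the original edge's probability mass. The check below encodes that invariant for one hypothetical split; the numbers and the apportioning policy here are assumptions for illustration, not LLVM's actual policy:

  #include <cassert>

  int main() {
    double TW = 0.6, FW = 0.4;          // original true/false probabilities
    double pA = 0.75, pB = TW / pA;     // one consistent split: pA * pB == TW
    assert(pA * pB > TW - 1e-9 && pA * pB < TW + 1e-9);
    // The false mass is conserved too: fail at `a`, or pass `a` then fail `b`.
    assert((1 - pA) + pA * (1 - pB) > FW - 1e-9);
  }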
bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, const SDValue &N); + int64_t Offset, bool IsIndirect, + const SDValue &N); }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c04a08d..a71cc68 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -14,11 +14,10 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" @@ -56,6 +55,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PREFETCH: return "Prefetch"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; @@ -82,7 +82,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; case ISD::RegisterMask: return "RegisterMask"; - case ISD::Constant: return "Constant"; + case ISD::Constant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueConstant"; + return "Constant"; case ISD::ConstantFP: return "ConstantFP"; case ISD::GlobalAddress: return "GlobalAddress"; case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; @@ -91,6 +94,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::READ_REGISTER: return "READ_REGISTER"; + case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; @@ -112,7 +117,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { } case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueTargetConstant"; + return "TargetConstant"; case ISD::TargetConstantFP: return "TargetConstantFP"; case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; @@ -213,6 +221,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ZERO_EXTEND: return "zero_extend"; case ISD::ANY_EXTEND: return "any_extend"; case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg"; + case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg"; + case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; @@ -225,8 +236,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { 
case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; - case ISD::FP16_TO_FP32: return "fp16_to_fp32"; - case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + case ISD::FP16_TO_FP: return "fp16_to_fp"; + case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::CONVERT_RNDSAT: { switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { @@ -325,7 +336,7 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } -void SDNode::dump() const { dump(0); } +void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); dbgs() << '\n'; @@ -352,7 +363,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), e = MN->memoperands_end(); i != e; ++i) { OS << **i; - if (llvm::next(i) != e) + if (std::next(i) != e) OS << " "; } OS << ">"; @@ -385,7 +396,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { dyn_cast<GlobalAddressSDNode>(this)) { int64_t offset = GADN->getOffset(); OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); + GADN->getGlobal()->printAsOperand(OS); OS << '>'; if (offset > 0) OS << " + " << offset; @@ -422,7 +433,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); + OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :nullptr); } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { OS << "'" << ES->getSymbol() << "'"; @@ -476,9 +487,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { dyn_cast<BlockAddressSDNode>(this)) { int64_t offset = BA->getOffset(); OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + BA->getBlockAddress()->getFunction()->printAsOperand(OS, false); OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false); OS << ">"; if (offset > 0) OS << " + " << offset; @@ -590,7 +601,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, void SDNode::dumpr() const { VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, 0, once); + DumpNodesr(dbgs(), this, 0, nullptr, once); } void SDNode::dumpr(const SelectionDAG *G) const { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b8b4db4..57e22e2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "isel" #include "llvm/CodeGen/SelectionDAGISel.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" @@ -20,7 +19,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -33,8 +31,8 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include 
"llvm/CodeGen/SelectionDAG.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" @@ -59,6 +57,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "isel" + STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); @@ -141,6 +141,25 @@ STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); + +// Intrinsic instructions... +STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call"); +STATISTIC(NumFastIselFailSAddWithOverflow, + "Fast isel fails on sadd.with.overflow"); +STATISTIC(NumFastIselFailUAddWithOverflow, + "Fast isel fails on uadd.with.overflow"); +STATISTIC(NumFastIselFailSSubWithOverflow, + "Fast isel fails on ssub.with.overflow"); +STATISTIC(NumFastIselFailUSubWithOverflow, + "Fast isel fails on usub.with.overflow"); +STATISTIC(NumFastIselFailSMulWithOverflow, + "Fast isel fails on smul.with.overflow"); +STATISTIC(NumFastIselFailUMulWithOverflow, + "Fast isel fails on umul.with.overflow"); +STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress"); +STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call"); +STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call"); +STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call"); #endif static cl::opt<bool> @@ -213,7 +232,7 @@ MachinePassRegistry RegisterScheduler::Registry; static cl::opt<RegisterScheduler::FunctionPassCtor, false, RegisterPassParser<RegisterScheduler> > ISHeuristic("pre-RA-sched", - cl::init(&createDefaultScheduler), + cl::init(&createDefaultScheduler), cl::Hidden, cl::desc("Instruction schedulers available (before register" " allocation):")); @@ -300,7 +319,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, "'usesCustomInserter', it must implement " "TargetLowering::EmitInstrWithCustomInserter!"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, @@ -357,7 +376,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { // Loop for blocks with phi nodes. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { PHINode *PN = dyn_cast<PHINode>(BB->begin()); - if (PN == 0) continue; + if (!PN) continue; ReprocessBlock: // For each block with a PHI node, check to see if any of the input values @@ -367,7 +386,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); - if (CE == 0 || !CE->canTrap()) continue; + if (!CE || !CE->canTrap()) continue; // The only case we have to worry about is when the edge is critical. 
// Since this block has a PHI Node, we assume it has multiple input @@ -400,8 +419,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); LibInfo = &getAnalysis<TargetLibraryInfo>(); - TTI = getAnalysisIfAvailable<TargetTransformInfo>(); - GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; + GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; TargetSubtargetInfo &ST = const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>()); @@ -418,13 +436,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TTI, TLI); + CurDAG->init(*MF, TLI); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); else - FuncInfo->BPI = 0; + FuncInfo->BPI = nullptr; SDB->init(GFI, *AA, LibInfo); @@ -449,7 +467,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; bool hasFI = MI->getOperand(0).isFI(); - unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); + unsigned Reg = + hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { @@ -457,7 +476,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (Def) { MachineBasicBlock::iterator InsertPos = Def; // FIXME: VR def may not be in entry block. - Def->getParent()->insert(llvm::next(InsertPos), MI); + Def->getParent()->insert(std::next(InsertPos), MI); } else DEBUG(dbgs() << "Dropping debug info for dead vreg" << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); @@ -483,16 +502,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only // user of LDI->second. - MachineInstr *CopyUseMI = NULL; - for (MachineRegisterInfo::use_iterator - UI = RegInfo->use_begin(LDI->second); - MachineInstr *UseMI = UI.skipInstruction();) { + MachineInstr *CopyUseMI = nullptr; + for (MachineRegisterInfo::use_instr_iterator + UI = RegInfo->use_instr_begin(LDI->second), + E = RegInfo->use_instr_end(); UI != E; ) { + MachineInstr *UseMI = &*(UI++); if (UseMI->isDebugValue()) continue; if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { CopyUseMI = UseMI; continue; } // Otherwise this is another use or second copy use. - CopyUseMI = NULL; break; + CopyUseMI = nullptr; break; } if (CopyUseMI) { MachineInstr *NewMI = @@ -509,21 +529,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there are any calls in this machine function. 
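[Editor's note] The SplitCriticalSideEffectEdges hunk above guards against PHIs whose incoming value is a trapping ConstantExpr: evaluating it speculatively on a critical edge could introduce a trap, so the edge is split first. A source-level sketch of the hazard, assuming hypothetical names:

  int f(int sel, int d) {
    // The IR equivalent has  phi [100/d, bb1], [0, bb2];  hoisting 100/d
    // onto the shared (critical) edge would make the division execute on a
    // path where it never did before, so the edge gets its own block.
    return sel ? 100 / d : 0;
  }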
MachineFrameInfo *MFI = MF->getFrameInfo(); - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; - ++I) { - + for (const auto &MBB : *MF) { if (MFI->hasCalls() && MF->hasInlineAsm()) break; - const MachineBasicBlock *MBB = I; - for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); - II != IE; ++II) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); + for (const auto &MI : MBB) { + const MCInstrDesc &MCID = TM.getInstrInfo()->get(MI.getOpcode()); if ((MCID.isCall() && !MCID.isReturn()) || - II->isStackAligningInlineAsm()) { + MI.isStackAligningInlineAsm()) { MFI->setHasCalls(true); } - if (II->isInlineAsm()) { + if (MI.isInlineAsm()) { MF->setHasInlineAsm(true); } } @@ -564,6 +580,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // at this point. FuncInfo->clear(); + DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); + DEBUG(MF->print(dbgs())); + return true; } @@ -621,7 +640,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); + CurDAG->computeKnownBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -801,7 +820,7 @@ public: /// NodeDeleted - Handle nodes deleted from the graph. If the node being /// deleted is the current ISelPosition node, update ISelPosition. /// - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { if (ISelPosition == SelectionDAG::allnodes_iterator(N)) ++ISelPosition; } @@ -974,7 +993,37 @@ static void collectFailStats(const Instruction *I) { case Instruction::FCmp: NumFastIselFailFCmp++; return; case Instruction::PHI: NumFastIselFailPHI++; return; case Instruction::Select: NumFastIselFailSelect++; return; - case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Call: { + if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) { + switch (Intrinsic->getIntrinsicID()) { + default: + NumFastIselFailIntrinsicCall++; return; + case Intrinsic::sadd_with_overflow: + NumFastIselFailSAddWithOverflow++; return; + case Intrinsic::uadd_with_overflow: + NumFastIselFailUAddWithOverflow++; return; + case Intrinsic::ssub_with_overflow: + NumFastIselFailSSubWithOverflow++; return; + case Intrinsic::usub_with_overflow: + NumFastIselFailUSubWithOverflow++; return; + case Intrinsic::smul_with_overflow: + NumFastIselFailSMulWithOverflow++; return; + case Intrinsic::umul_with_overflow: + NumFastIselFailUMulWithOverflow++; return; + case Intrinsic::frameaddress: + NumFastIselFailFrameaddress++; return; + case Intrinsic::sqrt: + NumFastIselFailSqrt++; return; + case Intrinsic::experimental_stackmap: + NumFastIselFailStackMap++; return; + case Intrinsic::experimental_patchpoint_void: // fall-through + case Intrinsic::experimental_patchpoint_i64: + NumFastIselFailPatchPoint++; return; + } + } + NumFastIselFailCall++; + return; + } case Instruction::Shl: NumFastIselFailShl++; return; case Instruction::LShr: NumFastIselFailLShr++; return; case Instruction::AShr: NumFastIselFailAShr++; return; @@ -991,7 +1040,7 @@ static void collectFailStats(const Instruction *I) { void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. 
- FastISel *FastIS = 0; + FastISel *FastIS = nullptr; if (TM.Options.EnableFastISel) FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); @@ -1064,15 +1113,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // where they are, so we can be sure to emit subsequent instructions // after them. if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) - FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt)); else - FastIS->setLastLocalValue(0); + FastIS->setLastLocalValue(nullptr); } unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { - const Instruction *Inst = llvm::prior(BI); + const Instruction *Inst = std::prev(BI); // If we no longer require this instruction, skip it. if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { @@ -1093,7 +1142,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to fold the load if so. const Instruction *BeforeInst = Inst; while (BeforeInst != Begin) { - BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst)); if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) break; } @@ -1101,7 +1150,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { BeforeInst->hasOneUse() && FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. - BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + BI = std::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; ++NumFastIselSuccess; } @@ -1604,7 +1653,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne); + CurDAG->computeKnownBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) @@ -1673,7 +1722,7 @@ static SDNode *findGlueUse(SDNode *N) { if (Use.getResNo() == FlagResNo) return Use.getUser(); } - return NULL; + return nullptr; } /// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". 
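[Editor's note] Another mechanical change visible above: the pre-C++11 helpers llvm::prior/llvm::next give way to std::prev/std::next. A minimal equivalence check:

  #include <cassert>
  #include <iterator>
  #include <vector>

  int main() {
    std::vector<int> v{1, 2, 3};
    auto it = v.begin() + 1;
    assert(*std::prev(it) == 1);   // was llvm::prior(it)
    assert(*std::next(it) == 3);   // was llvm::next(it)
  }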
@@ -1780,7 +1829,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Glue) { SDNode *GU = findGlueUse(Root); - if (GU == NULL) + if (!GU) break; Root = GU; VT = Root->getValueType(Root->getNumValues()-1); @@ -1802,12 +1851,39 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { SelectInlineAsmMemoryOperands(Ops); EVT VTs[] = { MVT::Other, MVT::Glue }; - SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), - VTs, &Ops[0], Ops.size()); + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops); + New->setNodeId(-1); + return New.getNode(); +} + +SDNode +*SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0)); + const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + unsigned Reg = getTargetLowering()->getRegisterByName( + RegStr->getString().data(), Op->getValueType(0)); + SDValue New = CurDAG->getCopyFromReg( + CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); return New.getNode(); } +SDNode +*SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + unsigned Reg = getTargetLowering()->getRegisterByName( + RegStr->getString().data(), Op->getOperand(2).getValueType()); + SDValue New = CurDAG->getCopyToReg( + CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2)); + New->setNodeId(-1); + return New.getNode(); +} + + + SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); } @@ -1843,7 +1919,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, // Now that all the normal results are replaced, we replace the chain and // glue results if present. if (!ChainNodesMatched.empty()) { - assert(InputChain.getNode() != 0 && + assert(InputChain.getNode() && "Matched input chains but didn't produce a chain"); // Loop over all of the nodes we matched that produced a chain result. // Replace all the chain results with the final chain we ended up with. @@ -1874,7 +1950,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, // If the result produces glue, update any glue results in the matched // pattern with the glue result. - if (InputGlue.getNode() != 0) { + if (InputGlue.getNode()) { // Handle any interior nodes explicitly marked. for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { SDNode *FRN = GlueResultNodesMatched[i]; @@ -2077,13 +2153,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), - MVT::Other, &InputChains[0], InputChains.size()); + MVT::Other, InputChains); } /// MorphNode - Handle morphing a node in place for the selector. SDNode *SelectionDAGISel:: MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, - const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) { + ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) { // It is possible we're using MorphNodeTo to replace a node with no // normal results with one that has a normal result (or we could be // adding a chain) and the input could have glue and chains as well. 
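[Editor's note] Select_READ_REGISTER/Select_WRITE_REGISTER above lower the named-register intrinsics: the metadata string operand is mapped to a physical register via the target's getRegisterByName hook, then a plain CopyFromReg/CopyToReg is emitted. A standalone sketch of the name lookup step; the table is a hypothetical target's, and in-tree hooks typically report a fatal error for unknown names:

  #include <cstring>

  struct NamedReg { const char *Name; unsigned Id; };
  static const NamedReg Table[] = {{"sp", 13}, {"lr", 14}, {"pc", 15}};

  unsigned getRegisterByName(const char *Name) {
    for (const NamedReg &R : Table)
      if (std::strcmp(R.Name, Name) == 0)
        return R.Id;
    return 0;  // stand-in for the real error path
  }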
@@ -2103,7 +2179,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Call the underlying SelectionDAG routine to do the transmogrification. Note // that this deletes operands of the old node that become dead. - SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps); + SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops); // MorphNodeTo can operate in two ways: if an existing node with the // specified operands exists, it can just return it. Otherwise, it @@ -2195,8 +2271,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI, - unsigned ChildNo) { + SDValue N, const TargetLowering *TLI, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); @@ -2228,7 +2303,15 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, Val = GetVBR(Val, MatcherTable, MatcherIndex); ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); - return C != 0 && C->getSExtValue() == Val; + return C && C->getSExtValue() == Val; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2241,7 +2324,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, if (N->getOpcode() != ISD::AND) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); - return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val); + return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2254,7 +2337,7 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, if (N->getOpcode() != ISD::OR) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); - return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val); + return C && SDISel.CheckOrMask(N.getOperand(0), C, Val); } /// IsPredicateKnownToFail - If we know how and can do so without pushing a @@ -2314,6 +2397,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); return Index; + case SelectionDAGISel::OPC_CheckChild0Integer: + case SelectionDAGISel::OPC_CheckChild1Integer: + case SelectionDAGISel::OPC_CheckChild2Integer: + case SelectionDAGISel::OPC_CheckChild3Integer: + case SelectionDAGISel::OPC_CheckChild4Integer: + Result = !::CheckChildInteger(Table, Index, N, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer); + return Index; case SelectionDAGISel::OPC_CheckAndImm: Result = !::CheckAndImm(Table, Index, N, SDISel); return Index; @@ -2378,13 +2469,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::LIFETIME_START: case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. 
- return 0; + return nullptr; case ISD::AssertSext: case ISD::AssertZext: CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); - return 0; + return nullptr; case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); + case ISD::READ_REGISTER: return Select_READ_REGISTER(NodeToMatch); + case ISD::WRITE_REGISTER: return Select_WRITE_REGISTER(NodeToMatch); case ISD::UNDEF: return Select_UNDEF(NodeToMatch); } @@ -2530,7 +2623,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_RecordNode: { // Remember this node, it may end up being an operand in the pattern. - SDNode *Parent = 0; + SDNode *Parent = nullptr; if (NodeStack.size() > 1) Parent = NodeStack[NodeStack.size()-2].getNode(); RecordedNodes.push_back(std::make_pair(N, Parent)); @@ -2694,6 +2787,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; continue; + case OPC_CheckChild0Integer: case OPC_CheckChild1Integer: + case OPC_CheckChild2Integer: case OPC_CheckChild3Integer: + case OPC_CheckChild4Integer: + if (!::CheckChildInteger(MatcherTable, MatcherIndex, N, + Opcode-OPC_CheckChild0Integer)) break; + continue; case OPC_CheckAndImm: if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; continue; @@ -2731,7 +2830,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getTargetConstant(Val, VT), (SDNode*)0)); + CurDAG->getTargetConstant(Val, VT), nullptr)); continue; } case OPC_EmitRegister: { @@ -2739,7 +2838,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; unsigned RegNo = MatcherTable[MatcherIndex++]; RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + CurDAG->getRegister(RegNo, VT), nullptr)); continue; } case OPC_EmitRegister2: { @@ -2751,7 +2850,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned RegNo = MatcherTable[MatcherIndex++]; RegNo |= MatcherTable[MatcherIndex++] << 8; RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + CurDAG->getRegister(RegNo, VT), nullptr)); continue; } @@ -2776,7 +2875,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 // These are space-optimized forms of OPC_EmitMergeInputChains. - assert(InputChain.getNode() == 0 && + assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); assert(ChainNodesMatched.empty() && "Should only have one EmitMergeInputChains per match"); @@ -2797,13 +2896,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - if (InputChain.getNode() == 0) + if (!InputChain.getNode()) break; // Failed to merge. continue; } case OPC_EmitMergeInputChains: { - assert(InputChain.getNode() == 0 && + assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); // This node gets a list of nodes we matched in the input that have // chains. 
We want to token factor all of the input chains to these nodes @@ -2839,7 +2938,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - if (InputChain.getNode() == 0) + if (!InputChain.getNode()) break; // Failed to merge. continue; @@ -2850,7 +2949,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; - if (InputChain.getNode() == 0) + if (!InputChain.getNode()) InputChain = CurDAG->getEntryNode(); InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch), @@ -2866,7 +2965,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); - RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr)); continue; } @@ -2898,7 +2997,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, else if (VTs.size() == 2) VTList = CurDAG->getVTList(VTs[0], VTs[1]); else - VTList = CurDAG->getVTList(VTs.data(), VTs.size()); + VTList = CurDAG->getVTList(VTs); // Get the operand list. unsigned NumOps = MatcherTable[MatcherIndex++]; @@ -2932,11 +3031,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If this has chain/glue inputs, add them. if (EmitNodeInfo & OPFL_Chain) Ops.push_back(InputChain); - if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0) + if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); // Create the node. - SDNode *Res = 0; + SDNode *Res = nullptr; if (Opcode != OPC_MorphNodeTo) { // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. @@ -2947,17 +3046,16 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0, e = VTs.size(); i != e; ++i) { if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i), - (SDNode*) 0)); + nullptr)); } } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) { - Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), - EmitNodeInfo); + Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); } else { // NodeToMatch was eliminated by CSE when the target changed the DAG. // We will visit the equivalent node later. DEBUG(dbgs() << "Node was eliminated by CSE\n"); - return 0; + return nullptr; } // If the node had chain/glue results, update our notion of the current @@ -3087,7 +3185,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // FIXME: We just return here, which interacts correctly with SelectRoot // above. We should fix this to not return an SDNode* anymore. 
- return 0; + return nullptr; } } @@ -3099,7 +3197,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, while (1) { if (MatchScopes.empty()) { CannotYetSelect(NodeToMatch); - return 0; + return nullptr; } // Restore the interpreter state back to the point where the scope was diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index b752b482..4df5ede 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -15,12 +15,11 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "dag-printer" + namespace llvm { template<> struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { @@ -125,9 +126,9 @@ namespace llvm { static void addCustomGraphFeatures(SelectionDAG *G, GraphWriter<SelectionDAG*> &GW) { - GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); if (G->getRoot().getNode()) - GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(), + GW.emitEdge(nullptr, -1, G->getRoot().getNode(), G->getRoot().getResNo(), "color=blue,style=dashed"); } }; @@ -290,10 +291,10 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { if (DAG) { // Draw a special "GraphRoot" node to indicate the root of the graph. - GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); const SDNode *N = DAG->getRoot().getNode(); if (N && N->getNodeId() != -1) - GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1, + GW.emitEdge(nullptr, -1, &SUnits[N->getNodeId()], -1, "color=blue,style=dashed"); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 82b068d..05ace41 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/CommandLine.h" @@ -39,7 +40,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, : TargetLoweringBase(tm, tlof) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { - return NULL; + return nullptr; } /// Check whether a given call node is in tail position within its function. 
If @@ -74,6 +75,7 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); Alignment = CS->getParamAlignment(AttrIdx); } @@ -101,12 +103,11 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, getLibcallCallingConv(LC), - /*isTailCall=*/false, - doesNotReturn, isReturnValueUsed, Callee, Args, - DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) + .setSExtResult(isSigned).setZExtResult(!isSigned); return LowerCallTo(CLI); } @@ -224,7 +225,7 @@ unsigned TargetLowering::getJumpTableEncoding() const { return MachineJumpTableInfo::EK_BlockAddress; // In PIC mode, if the target supports a GPRel32 directive, use it. - if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) + if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr) return MachineJumpTableInfo::EK_GPRel32BlockAddress; // Otherwise, use a label difference. @@ -326,6 +327,10 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + // Early return, as this function cannot handle vector types. + if (Op.getValueType().isVector()) + return false; + // Don't do this if the node has another user, which may require the // full value. if (!Op.getNode()->hasOneUse()) @@ -384,7 +389,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. - TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -414,7 +419,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth); + TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. 
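[Editor's note] The makeLibCall hunk above replaces CallLoweringInfo's long positional constructor with chained setters. A simplified sketch of that fluent-builder pattern; the class here is a stand-in, not the real CallLoweringInfo:

  struct CallInfo {
    bool NoReturn = false, DiscardResult = false, SExt = false, ZExt = false;
    CallInfo &setNoReturn(bool V)      { NoReturn = V;      return *this; }
    CallInfo &setDiscardResult(bool V) { DiscardResult = V; return *this; }
    CallInfo &setSExtResult(bool V)    { SExt = V;          return *this; }
    CallInfo &setZExtResult(bool V)    { ZExt = V;          return *this; }
  };

  CallInfo makeCall(bool isSigned, bool doesNotReturn, bool used) {
    CallInfo CLI;
    CLI.setNoReturn(doesNotReturn).setDiscardResult(!used)
       .setSExtResult(isSigned).setZExtResult(!isSigned);
    return CLI;
  }

Each setter returns *this, so the whole configuration reads as one expression and new options can be added without touching every call site, which is the point of the migration.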
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -846,6 +851,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; } + case ISD::BUILD_PAIR: { + EVT HalfVT = Op.getOperand(0).getValueType(); + unsigned HalfBitWidth = HalfVT.getScalarSizeInBits(); + + APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth); + APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth); + + APInt KnownZeroLo, KnownOneLo; + APInt KnownZeroHi, KnownOneHi; + + if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo, + KnownOneLo, TLO, Depth + 1)) + return true; + + if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi, + KnownOneHi, TLO, Depth + 1)) + return true; + + KnownZero = KnownZeroLo.zext(BitWidth) | + KnownZeroHi.zext(BitWidth).shl(HalfBitWidth); + + KnownOne = KnownOneLo.zext(BitWidth) | + KnownOneHi.zext(BitWidth).shl(HalfBitWidth); + break; + } case ISD::ZERO_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); @@ -1038,8 +1068,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // FALL THROUGH default: - // Just use ComputeMaskedBits to compute output bits. - TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + // Just use computeKnownBits to compute output bits. + TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); break; } @@ -1051,14 +1081,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } -/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// computeKnownBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. -void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || @@ -1072,6 +1102,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, /// targets that want to expose additional information about sign bits to the /// DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + const SelectionDAG &, unsigned Depth) const { assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || @@ -1083,7 +1114,7 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, } /// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly -/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to +/// one bit set. This differs from computeKnownBits in that it doesn't need to /// determine which bit is set. /// static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { @@ -1106,15 +1137,69 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // More could be done here, though the above checks are enough // to handle some common cases. - // Fall back to ComputeMaskedBits to catch other known cases. + // Fall back to computeKnownBits to catch other known cases. 
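[Editor's note] The new ISD::BUILD_PAIR case above splits the demanded mask into low and high halves, recurses on each operand, then re-widens and shifts the known bits back together. A worked instance modeled on a 64-bit pair of 32-bit halves, with plain integers standing in for APInt:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t NewMask = 0xFFFF0000000000FFULL;   // demanded bits of the pair
    uint32_t MaskLo  = (uint32_t)NewMask;           // demanded of operand 0
    uint32_t MaskHi  = (uint32_t)(NewMask >> 32);   // demanded of operand 1
    assert(MaskLo == 0x000000FFu && MaskHi == 0xFFFF0000u);
    // Known bits computed per half are then zero-extended to 64 bits, the
    // high half shifted left by 32, mirroring KnownZero/KnownOne above.
  }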
EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Val, KnownZero, KnownOne); + DAG.computeKnownBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && (KnownOne.countPopulation() == 1); } +bool TargetLowering::isConstTrueVal(const SDNode *N) const { + if (!N) + return false; + + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. + if (!CN || UndefElements.none()) + return false; + } + + switch (getBooleanContents(N->getValueType(0))) { + case UndefinedBooleanContent: + return CN->getAPIntValue()[0]; + case ZeroOrOneBooleanContent: + return CN->isOne(); + case ZeroOrNegativeOneBooleanContent: + return CN->isAllOnesValue(); + } + + llvm_unreachable("Invalid boolean contents"); +} + +bool TargetLowering::isConstFalseVal(const SDNode *N) const { + if (!N) + return false; + + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. + if (!CN || UndefElements.none()) + return false; + } + + if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) + return !CN->getAPIntValue()[0]; + + return CN->isNullValue(); +} + /// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. SDValue @@ -1130,7 +1215,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETFALSE2: return DAG.getConstant(0, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent Cnt = + getBooleanContents(N0->getValueType(0)); return DAG.getConstant( Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? 
-1ULL : 1, VT); } @@ -1331,10 +1417,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = N0.getOperand(0).getValueType(); if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && - getCondCodeAction(Cond, newVT.getSimpleVT())==Legal)) - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(C1.trunc(InSize), newVT), - Cond); + getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { + EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT); + SDValue NewConst = DAG.getConstant(C1.trunc(InSize), newVT); + + SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), + NewConst, Cond); + return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType()); + } break; } default: @@ -1417,7 +1507,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } else if (N1C->getAPIntValue() == 1 && (VT == MVT::i1 || - getBooleanContents(false) == ZeroOrOneBooleanContent)) { + getBooleanContents(N0->getValueType(0)) == + ZeroOrOneBooleanContent)) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -1468,18 +1559,32 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true - // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1-1, N1.getValueType()), - (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, N1.getValueType()), + NewCC); + } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true - // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1+1, N1.getValueType()), - (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); + // X <= C0 --> X < (C0 + 1) + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, N1.getValueType()), + NewCC); + } } if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) @@ -1535,7 +1640,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. @@ -1565,7 +1670,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? 
getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), @@ -1593,7 +1698,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC = NewC.lshr(ShiftBits); if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, @@ -1674,7 +1779,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. uint64_t EqVal = 0; - switch (getBooleanContents(N0.getValueType().isVector())) { + switch (getBooleanContents(N0.getValueType())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: EqVal = ISD::isTrueWhenEqual(Cond); @@ -1988,7 +2093,7 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ return "r"; if (ConstraintVT.isFloatingPoint()) return "f"; // works for many targets - return 0; + return nullptr; } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops @@ -2022,12 +2127,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (Op.getOpcode() == ISD::ADD) { C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); - if (C == 0 || GA == 0) { + if (!C || !GA) { C = dyn_cast<ConstantSDNode>(Op.getOperand(0)); GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1)); } - if (C == 0 || GA == 0) - C = 0, GA = 0; + if (!C || !GA) + C = nullptr, GA = nullptr; } // If we find a valid operand, map to the TargetXXX version so that the @@ -2062,14 +2167,14 @@ std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') - return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); + return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); // Remove the braces from around the name. StringRef RegName(Constraint.data()+1, Constraint.size()-2); std::pair<unsigned, const TargetRegisterClass*> R = - std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); + std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); // Figure out which register class contains this reg. const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo(); @@ -2364,7 +2469,7 @@ TargetLowering::ConstraintWeight Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; // Look at the constraint type. switch (*constraint) { @@ -2520,7 +2625,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, if (ShAmt) { // TODO: For UDIV use SRL instead of SRA. SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); - Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false, + true); d = d.ashr(ShAmt); } @@ -2537,9 +2643,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. 
See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering:: -BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode*> *Created) const { +SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode *> *Created) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2548,8 +2654,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, if (!isTypeLegal(VT)) return SDValue(); - APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); - APInt::ms magics = d.magic(); + APInt::ms magics = Divisor.magic(); // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type @@ -2566,13 +2671,13 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, else return SDValue(); // No mulhs or equivalent // If d > 0 and m < 0, add the numerator - if (d.isStrictlyPositive() && magics.m.isNegative()) { + if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); if (Created) Created->push_back(Q.getNode()); } // If d < 0 and m > 0, subtract the numerator. - if (d.isNegative() && magics.m.isStrictlyPositive()) { + if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); if (Created) Created->push_back(Q.getNode()); @@ -2585,9 +2690,9 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient - SDValue T = - DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy(Q.getValueType()))); + SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); @@ -2597,9 +2702,9 @@ /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering:: -BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode*> *Created) const { +SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode *> *Created) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2610,22 +2715,21 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, // FIXME: We should use a narrower constant when the upper // bits are known to be zero. - const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); - APInt::mu magics = N1C.magicu(); + APInt::mu magics = Divisor.magicu(); SDValue Q = N->getOperand(0); // If the divisor is even, we can avoid using the expensive fixup by shifting // the divided value upfront. - if (magics.a != 0 && !N1C[0]) { - unsigned Shift = N1C.countTrailingZeros(); + if (magics.a != 0 && !Divisor[0]) { + unsigned Shift = Divisor.countTrailingZeros(); Q = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(Q.getNode()); // Get magic number for the shifted divisor.
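// What the BuildSDIV path above computes, played out for one divisor on
// plain int32_t: multiply by the magic constant (MULHS), apply the
// "d > 0 and m < 0: add the numerator" fixup, shift by magics.s, then add
// the extracted sign bit. The constant pair (0x92492493, shift 2) is the
// standard 32-bit magic for dividing by 7; this is an illustration of the
// idea, not the general derivation APInt::magic() performs, and it relies
// on arithmetic right shift of negative values, as the DAG's SRA does.
#include <cassert>
#include <cstdint>

static int32_t sdiv7(int32_t n) {
  const int64_t Magic = (int64_t)0x92492493 - ((int64_t)1 << 32); // m < 0
  int32_t q = (int32_t)(((int64_t)Magic * n) >> 32); // MULHS(n, magic)
  q += n;                            // d > 0 and m < 0: add the numerator
  q >>= 2;                           // arithmetic shift by magics.s
  q += (int32_t)((uint32_t)q >> 31); // extract sign bit, add to quotient
  return q;
}

int main() {
  for (int32_t n = -1000; n <= 1000; ++n)
    assert(sdiv7(n) == n / 7);
}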
- magics = N1C.lshr(Shift).magicu(Shift); + magics = Divisor.lshr(Shift).magicu(Shift); assert(magics.a == 0 && "Should use cheap fixup now"); } @@ -2644,7 +2748,7 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, Created->push_back(Q.getNode()); if (magics.a == 0) { - assert(magics.s < N1C.getBitWidth() && + assert(magics.s < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); @@ -2663,3 +2767,183 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); } } + +bool TargetLowering:: +verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { + if (!isa<ConstantSDNode>(Op.getOperand(0))) { + DAG.getContext()->emitError("argument to '__builtin_return_address' must " + "be a constant integer"); + return true; + } + + return false; +} + +//===----------------------------------------------------------------------===// +// Legalization Utilities +//===----------------------------------------------------------------------===// + +bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, SDValue LL, SDValue LH, + SDValue RL, SDValue RH) const { + EVT VT = N->getValueType(0); + SDLoc dl(N); + + bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT); + bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT); + bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); + bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = HiLoVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + // LL, LH, RL, and RH must be either all NULL or all set to a value. + assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || + (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); + + if (!LL.getNode() && !RL.getNode() && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0)); + RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1)); + } + + if (!LL.getNode()) + return false; + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return true; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); + return true; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return true; + } + if (HasMULHS) { + // We can emit a mulhs+mul. 
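// (The MULHS emission continues just below.) The identity every one of
// these expandMUL paths relies on, sketched with plain integers for a
// 64-bit multiply built from 32-bit halves: the full product is LL*RL
// plus the two truncated cross terms shifted into the high half. Names
// mirror the code above; the snippet is an illustration, not the DAG
// calls.
#include <cassert>
#include <cstdint>

static void expandMul64(uint64_t L, uint64_t R, uint32_t &Lo, uint32_t &Hi) {
  uint32_t LL = (uint32_t)L, LH = (uint32_t)(L >> 32);
  uint32_t RL = (uint32_t)R, RH = (uint32_t)(R >> 32);
  uint64_t P = (uint64_t)LL * RL;  // the UMUL_LOHI node: 32x32 -> 64
  Lo = (uint32_t)P;
  Hi = (uint32_t)(P >> 32) + LL * RH + LH * RL; // cross terms, truncated
}

int main() {
  uint64_t L = 0x123456789ABCDEF0ULL, R = 0x0FEDCBA987654321ULL;
  uint32_t Lo, Hi;
  expandMul64(L, R, Lo, Hi);
  uint64_t Ref = L * R; // reference: wrap-around 64-bit product
  assert(Lo == (uint32_t)Ref && Hi == (uint32_t)(Ref >> 32));
}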
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL); + return true; + } + } + + if (!LH.getNode() && !RH.getNode() && + isOperationLegalOrCustom(ISD::SRL, VT) && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); + SDValue Shift = DAG.getConstant(ShiftAmt, getShiftAmountTy(VT)); + LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); + LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); + RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); + RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); + } + + if (!LH.getNode()) + return false; + + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + return true; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + return true; + } + } + return false; +} + +bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + SDLoc dl(SDValue(Node, 0)); + + // FIXME: Only f32 to i64 conversions are supported. + if (VT != MVT::f32 || NVT != MVT::i64) + return false; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, IntVT); + SDValue Bias = DAG.getConstant(127, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + + SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT))); + SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); + + SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT))); + Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + + SDValue R = DAG.getNode(ISD::OR, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, IntVT)); + + R = DAG.getZExtOrTrunc(R, dl, NVT); + + + R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, getShiftAmountTy(IntVT))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, getShiftAmountTy(IntVT))), + ISD::SETGT); + + SDValue Ret = 
DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(ISD::XOR, dl, NVT, R, Sign), + Sign); + + Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), + DAG.getConstant(0, NVT), Ret, ISD::SETLT); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index f769b44..0e89bad 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -15,8 +15,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) - : TD(TM.getDataLayout()) { +TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL) + : DL(DL) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp index 10f64c7..f7c64da 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp @@ -25,17 +25,18 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "shadowstackgc" #include "llvm/CodeGen/GCs.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" using namespace llvm; +#define DEBUG_TYPE "shadowstackgc" + namespace { class ShadowStackGC : public GCStrategy { @@ -55,8 +56,8 @@ namespace { public: ShadowStackGC(); - bool initializeCustomLowering(Module &M); - bool performCustomLowering(Function &F); + bool initializeCustomLowering(Module &M) override; + bool performCustomLowering(Function &F) override; private: bool IsNullValue(Value *V); @@ -101,7 +102,7 @@ namespace { IRBuilder<> *Next() { switch (State) { default: - return 0; + return nullptr; case 0: StateBB = F.begin(); @@ -137,7 +138,7 @@ namespace { Calls.push_back(CI); if (Calls.empty()) - return 0; + return nullptr; // Create a cleanup block. 
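// For context on the ShadowStackGC lowering in this hunk: the compiled
// code pushes and pops frame descriptors on a linked list that the
// collector walks at runtime. The struct shapes below follow LLVM's
// garbage-collection documentation; the visitor is an illustrative
// sketch, not part of this file.
#include <cstdint>

struct FrameMap {
  int32_t NumRoots;    // number of roots in this stack frame
  int32_t NumMeta;     // number of metadata entries, <= NumRoots
  const void *Meta[1]; // metadata for each root (variable length)
};

struct StackEntry {
  StackEntry *Next;    // caller's stack entry, forming the shadow stack
  const FrameMap *Map; // constant per-function frame description
  void *Roots[1];      // the live roots (variable length)
};

// A collector visits every root of every active frame like this:
inline void visitGCRoots(StackEntry *Head, void (*Visitor)(void **Root)) {
  for (StackEntry *E = Head; E; E = E->Next)
    for (int32_t i = 0; i < E->Map->NumRoots; ++i)
      Visitor(&E->Roots[i]);
}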
LLVMContext &C = F.getContext(); @@ -194,7 +195,7 @@ namespace { void llvm::linkShadowStackGC() { } -ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) { +ShadowStackGC::ShadowStackGC() : Head(nullptr), StackEntryTy(nullptr) { InitRoots = true; CustomRoots = true; } @@ -390,8 +391,8 @@ bool ShadowStackGC::performCustomLowering(Function &F) { BasicBlock::iterator IP = F.getEntryBlock().begin(); IRBuilder<> AtEntry(IP->getParent(), IP); - Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, - "gc_frame"); + Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, + "gc_frame"); while (isa<AllocaInst>(IP)) ++IP; AtEntry.SetInsertPoint(IP->getParent(), IP); diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index da2e710..b0950de 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sjljehprepare" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" @@ -38,6 +37,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "sjljehprepare" + STATISTIC(NumInvokes, "Number of invokes replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); @@ -60,11 +61,11 @@ class SjLjEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {} - bool doInitialization(Module &M); - bool runOnFunction(Function &F); + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const {} - const char *getPassName() const { + void getAnalysisUsage(AnalysisUsage &AU) const override {} + const char *getPassName() const override { return "SJLJ Exception Handling preparation"; } @@ -100,10 +101,10 @@ bool SjLjEHPrepare::doInitialization(Module &M) { NULL); RegisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), (Type *)0); + PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); UnregisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), (Type *)0); + PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); @@ -111,7 +112,7 @@ bool SjLjEHPrepare::doInitialization(Module &M) { LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); - PersonalityFn = 0; + PersonalityFn = nullptr; return true; } @@ -149,7 +150,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB, /// instruction with those returned by the personality function. 
void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal) { - SmallVector<Value *, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); + SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); @@ -173,7 +174,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Type *LPadType = LPI->getType(); Value *LPadVal = UndefValue::get(LPadType); IRBuilder<> Builder( - llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + std::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); @@ -192,7 +193,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, const TargetLowering *TLI = TM->getTargetLowering(); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); - FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", + FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", EntryBB->begin()); // Fill in the function context structure. @@ -248,34 +249,16 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { ++AI) { Type *Ty = AI->getType(); - // Aggregate types can't be cast, but are legal argument types, so we have - // to handle them differently. We use an extract/insert pair as a - // lightweight method to achieve the same goal. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { - Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt); - Instruction *NI = InsertValueInst::Create(AI, EI, 0); - NI->insertAfter(EI); - AI->replaceAllUsesWith(NI); - - // Set the operand of the instructions back to the AllocaInst. - EI->setOperand(0, AI); - NI->setOperand(0, AI); - } else { - // This is always a no-op cast because we're casting AI to AI->getType() - // so src and destination types are identical. BitCast is the only - // possibility. - CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", - AfterAllocaInsPt); - AI->replaceAllUsesWith(NC); - - // Set the operand of the cast instruction back to the AllocaInst. - // Normally it's forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, we're - // replacing it here with the same value it was constructed with. We do - // this because the above replaceAllUsesWith() clobbered the operand, but - // we want this one to remain. - NC->setOperand(0, AI); - } + // Use 'select i1 true, %arg, undef' to simulate a 'no-op' instruction. + Value *TrueValue = ConstantInt::getTrue(F.getContext()); + Value *UndefValue = UndefValue::get(Ty); + Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue, + AI->getName() + ".tmp", + AfterAllocaInsPt); + AI->replaceAllUsesWith(SI); + + // Reset the operand, because it was clobbered by the RAUW above.
+ SI->setOperand(1, AI); } } @@ -294,8 +277,8 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, if (Inst->use_empty()) continue; if (Inst->hasOneUse() && - cast<Instruction>(Inst->use_back())->getParent() == BB && - !isa<PHINode>(Inst->use_back())) + cast<Instruction>(Inst->user_back())->getParent() == BB && + !isa<PHINode>(Inst->user_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. @@ -306,11 +289,10 @@ // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction *, 16> Users; - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - if (User->getParent() != BB || isa<PHINode>(User)) - Users.push_back(User); + for (User *U : Inst->users()) { + Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != BB || isa<PHINode>(UI)) + Users.push_back(UI); } // Find all of the blocks that this value is live in. diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index 20049a8..d46621d 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "slotindexes" - #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" @@ -18,6 +16,8 @@ using namespace llvm; +#define DEBUG_TYPE "slotindexes" + char SlotIndexes::ID = 0; INITIALIZE_PASS(SlotIndexes, "slotindexes", "Slot index numbering", false, false) @@ -66,7 +66,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MBBRanges.resize(mf->getNumBlockIDs()); idx2MBBMap.reserve(mf->size()); - indexList.push_back(createEntry(0, index)); + indexList.push_back(createEntry(nullptr, index)); // Iterate over the function. for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end(); @@ -91,7 +91,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { } // We insert one blank instruction between basic blocks. - indexList.push_back(createEntry(0, index += SlotIndex::InstrDist)); + indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist)); MBBRanges[mbb->getNumber()].first = blockStartIndex; MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(), @@ -129,7 +129,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { const unsigned Space = SlotIndex::InstrDist/2; assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); - IndexList::iterator startItr = prior(curItr); + IndexList::iterator startItr = std::prev(curItr); unsigned index = startItr->getIndex(); do { curItr->setIndex(index += Space); @@ -182,7 +182,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, "Decremented past the beginning of region to repair."); MachineInstr *SlotMI = ListI->getInstr(); - MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0; + MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ?
MBBI : nullptr; bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart); if (SlotMI == MI && !MBBIAtBegin) { @@ -219,7 +219,7 @@ void SlotIndexes::dump() const { itr != indexList.end(); ++itr) { dbgs() << itr->getIndex() << " "; - if (itr->getInstr() != 0) { + if (itr->getInstr()) { dbgs() << *itr->getInstr(); } else { dbgs() << "\n"; diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index 10a93b7..24e94d1 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -27,7 +27,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "spillplacement" #include "SpillPlacement.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/EdgeBundles.h" @@ -41,6 +40,8 @@ using namespace llvm; +#define DEBUG_TYPE "spillplacement" + char SpillPlacement::ID = 0; INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement", "Spill Code Placement Analysis", true, true) @@ -59,9 +60,26 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +namespace { +static BlockFrequency Threshold; +} + /// Decision threshold. A node gets the output value 0 if the weighted sum of /// its inputs falls in the open interval (-Threshold;Threshold). -static const BlockFrequency Threshold = 2; +static BlockFrequency getThreshold() { return Threshold; } + +/// \brief Set the threshold for a given entry frequency. +/// +/// Set the threshold relative to \c Entry. Since the threshold is used as a +/// bound on the open interval (-Threshold;Threshold), 1 is the minimum +/// threshold. +static void setThreshold(const BlockFrequency &Entry) { + // Apparently 2 is a good threshold when Entry==2^14, but we need to scale + // it. Divide by 2^13, rounding as appropriate. + uint64_t Freq = Entry.getFrequency(); + uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12)); + Threshold = std::max(UINT64_C(1), Scaled); +} /// Node - Each edge bundle corresponds to a Hopfield node. /// @@ -110,7 +128,7 @@ struct SpillPlacement::Node { // the CFG. void clear() { BiasN = BiasP = Value = 0; - SumLinkWeights = Threshold; + SumLinkWeights = getThreshold(); Links.clear(); } @@ -168,9 +186,9 @@ struct SpillPlacement::Node { // 2. It helps tame rounding errors when the links nominally sum to 0. // bool Before = preferReg(); - if (SumN >= SumP + Threshold) + if (SumN >= SumP + getThreshold()) Value = -1; - else if (SumP >= SumN + Threshold) + else if (SumP >= SumN + getThreshold()) Value = 1; else Value = 0; @@ -188,10 +206,11 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { // Compute total ingoing and outgoing block frequencies for all bundles. BlockFrequencies.resize(mf.getNumBlockIDs()); - MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + setThreshold(MBFI->getEntryFreq()); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { unsigned Num = I->getNumber(); - BlockFrequencies[Num] = MBFI.getBlockFreq(I); + BlockFrequencies[Num] = MBFI->getBlockFreq(I); } // We never change the function. @@ -200,7 +219,7 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { void SpillPlacement::releaseMemory() { delete[] nodes; - nodes = 0; + nodes = nullptr; } /// activate - mark node n as active if it wasn't already. @@ -221,7 +240,7 @@ void SpillPlacement::activate(unsigned n) { // Hopfield network. 
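// Two things happen in the hunks above: the decision threshold is now
// scaled from the entry frequency (divide by 2^13 with rounding, so
// Entry == 2^14 still yields the old value 2, clamped to at least 1), and
// each bundle's node keeps its -1/0/+1 value unless one side beats the
// other by that threshold. A rough scalar model of the update rule
// follows; it is illustrative only, the real Node tracks BlockFrequency
// link weights and the bias saturation logic omitted here.
#include <cstdint>
#include <utility>
#include <vector>

struct HopfieldNode {
  int64_t BiasP = 0, BiasN = 0; // prefer-register / prefer-spill bias
  int Value = 0;                // -1 spill, 0 undecided, +1 register
  std::vector<std::pair<int64_t, const HopfieldNode *>> Links; // weight, peer

  bool update(int64_t Threshold) {
    int64_t SumP = BiasP, SumN = BiasN;
    for (const auto &L : Links) {
      if (L.second->Value > 0)
        SumP += L.first;        // neighbor prefers a register
      else if (L.second->Value < 0)
        SumN += L.first;        // neighbor prefers a spill
    }
    int Before = Value;
    if (SumN >= SumP + Threshold)
      Value = -1;
    else if (SumP >= SumN + Threshold)
      Value = 1;
    else
      Value = 0; // inside the open interval (-Threshold;Threshold)
    return Value != Before; // iterate() loops until nothing changes
  }
};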
if (bundles->getBlocks(n).size() > 100) { nodes[n].BiasP = 0; - nodes[n].BiasN = (BlockFrequency::getEntryFrequency() / 16); + nodes[n].BiasN = (MBFI->getEntryFreq() / 16); } } @@ -323,10 +342,12 @@ void SpillPlacement::iterate() { // affect the entire network in a single iteration. That means very fast // convergence, usually in a single iteration. for (unsigned iteration = 0; iteration != 10; ++iteration) { - // Scan backwards, skipping the last node which was just updated. + // Scan backwards, skipping the last node when iteration is not zero. When + // iteration is not zero, the last node was just updated. bool Changed = false; for (SmallVectorImpl<unsigned>::const_reverse_iterator I = - llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { + iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()), + E = Linked.rend(); I != E; ++I) { unsigned n = *I; if (nodes[n].update(nodes)) { Changed = true; @@ -340,7 +361,7 @@ void SpillPlacement::iterate() { // Scan forwards, skipping the first node which was just updated. Changed = false; for (SmallVectorImpl<unsigned>::const_iterator I = - llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) { + std::next(Linked.begin()), E = Linked.end(); I != E; ++I) { unsigned n = *I; if (nodes[n].update(nodes)) { Changed = true; @@ -373,6 +394,6 @@ SpillPlacement::finish() { ActiveNodes->reset(n); Perfect = false; } - ActiveNodes = 0; + ActiveNodes = nullptr; return Perfect; } diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h index 105516b..43fc7f5 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.h +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h @@ -38,12 +38,14 @@ class BitVector; class EdgeBundles; class MachineBasicBlock; class MachineLoopInfo; +class MachineBlockFrequencyInfo; class SpillPlacement : public MachineFunctionPass { struct Node; const MachineFunction *MF; const EdgeBundles *bundles; const MachineLoopInfo *loops; + const MachineBlockFrequencyInfo *MBFI; Node *nodes; // Nodes that are active in the current computation. Owned by the prepare() @@ -63,7 +65,7 @@ class SpillPlacement : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid. - SpillPlacement() : MachineFunctionPass(ID), nodes(0) {} + SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {} ~SpillPlacement() { releaseMemory(); } /// BorderConstraint - A basic block has separate constraints for entry and @@ -145,9 +147,9 @@ public: } private: - virtual bool runOnMachineFunction(MachineFunction&); - virtual void getAnalysisUsage(AnalysisUsage&) const; - virtual void releaseMemory(); + bool runOnMachineFunction(MachineFunction&) override; + void getAnalysisUsage(AnalysisUsage&) const override; + void releaseMemory() override; void activate(unsigned); }; diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp index d5b3a4a..0649448 100644 --- a/contrib/llvm/lib/CodeGen/Spiller.cpp +++ b/contrib/llvm/lib/CodeGen/Spiller.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "spiller" - #include "Spiller.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" @@ -28,6 +26,8 @@ using namespace llvm; +#define DEBUG_TYPE "spiller" + namespace { enum SpillerName { trivial, inline_ }; } @@ -89,8 +89,9 @@ protected: unsigned ss = vrm->assignVirt2StackSlot(li->reg); // Iterate over reg uses/defs. 
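// Shape of the "spill everywhere" strategy the loop below implements:
// every instruction touching the spilled register gets its own tiny live
// range, with a reload inserted before uses and a store after defs. A toy
// model over a list of pseudo-instructions (illustrative only, not the
// MachineInstr API):
#include <string>
#include <vector>

struct PseudoInst { std::string Text; bool Reads, Writes; };

static std::vector<PseudoInst>
spillEverywhere(const std::vector<PseudoInst> &In) {
  std::vector<PseudoInst> Out;
  for (const PseudoInst &I : In) {
    if (I.Reads)
      Out.push_back({"reload vreg from stack slot", false, false});
    Out.push_back(I);
    if (I.Writes)
      Out.push_back({"store vreg to stack slot", false, false});
  }
  return Out;
}

int main() {
  std::vector<PseudoInst> Body{{"def vreg", false, true},
                               {"use vreg", true, false}};
  std::vector<PseudoInst> Spilled = spillEverywhere(Body);
  return Spilled.size() == 4 ? 0 : 1; // def+store, then reload+use
}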
- for (MachineRegisterInfo::reg_iterator - regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) { + for (MachineRegisterInfo::reg_instr_iterator + regItr = mri->reg_instr_begin(li->reg); + regItr != mri->reg_instr_end();) { // Grab the use/def instr. MachineInstr *mi = &*regItr; @@ -98,9 +99,7 @@ DEBUG(dbgs() << " Processing " << *mi); // Step regItr to the next use/def instr. - do { - ++regItr; - } while (regItr != mri->reg_end() && (&*regItr == mi)); + ++regItr; // Collect uses & defs for this instr. SmallVector<unsigned, 2> indices; @@ -143,9 +142,9 @@ if (hasDef) { MachineInstrSpan MIS(miItr); - tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), NewVReg, + tii->storeRegToStackSlot(*mi->getParent(), std::next(miItr), NewVReg, true, ss, trc, tri); - lis->InsertMachineInstrRangeInMaps(llvm::next(miItr), MIS.end()); + lis->InsertMachineInstrRangeInMaps(std::next(miItr), MIS.end()); } } } @@ -164,7 +163,7 @@ public: VirtRegMap &vrm) : SpillerBase(pass, mf, vrm) {} - void spill(LiveRangeEdit &LRE) { + void spill(LiveRangeEdit &LRE) override { // Ignore spillIs - we don't use it. trivialSpillEverywhere(LRE); } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 68a15f7..7d4f568 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "SplitKit.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -29,6 +28,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumFinished, "Number of splits finished"); STATISTIC(NumSimple, "Number of splits that were simple"); STATISTIC(NumCopies, "Number of copies inserted for splitting"); @@ -47,14 +48,14 @@ SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, LIS(lis), Loops(mli), TII(*MF.getTarget().getInstrInfo()), - CurLI(0), + CurLI(nullptr), LastSplitPoint(MF.getNumBlockIDs()) {} void SplitAnalysis::clear() { UseSlots.clear(); UseBlocks.clear(); ThroughBlocks.clear(); - CurLI = 0; + CurLI = nullptr; DidRepairRange = false; } @@ -131,11 +132,9 @@ void SplitAnalysis::analyzeUses() { // Get use slots from the use-def chain. const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineRegisterInfo::use_nodbg_iterator - I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; - ++I) - if (!I.getOperand().isUndef()) - UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot()); + for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg)) + if (!MO.isUndef()) + UseSlots.push_back(LIS.getInstructionIndex(MO.getParent()).getRegSlot()); array_pod_sort(UseSlots.begin(), UseSlots.end()); @@ -188,7 +187,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { BlockInfo BI; BI.MBB = MFI; SlotIndex Start, Stop; - tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); // If the block contains no uses, the range must be live through.
At one // point, RegisterCoalescer could create dangling ranges that ended @@ -333,7 +332,7 @@ SplitEditor::SplitEditor(SplitAnalysis &sa, TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), MBFI(mbfi), - Edit(0), + Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition), RegAssign(Allocator) @@ -355,7 +354,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { // We don't need an AliasAnalysis since we will only be performing // cheap-as-a-copy remats anyway. - Edit->anyRematerializable(0); + Edit->anyRematerializable(nullptr); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -425,7 +424,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. - VFP = ValueForcePair(0, true); + VFP = ValueForcePair(nullptr, true); } VNInfo *SplitEditor::defFromParent(unsigned RegIdx, @@ -433,7 +432,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, SlotIndex UseIdx, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { - MachineInstr *CopyMI = 0; + MachineInstr *CopyMI = nullptr; SlotIndex Def; LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); @@ -509,7 +508,7 @@ SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) { assert(MI && "enterIntvAfter called with invalid index"); VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), - llvm::next(MachineBasicBlock::iterator(MI))); + std::next(MachineBasicBlock::iterator(MI))); return VNI->def; } @@ -570,7 +569,7 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { } VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(), - llvm::next(MachineBasicBlock::iterator(MI))); + std::next(MachineBasicBlock::iterator(MI))); return VNI->def; } @@ -888,7 +887,7 @@ bool SplitEditor::transferValues() { // LiveInBlocks. MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); SlotIndex BlockStart, BlockEnd; - tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { @@ -924,7 +923,7 @@ bool SplitEditor::transferValues() { else { // Live-through, and we don't know the value. LRC.addLiveInBlock(LR, MDT[MBB]); - LRC.setLiveOutValue(MBB, 0); + LRC.setLiveOutValue(MBB, nullptr); } } BlockStart = BlockEnd; @@ -972,7 +971,7 @@ void SplitEditor::extendPHIKillRanges() { void SplitEditor::rewriteAssigned(bool ExtendRanges) { for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()), RE = MRI.reg_end(); RI != RE;) { - MachineOperand &MO = RI.getOperand(); + MachineOperand &MO = *RI; MachineInstr *MI = MO.getParent(); ++RI; // LiveDebugVariables should have handled all DBG_VALUE instructions. 
@@ -1183,7 +1182,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, unsigned IntvIn, SlotIndex LeaveBefore, unsigned IntvOut, SlotIndex EnterAfter){ SlotIndex Start, Stop; - tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop << ") intf " << LeaveBefore << '-' << EnterAfter @@ -1286,7 +1285,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, unsigned IntvIn, SlotIndex LeaveBefore) { SlotIndex Start, Stop; - tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop << "), uses " << BI.FirstInstr << '-' << BI.LastInstr @@ -1378,7 +1377,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, unsigned IntvOut, SlotIndex EnterAfter) { SlotIndex Start, Stop; - tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop << "), uses " << BI.FirstInstr << '-' << BI.LastInstr diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index f029c73..7048ee3 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -377,7 +377,7 @@ public: SlotIndex enterIntvAfter(SlotIndex Idx); /// enterIntvAtEnd - Enter the open interval at the end of MBB. - /// Use the open interval from he inserted copy to the MBB end. + /// Use the open interval from the inserted copy to the MBB end. /// Return the beginning of the new live range. SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB); @@ -417,7 +417,7 @@ public: /// @param LRMap When not null, this vector will map each live range in Edit /// back to the indices returned by openIntv. /// There may be extra indices created by dead code elimination. - void finish(SmallVectorImpl<unsigned> *LRMap = 0); + void finish(SmallVectorImpl<unsigned> *LRMap = nullptr); /// dump - print the current interval mapping to dbgs().
void dump() const; diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index 3dbc050..370430c 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackcoloring" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -30,7 +29,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -44,7 +42,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/DebugInfo.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -57,6 +57,8 @@ using namespace llvm; +#define DEBUG_TYPE "stackcoloring" + static cl::opt<bool> DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, @@ -112,37 +114,25 @@ class StackColoring : public MachineFunctionPass { SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering; /// Maps liveness intervals for each slot. - SmallVector<LiveInterval*, 16> Intervals; + SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals; /// VNInfo is used for the construction of LiveIntervals. VNInfo::Allocator VNInfoAllocator; /// SlotIndex analysis object. SlotIndexes *Indexes; + /// The stack protector object. + StackProtector *SP; /// The list of lifetime markers found. These markers are to be removed /// once the coloring is done. SmallVector<MachineInstr*, 8> Markers; - /// SlotSizeSorter - A Sort utility for arranging stack slots according - /// to their size. - struct SlotSizeSorter { - MachineFrameInfo *MFI; - SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { } - bool operator()(int LHS, int RHS) { - // We use -1 to denote a uninteresting slot. Place these slots at the end. - if (LHS == -1) return false; - if (RHS == -1) return true; - // Sort according to size. - return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); - } -}; - public: static char ID; StackColoring() : MachineFunctionPass(ID) { initializeStackColoringPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const; - bool runOnMachineFunction(MachineFunction &MF); + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; private: /// Debug. 
@@ -191,6 +181,7 @@ INITIALIZE_PASS_BEGIN(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) @@ -198,16 +189,16 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<SlotIndexes>(); + AU.addRequired<StackProtector>(); MachineFunctionPass::getAnalysisUsage(AU); } void StackColoring::dump() const { - for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); - FI != FE; ++FI) { - DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<< - " ["<<FI->getName()<<"]\n"); + for (MachineBasicBlock *MBB : depth_first(MF)) { + DEBUG(dbgs() << "Inspecting block #" << BasicBlocks.lookup(MBB) << " [" << MBB->getName() << "]\n"); - LivenessMap::const_iterator BI = BlockLiveness.find(*FI); + LivenessMap::const_iterator BI = BlockLiveness.find(MBB); assert(BI != BlockLiveness.end() && "Block not found"); const BlockLifetimeInfo &BlockInfo = BI->second; @@ -240,31 +231,28 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { // NOTE: We use a reverse-post-order iteration to ensure that we obtain a // deterministic numbering, and because we'll need a post-order iteration // later for solving the liveness dataflow problem. - for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); - FI != FE; ++FI) { + for (MachineBasicBlock *MBB : depth_first(MF)) { // Assign a serial number to this basic block. - BasicBlocks[*FI] = BasicBlockNumbering.size(); - BasicBlockNumbering.push_back(*FI); + BasicBlocks[MBB] = BasicBlockNumbering.size(); + BasicBlockNumbering.push_back(MBB); // Keep a reference to avoid repeated lookups. - BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI]; + BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB]; BlockInfo.Begin.resize(NumSlot); BlockInfo.End.resize(NumSlot); - for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end(); - BI != BE; ++BI) { - - if (BI->getOpcode() != TargetOpcode::LIFETIME_START && - BI->getOpcode() != TargetOpcode::LIFETIME_END) + for (MachineInstr &MI : *MBB) { + if (MI.getOpcode() != TargetOpcode::LIFETIME_START && + MI.getOpcode() != TargetOpcode::LIFETIME_END) continue; - Markers.push_back(BI); + Markers.push_back(&MI); - bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &MI = BI->getOperand(0); - unsigned Slot = MI.getIndex(); + bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START; + const MachineOperand &MO = MI.getOperand(0); + unsigned Slot = MO.getIndex(); MarkersFound++; @@ -310,11 +298,7 @@ void StackColoring::calculateLocalLiveness() { SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; - for (SmallVectorImpl<const MachineBasicBlock *>::iterator - PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); - PI != PE; ++PI) { - - const MachineBasicBlock *BB = *PI; + for (const MachineBasicBlock *BB : BasicBlockNumbering) { if (!BBSet.count(BB)) continue; // Use an iterator to avoid repeated lookups.
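// The dataflow problem calculateLocalLiveness iterates to a fixed point
// (the update loop continues in the next hunk), reduced to its essentials:
// a slot is live into a block if it is live out of any predecessor, and
// live out if it is live in or begun here and not ended here. A sketch
// with std::bitset standing in for BitVector and assumed Begin/End
// gen-kill shapes (illustrative only):
#include <bitset>
#include <cstddef>
#include <vector>

constexpr std::size_t NumSlots = 8;
using SlotSet = std::bitset<NumSlots>;

struct BlockLiveness {
  std::vector<int> Preds;   // indices of predecessor blocks
  SlotSet Begin, End;       // lifetime markers seen in this block
  SlotSet LiveIn, LiveOut;
};

static void solveLiveness(std::vector<BlockLiveness> &Blocks) {
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (BlockLiveness &B : Blocks) {
      SlotSet In;
      for (int P : B.Preds)
        In |= Blocks[P].LiveOut;             // union over predecessors
      SlotSet Out = (In | B.Begin) & ~B.End; // transfer function
      if (In != B.LiveIn || Out != B.LiveOut) {
        B.LiveIn = In;
        B.LiveOut = Out;
        Changed = true;                      // keep iterating
      }
    }
  }
}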
@@ -369,18 +353,14 @@ void StackColoring::calculateLocalLiveness() { changed = true; BlockInfo.LiveIn |= LocalLiveIn; - for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), - PE = BB->pred_end(); PI != PE; ++PI) - NextBBSet.insert(*PI); + NextBBSet.insert(BB->pred_begin(), BB->pred_end()); } if (LocalLiveOut.test(BlockInfo.LiveOut)) { changed = true; BlockInfo.LiveOut |= LocalLiveOut; - for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - NextBBSet.insert(*SI); + NextBBSet.insert(BB->succ_begin(), BB->succ_end()); } } @@ -394,18 +374,15 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { // For each block, find which slots are active within this block // and update the live intervals. - for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end(); - MBB != MBBe; ++MBB) { + for (const MachineBasicBlock &MBB : *MF) { Starts.clear(); Starts.resize(NumSlots); Finishes.clear(); Finishes.resize(NumSlots); // Create the interval for the basic blocks with lifetime markers in them. - for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(), - e = Markers.end(); it != e; ++it) { - const MachineInstr *MI = *it; - if (MI->getParent() != MBB) + for (const MachineInstr *MI : Markers) { + if (MI->getParent() != &MBB) continue; assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || @@ -429,14 +406,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { } // Create the interval of the blocks that we previously found to be 'alive'. - BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB]; + BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; pos = MBBLiveness.LiveIn.find_next(pos)) { - Starts[pos] = Indexes->getMBBStartIdx(MBB); + Starts[pos] = Indexes->getMBBStartIdx(&MBB); } for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; pos = MBBLiveness.LiveOut.find_next(pos)) { - Finishes[pos] = Indexes->getMBBEndIdx(MBB); + Finishes[pos] = Indexes->getMBBEndIdx(&MBB); } for (unsigned i = 0; i < NumSlots; ++i) { @@ -452,10 +429,10 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { // We have a single consecutive region. Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); } else { - // We have two non consecutive regions. This happens when + // We have two non-consecutive regions. This happens when // LIFETIME_START appears after the LIFETIME_END marker. - SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); - SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); + SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB); + SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB); Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); } @@ -465,8 +442,8 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { bool StackColoring::removeAllMarkers() { unsigned Count = 0; - for (unsigned i = 0; i < Markers.size(); ++i) { - Markers[i]->eraseFromParent(); + for (MachineInstr *MI : Markers) { + MI->eraseFromParent(); Count++; } Markers.clear(); @@ -482,65 +459,70 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { MachineModuleInfo *MMI = &MF->getMMI(); // Remap debug information that refers to stack slots. 
- MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); - for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), - VE = VMap.end(); VI != VE; ++VI) { - const MDNode *Var = VI->first; - if (!Var) continue; - std::pair<unsigned, DebugLoc> &VP = VI->second; - if (SlotRemap.count(VP.first)) { - DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n"); - VP.first = SlotRemap[VP.first]; + for (auto &VI : MMI->getVariableDbgInfo()) { + if (!VI.Var) + continue; + if (SlotRemap.count(VI.Slot)) { + DEBUG(dbgs()<<"Remapping debug info for ["<<VI.Var->getName()<<"].\n"); + VI.Slot = SlotRemap[VI.Slot]; FixedDbg++; } } // Keep a list of *allocas* which need to be remapped. DenseMap<const AllocaInst*, const AllocaInst*> Allocas; - for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(), - e = SlotRemap.end(); it != e; ++it) { - const AllocaInst *From = MFI->getObjectAllocation(it->first); - const AllocaInst *To = MFI->getObjectAllocation(it->second); + for (const std::pair<int, int> &SI : SlotRemap) { + const AllocaInst *From = MFI->getObjectAllocation(SI.first); + const AllocaInst *To = MFI->getObjectAllocation(SI.second); assert(To && From && "Invalid allocation object"); Allocas[From] = To; + + // AA might be used later for instruction scheduling, and we need it to be + // able to deduce the correct aliasing relationships between pointers + // derived from the alloca being remapped and the target of that remapping. + // The only safe way, without directly informing AA about the remapping + // somehow, is to directly update the IR to reflect the change being made + // here. + Instruction *Inst = const_cast<AllocaInst *>(To); + if (From->getType() != To->getType()) { + BitCastInst *Cast = new BitCastInst(Inst, From->getType()); + Cast->insertAfter(Inst); + Inst = Cast; + } + + // Allow the stack protector to adjust its value map to account for the + // upcoming replacement. + SP->adjustForColoring(From, To); + + // Note that this will not replace uses in MMOs (which we'll update below), + // or anywhere else (which is why we won't delete the original + // instruction). + const_cast<AllocaInst *>(From)->replaceAllUsesWith(Inst); } // Remap all instructions to the new stack slots. - MachineFunction::iterator BB, BBE; - MachineBasicBlock::iterator I, IE; - for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) - for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { - + for (MachineBasicBlock &BB : *MF) + for (MachineInstr &I : BB) { // Skip lifetime markers. We'll remove them soon. - if (I->getOpcode() == TargetOpcode::LIFETIME_START || - I->getOpcode() == TargetOpcode::LIFETIME_END) + if (I.getOpcode() == TargetOpcode::LIFETIME_START || + I.getOpcode() == TargetOpcode::LIFETIME_END) continue; // Update the MachineMemOperand to use the new alloca. - for (MachineInstr::mmo_iterator MM = I->memoperands_begin(), - E = I->memoperands_end(); MM != E; ++MM) { - MachineMemOperand *MMO = *MM; - - const Value *V = MMO->getValue(); - - if (!V) - continue; - - const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V); - if (PSV && PSV->isConstant(MFI)) + for (MachineMemOperand *MMO : I.memoperands()) { + // FIXME: In order to enable the use of TBAA when using AA in CodeGen, + // we'll also need to update the TBAA nodes in MMOs with values + // derived from the merged allocas.
When doing this, we'll need to use + // the same variant of GetUnderlyingObjects that is used by the + // instruction scheduler (that can look through ptrtoint/inttoptr + // pairs). + + // We've replaced IR-level uses of the remapped allocas, so we only + // need to replace direct uses here. + const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue()); + if (!AI) continue; - // Climb up and find the original alloca. - V = GetUnderlyingObject(V); - // If we did not find one, or if the one that we found is not in our - // map, then move on. - if (!V || !isa<AllocaInst>(V)) { - // Clear mem operand since we don't know for sure that it doesn't - // alias a merged alloca. - MMO->setValue(0); - continue; - } - const AllocaInst *AI= cast<AllocaInst>(V); if (!Allocas.count(AI)) continue; @@ -549,9 +531,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } // Update all of the machine instruction operands. - for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { - MachineOperand &MO = I->getOperand(i); - + for (MachineOperand &MO : I.operands()) { if (!MO.isFI()) continue; int FromSlot = MO.getIndex(); @@ -572,12 +552,12 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // zone are are okay, despite the fact that we don't have a good way // for validating all of the usages of the calculation. #ifndef NDEBUG - bool TouchesMemory = I->mayLoad() || I->mayStore(); + bool TouchesMemory = I.mayLoad() || I.mayStore(); // If we *don't* protect the user from escaped allocas, don't bother // validating the instructions. - if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { - SlotIndex Index = Indexes->getInstructionIndex(I); - LiveInterval *Interval = Intervals[FromSlot]; + if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { + SlotIndex Index = Indexes->getInstructionIndex(&I); + const LiveInterval *Interval = &*Intervals[FromSlot]; assert(Interval->find(Index) != Interval->end() && "Found instruction usage outside of live range."); } @@ -596,13 +576,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } void StackColoring::removeInvalidSlotRanges() { - MachineFunction::const_iterator BB, BBE; - MachineBasicBlock::const_iterator I, IE; - for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) - for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { - - if (I->getOpcode() == TargetOpcode::LIFETIME_START || - I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue()) + for (MachineBasicBlock &BB : *MF) + for (MachineInstr &I : BB) { + if (I.getOpcode() == TargetOpcode::LIFETIME_START || + I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue()) continue; // Some intervals are suspicious! In some cases we find address @@ -611,13 +588,11 @@ void StackColoring::removeInvalidSlotRanges() { // violation, but address calculations are okay. This can happen when // GEPs are hoisted outside of the lifetime zone. // So, in here we only check instructions which can read or write memory. - if (!I->mayLoad() && !I->mayStore()) + if (!I.mayLoad() && !I.mayStore()) continue; // Check all of the machine operands. - for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { - const MachineOperand &MO = I->getOperand(i); - + for (const MachineOperand &MO : I.operands()) { if (!MO.isFI()) continue; @@ -631,10 +606,10 @@ void StackColoring::removeInvalidSlotRanges() { // Check that the used slot is inside the calculated lifetime range. // If it is not, warn about it and invalidate the range. 
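The range check implemented next is, at its core, a membership test of an instruction's SlotIndex against a sorted list of half-open segments. A rough standalone sketch under that reading (the Segment type and liveAt helper are illustrative simplifications, not LLVM's actual LiveInterval API):

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <vector>

    struct Segment { unsigned Start, End; }; // half-open [Start, End)

    // Segments are sorted and non-overlapping, so a binary search on Start
    // followed by one bound check answers "is Idx inside any segment?".
    static bool liveAt(const std::vector<Segment> &Segs, unsigned Idx) {
      auto It = std::upper_bound(
          Segs.begin(), Segs.end(), Idx,
          [](unsigned I, const Segment &S) { return I < S.Start; });
      return It != Segs.begin() && Idx < std::prev(It)->End;
    }

    int main() {
      std::vector<Segment> Segs{{4, 10}, {16, 20}};
      assert(liveAt(Segs, 4) && !liveAt(Segs, 10) && !liveAt(Segs, 12));
    }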
- LiveInterval *Interval = Intervals[Slot]; - SlotIndex Index = Indexes->getInstructionIndex(I); + LiveInterval *Interval = &*Intervals[Slot]; + SlotIndex Index = Indexes->getInstructionIndex(&I); if (Interval->find(Index) == Interval->end()) { - Intervals[Slot]->clear(); + Interval->clear(); DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n"); EscapedAllocas++; } @@ -659,12 +634,16 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, } bool StackColoring::runOnMachineFunction(MachineFunction &Func) { + if (skipOptnoneFunction(*Func.getFunction())) + return false; + DEBUG(dbgs() << "********** Stack Coloring **********\n" << "********** Function: " << ((const Value*)Func.getFunction())->getName() << '\n'); MF = &Func; MFI = MF->getFrameInfo(); Indexes = &getAnalysis<SlotIndexes>(); + SP = &getAnalysis<StackProtector>(); BlockLiveness.clear(); BasicBlocks.clear(); BasicBlockNumbering.clear(); @@ -704,9 +683,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { } for (unsigned i=0; i < NumSlots; ++i) { - LiveInterval *LI = new LiveInterval(i, 0); - Intervals.push_back(LI); + std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0)); LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator); + Intervals.push_back(std::move(LI)); SortedSlots.push_back(i); } @@ -741,7 +720,13 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Sort the slots according to their size. Place unused slots at the end. // Use stable sort to guarantee deterministic code generation. std::stable_sort(SortedSlots.begin(), SortedSlots.end(), - SlotSizeSorter(MFI)); + [this](int LHS, int RHS) { + // We use -1 to denote an uninteresting slot. Place these slots at the end. + if (LHS == -1) return false; + if (RHS == -1) return true; + // Sort according to size. + return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); + }); bool Changed = true; while (Changed) { @@ -756,8 +741,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { int FirstSlot = SortedSlots[I]; int SecondSlot = SortedSlots[J]; - LiveInterval *First = Intervals[FirstSlot]; - LiveInterval *Second = Intervals[SecondSlot]; + LiveInterval *First = &*Intervals[FirstSlot]; + LiveInterval *Second = &*Intervals[SecondSlot]; assert (!First->empty() && !Second->empty() && "Found an empty range"); // Merge disjoint slots. @@ -795,10 +780,5 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { expungeSlotMap(SlotRemap, NumSlots); remapInstructions(SlotRemap); - // Release the intervals. - for (unsigned I = 0; I < NumSlots; ++I) { - delete Intervals[I]; - } - return removeAllMarkers(); } diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp new file mode 100644 index 0000000..3ba502f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -0,0 +1,127 @@ +//===-- StackMapLivenessAnalysis.cpp - StackMap Live Out Analysis ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the StackMap Liveness analysis pass. The pass calculates +// the liveness for each basic block in a function and attaches the register +// live-out information to a stackmap or patchpoint intrinsic if present.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/StackMapLivenessAnalysis.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + + +using namespace llvm; + +#define DEBUG_TYPE "stackmaps" + +namespace llvm { +cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness", + cl::Hidden, cl::init(true), + cl::desc("Enable PatchPoint Liveness Analysis Pass")); +} + +STATISTIC(NumStackMapFuncVisited, "Number of functions visited"); +STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped"); +STATISTIC(NumBBsVisited, "Number of basic blocks visited"); +STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap"); +STATISTIC(NumStackMaps, "Number of StackMaps visited"); + +char StackMapLiveness::ID = 0; +char &llvm::StackMapLivenessID = StackMapLiveness::ID; +INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness", + "StackMap Liveness Analysis", false, false) + +/// Default construct and initialize the pass. +StackMapLiveness::StackMapLiveness() : MachineFunctionPass(ID) { + initializeStackMapLivenessPass(*PassRegistry::getPassRegistry()); +} + +/// Tell the pass manager which passes we depend on and what information we +/// preserve. +void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { + // We preserve all information. + AU.setPreservesAll(); + AU.setPreservesCFG(); + // Default dependencies for all MachineFunction passes. + AU.addRequired<MachineFunctionAnalysis>(); +} + +/// Calculate the liveness information for the given machine function. +bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) { + if (!EnablePatchPointLiveness) + return false; + + DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " + << _MF.getName() << " **********\n"); + MF = &_MF; + TRI = MF->getTarget().getRegisterInfo(); + ++NumStackMapFuncVisited; + + // Skip this function if there are no patchpoints to process. + if (!MF->getFrameInfo()->hasPatchPoint()) { + ++NumStackMapFuncSkipped; + return false; + } + return calculateLiveness(); +} + +/// Performs the actual liveness calculation for the function. +bool StackMapLiveness::calculateLiveness() { + bool HasChanged = false; + // For all basic blocks in the function. + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + DEBUG(dbgs() << "****** BB " << MBBI->getName() << " ******\n"); + LiveRegs.init(TRI); + LiveRegs.addLiveOuts(MBBI); + bool HasStackMap = false; + // Reverse iterate over all instructions and add the current live register + // set to an instruction if we encounter a patchpoint instruction. + for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(), + E = MBBI->rend(); I != E; ++I) { + if (I->getOpcode() == TargetOpcode::PATCHPOINT) { + addLiveOutSetToMI(*I); + HasChanged = true; + HasStackMap = true; + ++NumStackMaps; + } + DEBUG(dbgs() << " " << LiveRegs << " " << *I); + LiveRegs.stepBackward(*I); + } + ++NumBBsVisited; + if (!HasStackMap) + ++NumBBsHaveNoStackmap; + } + return HasChanged; +} + +/// Add the current register live set to the instruction.
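A note ahead of the two helpers that follow: they rely on the register-mask convention used throughout this file, one bit per physical register packed into uint32_t words, with word = Reg / 32 and bit = Reg % 32. A self-contained sanity sketch of that indexing (the register count is hypothetical):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static void setReg(std::vector<uint32_t> &Mask, unsigned Reg) {
      Mask[Reg / 32] |= 1U << (Reg % 32);
    }

    static bool testReg(const std::vector<uint32_t> &Mask, unsigned Reg) {
      return (Mask[Reg / 32] >> (Reg % 32)) & 1;
    }

    int main() {
      unsigned NumRegs = 96; // hypothetical target
      std::vector<uint32_t> Mask((NumRegs + 31) / 32, 0);
      setReg(Mask, 37);
      assert(testReg(Mask, 37) && !testReg(Mask, 38));
    }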
+void StackMapLiveness::addLiveOutSetToMI(MachineInstr &MI) { + uint32_t *Mask = createRegisterMask(); + MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask); + MI.addOperand(*MF, MO); +} + +/// Create a register mask and initialize it with the registers from the +/// register live set. +uint32_t *StackMapLiveness::createRegisterMask() const { + // The mask is owned and cleaned up by the Machine Function. + uint32_t *Mask = MF->allocateRegisterMask(TRI->getNumRegs()); + for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end(); + RI != RE; ++RI) + Mask[*RI / 32] |= 1U << (*RI % 32); + return Mask; +} diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index 40893ea..1473fc1 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -7,35 +7,41 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackmaps" - #include "llvm/CodeGen/StackMaps.h" - #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" - #include <iterator> using namespace llvm; -PatchPointOpers::PatchPointOpers(const MachineInstr *MI): - MI(MI), - HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - !MI->getOperand(0).isImplicit()), - IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) { +#define DEBUG_TYPE "stackmaps" + +static cl::opt<int> StackMapVersion("stackmap-version", cl::init(1), + cl::desc("Specify the stackmap encoding version (default = 1)")); +const char *StackMaps::WSMP = "Stack Maps: "; + +PatchPointOpers::PatchPointOpers(const MachineInstr *MI) + : MI(MI), + HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit()), + IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) +{ #ifndef NDEBUG - { unsigned CheckStartIdx = 0, e = MI->getNumOperands(); while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && MI->getOperand(CheckStartIdx).isDef() && @@ -43,8 +49,7 @@ PatchPointOpers::PatchPointOpers(const MachineInstr *MI): ++CheckStartIdx; assert(getMetaIdx() == CheckStartIdx && - "Unexpected additonal definition in Patchpoint intrinsic."); - } + "Unexpected additional definition in Patchpoint intrinsic."); #endif } @@ -65,7 +70,126 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { return ScratchIdx; } -void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID, +StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { + if (StackMapVersion != 1) + llvm_unreachable("Unsupported stackmap version!"); +} + +MachineInstr::const_mop_iterator +StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE, + LocationVec &Locs, LiveOutVec &LiveOuts) const { + if (MOI->isImm()) { + switch (MOI->getImm()) { + default: llvm_unreachable("Unrecognized operand type."); + case StackMaps::DirectMemRefOp: { + unsigned Size = 
AP.TM.getDataLayout()->getPointerSizeInBits(); + assert((Size % 8) == 0 && "Need pointer size in bytes."); + Size /= 8; + unsigned Reg = (++MOI)->getReg(); + int64_t Imm = (++MOI)->getImm(); + Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm)); + break; + } + case StackMaps::IndirectMemRefOp: { + int64_t Size = (++MOI)->getImm(); + assert(Size > 0 && "Need a valid size for indirect memory locations."); + unsigned Reg = (++MOI)->getReg(); + int64_t Imm = (++MOI)->getImm(); + Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm)); + break; + } + case StackMaps::ConstantOp: { + ++MOI; + assert(MOI->isImm() && "Expected constant operand."); + int64_t Imm = MOI->getImm(); + Locs.push_back(Location(Location::Constant, sizeof(int64_t), 0, Imm)); + break; + } + } + return ++MOI; + } + + // The physical register number will ultimately be encoded as a DWARF regno. + // The stack map also records the size of a spill slot that can hold the + // register content. (The runtime can track the actual size of the data type + // if it needs to.) + if (MOI->isReg()) { + // Skip implicit registers (this includes our scratch registers) + if (MOI->isImplicit()) + return ++MOI; + + assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && + "Virtreg operands should have been rewritten before now."); + const TargetRegisterClass *RC = + AP.TM.getRegisterInfo()->getMinimalPhysRegClass(MOI->getReg()); + assert(!MOI->getSubReg() && "Physical subreg still around."); + Locs.push_back( + Location(Location::Register, RC->getSize(), MOI->getReg(), 0)); + return ++MOI; + } + + if (MOI->isRegLiveOut()) + LiveOuts = parseRegisterLiveOutMask(MOI->getRegLiveOut()); + + return ++MOI; +} + +/// Go up the super-register chain until we hit a valid dwarf register number. +static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { + int RegNo = TRI->getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) + RegNo = TRI->getDwarfRegNum(*SR, false); + + assert(RegNo >= 0 && "Invalid Dwarf register number."); + return (unsigned) RegNo; +} + +/// Create a live-out register record for the given register Reg. +StackMaps::LiveOutReg +StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { + unsigned RegNo = getDwarfRegNum(Reg, TRI); + unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + return LiveOutReg(Reg, RegNo, Size); +} + +/// Parse the register live-out mask and return a vector of live-out registers +/// that need to be recorded in the stackmap. +StackMaps::LiveOutVec +StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { + assert(Mask && "No register mask specified"); + const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + LiveOutVec LiveOuts; + + // Create a LiveOutReg for each bit that is set in the register mask. + for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) + if ((Mask[Reg / 32] >> Reg % 32) & 1) + LiveOuts.push_back(createLiveOutReg(Reg, TRI)); + + // We don't need to keep track of a register if its super-register is already + // in the list. Merge entries that refer to the same dwarf register and use + // the maximum size that needs to be spilled. + std::sort(LiveOuts.begin(), LiveOuts.end()); + for (LiveOutVec::iterator I = LiveOuts.begin(), E = LiveOuts.end(); + I != E; ++I) { + for (LiveOutVec::iterator II = std::next(I); II != E; ++II) { + if (I->RegNo != II->RegNo) { + // Skip all the now invalid entries. 
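To summarize the deduplication this loop performs before it resumes below: entries that share a DWARF register number collapse into one, keeping the largest spill size seen. Ignoring the super-register preference and the in-place invalidation trick, the same effect can be sketched as a sort plus one linear pass (the field names mirror the patch; everything else is illustrative):

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    struct LiveOutReg { unsigned RegNo, Size; };

    // Keep one entry per DWARF regno, remembering the largest spill size.
    static void mergeLiveOuts(std::vector<LiveOutReg> &LOs) {
      std::sort(LOs.begin(), LOs.end(),
                [](const LiveOutReg &A, const LiveOutReg &B) {
                  return A.RegNo < B.RegNo;
                });
      std::vector<LiveOutReg> Out;
      for (const LiveOutReg &LO : LOs) {
        if (!Out.empty() && Out.back().RegNo == LO.RegNo)
          Out.back().Size = std::max(Out.back().Size, LO.Size);
        else
          Out.push_back(LO);
      }
      LOs = std::move(Out);
    }

    int main() {
      std::vector<LiveOutReg> LOs{{5, 4}, {5, 8}, {7, 4}}; // e.g. sub/super pair
      mergeLiveOuts(LOs);
      assert(LOs.size() == 2 && LOs[0].Size == 8);
    }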
+ I = --II; + break; + } + I->Size = std::max(I->Size, II->Size); + if (TRI->isSuperRegister(I->Reg, II->Reg)) + I->Reg = II->Reg; + II->MarkInvalid(); + } + } + LiveOuts.erase(std::remove_if(LiveOuts.begin(), LiveOuts.end(), + LiveOutReg::IsInvalid), LiveOuts.end()); + return LiveOuts; +} + +void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, bool recordResult) { @@ -74,71 +198,65 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID, MCSymbol *MILabel = OutContext.CreateTempSymbol(); AP.OutStreamer.EmitLabel(MILabel); - LocationVec CallsiteLocs; + LocationVec Locations; + LiveOutVec LiveOuts; if (recordResult) { - std::pair<Location, MachineInstr::const_mop_iterator> ParseResult = - OpParser(MI.operands_begin(), llvm::next(MI.operands_begin()), AP.TM); - - Location &Loc = ParseResult.first; - assert(Loc.LocType == Location::Register && - "Stackmap return location must be a register."); - CallsiteLocs.push_back(Loc); + assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value."); + parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), + Locations, LiveOuts); } + // Parse operands. while (MOI != MOE) { - std::pair<Location, MachineInstr::const_mop_iterator> ParseResult = - OpParser(MOI, MOE, AP.TM); - - Location &Loc = ParseResult.first; + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); + } - // Move large constants into the constant pool. - if (Loc.LocType == Location::Constant && (Loc.Offset & ~0xFFFFFFFFULL)) { - Loc.LocType = Location::ConstantIndex; - Loc.Offset = ConstPool.getConstantIndex(Loc.Offset); + // Move large constants into the constant pool. + for (LocationVec::iterator I = Locations.begin(), E = Locations.end(); + I != E; ++I) { + // Constants are encoded as sign-extended integers. + // -1 is directly encoded as .long 0xFFFFFFFF with no constant pool. + if (I->LocType == Location::Constant && + ((I->Offset + (int64_t(1)<<31)) >> 32) != 0) { + I->LocType = Location::ConstantIndex; + auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset)); + I->Offset = Result.first - ConstPool.begin(); } - - CallsiteLocs.push_back(Loc); - MOI = ParseResult.second; } + // Create an expression to calculate the offset of the callsite from function + // entry. const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub( MCSymbolRefExpr::Create(MILabel, OutContext), MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext), OutContext); - CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, CallsiteLocs)); -} + CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, Locations, LiveOuts)); -static MachineInstr::const_mop_iterator -getStackMapEndMOP(MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE) { - for (; MOI != MOE; ++MOI) - if (MOI->isRegMask() || (MOI->isReg() && MOI->isImplicit())) - break; - - return MOI; + // Record the stack size of the current function. + const MachineFrameInfo *MFI = AP.MF->getFrameInfo(); + FnStackSize[AP.CurrentFnSym] = + MFI->hasVarSizedObjects() ? 
UINT64_MAX : MFI->getStackSize(); } void StackMaps::recordStackMap(const MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::STACKMAP && "exected stackmap"); + assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap"); int64_t ID = MI.getOperand(0).getImm(); - assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); - recordStackMapOpers(MI, ID, llvm::next(MI.operands_begin(), 2), - getStackMapEndMOP(MI.operands_begin(), - MI.operands_end())); + recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), 2), + MI.operands_end()); } void StackMaps::recordPatchPoint(const MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "exected stackmap"); + assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint"); PatchPointOpers opers(&MI); int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); - assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); + MachineInstr::const_mop_iterator MOI = - llvm::next(MI.operands_begin(), opers.getStackMapStartIdx()); - recordStackMapOpers(MI, ID, MOI, getStackMapEndMOP(MOI, MI.operands_end()), + std::next(MI.operands_begin(), opers.getStackMapStartIdx()); + recordStackMapOpers(MI, ID, MOI, MI.operands_end(), opers.isAnyReg() && opers.hasDef()); #ifndef NDEBUG @@ -153,14 +271,66 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { #endif } -/// serializeToStackMapSection conceptually populates the following fields: +/// Emit the stackmap header. /// -/// uint32 : Reserved (header) +/// Header { +/// uint8 : Stack Map Version (currently 1) +/// uint8 : Reserved (expected to be 0) +/// uint16 : Reserved (expected to be 0) +/// } +/// uint32 : NumFunctions /// uint32 : NumConstants -/// int64 : Constants[NumConstants] /// uint32 : NumRecords +void StackMaps::emitStackmapHeader(MCStreamer &OS) { + // Header. + OS.EmitIntValue(StackMapVersion, 1); // Version. + OS.EmitIntValue(0, 1); // Reserved. + OS.EmitIntValue(0, 2); // Reserved. + + // Num functions. + DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n'); + OS.EmitIntValue(FnStackSize.size(), 4); + // Num constants. + DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n'); + OS.EmitIntValue(ConstPool.size(), 4); + // Num callsites. + DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n'); + OS.EmitIntValue(CSInfos.size(), 4); +} + +/// Emit the function frame record for each function. +/// +/// StkSizeRecord[NumFunctions] { +/// uint64 : Function Address +/// uint64 : Stack Size +/// } +void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { + // Function Frame records. + DEBUG(dbgs() << WSMP << "functions:\n"); + for (auto const &FR : FnStackSize) { + DEBUG(dbgs() << WSMP << "function addr: " << FR.first + << " frame size: " << FR.second); + OS.EmitSymbolValue(FR.first, 8); + OS.EmitIntValue(FR.second, 8); + } +} + +/// Emit the constant pool. +/// +/// int64 : Constants[NumConstants] +void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { + // Constant pool entries. + DEBUG(dbgs() << WSMP << "constants:\n"); + for (auto ConstEntry : ConstPool) { + DEBUG(dbgs() << WSMP << ConstEntry.second << '\n'); + OS.EmitIntValue(ConstEntry.second, 8); + } +} + +/// Emit the callsite info for each callsite. 
+/// /// StkMapRecord[NumRecords] { -/// uint32 : PatchPoint ID +/// uint64 : PatchPoint ID /// uint32 : Instruction Offset /// uint16 : Reserved (record flags) /// uint16 : NumLocations @@ -170,6 +340,14 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { /// uint16 : Dwarf RegNum /// int32 : Offset /// } +/// uint16 : Padding +/// uint16 : NumLiveOuts +/// LiveOuts[NumLiveOuts] { +/// uint16 : Dwarf RegNum +/// uint8 : Reserved +/// uint8 : Size in Bytes +/// } +/// uint32 : Padding (only if required to align to 8 byte) /// } /// /// Location Encoding, Type, Value: @@ -178,137 +356,154 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { /// 0x3, Indirect, [Reg + Offset] (spilled value) /// 0x4, Constant, Offset (small constant) /// 0x5, ConstIndex, Constants[Offset] (large constant) -/// -void StackMaps::serializeToStackMapSection() { - // Bail out if there's no stack map data. - if (CSInfos.empty()) - return; - - MCContext &OutContext = AP.OutStreamer.getContext(); - const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); - - // Create the section. - const MCSection *StackMapSection = - OutContext.getObjectFileInfo()->getStackMapSection(); - AP.OutStreamer.SwitchSection(StackMapSection); - - // Emit a dummy symbol to force section inclusion. - AP.OutStreamer.EmitLabel( - OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps"))); - - // Serialize data. - const char *WSMP = "Stack Maps: "; - (void)WSMP; - const MCRegisterInfo &MCRI = *OutContext.getRegisterInfo(); - - DEBUG(dbgs() << "********** Stack Map Output **********\n"); - - // Header. - AP.OutStreamer.EmitIntValue(0, 4); - - // Num constants. - AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4); - - // Constant pool entries. - for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i) - AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8); - - DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << "\n"); - AP.OutStreamer.EmitIntValue(CSInfos.size(), 4); +void StackMaps::emitCallsiteEntries(MCStreamer &OS, + const TargetRegisterInfo *TRI) { + // Callsite entries. + DEBUG(dbgs() << WSMP << "callsites:\n"); + for (const auto &CSI : CSInfos) { + const LocationVec &CSLocs = CSI.Locations; + const LiveOutVec &LiveOuts = CSI.LiveOuts; - for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(), - CSIE = CSInfos.end(); - CSII != CSIE; ++CSII) { - - unsigned CallsiteID = CSII->ID; - const LocationVec &CSLocs = CSII->Locations; - - DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n"); + DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n"); // Verify stack map entry. It's better to communicate a problem to the // runtime than crash in case of in-process compilation. Currently, we do // simple overflow checks, but we may eventually communicate other // compilation errors this way. - if (CSLocs.size() > UINT16_MAX) { - AP.OutStreamer.EmitIntValue(UINT32_MAX, 4); // Invalid ID. - AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); - AP.OutStreamer.EmitIntValue(0, 2); // Reserved. - AP.OutStreamer.EmitIntValue(0, 2); // 0 locations. + if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) { + OS.EmitIntValue(UINT64_MAX, 8); // Invalid ID. + OS.EmitValue(CSI.CSOffsetExpr, 4); + OS.EmitIntValue(0, 2); // Reserved. + OS.EmitIntValue(0, 2); // 0 locations. + OS.EmitIntValue(0, 2); // padding. + OS.EmitIntValue(0, 2); // 0 live-out registers. + OS.EmitIntValue(0, 4); // padding. 
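Before the loop continues below, a cross-check on the dummy record just emitted: its fixed-size fields already sum to a multiple of eight, so this bail-out path needs no trailing alignment (field widths taken from the layout comment above):

    #include <cassert>
    #include <cstdint>

    int main() {
      // ID(8) + offset(4) + flags(2) + nlocations(2) + padding(2) +
      // nliveouts(2) + padding(4) = 24 bytes, already 8-byte aligned.
      const uint64_t InvalidRecordSize = 8 + 4 + 2 + 2 + 2 + 2 + 4;
      assert(InvalidRecordSize == 24 && InvalidRecordSize % 8 == 0);
    }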
continue; } - AP.OutStreamer.EmitIntValue(CallsiteID, 4); - AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); + OS.EmitIntValue(CSI.ID, 8); + OS.EmitValue(CSI.CSOffsetExpr, 4); // Reserved for flags. - AP.OutStreamer.EmitIntValue(0, 2); + OS.EmitIntValue(0, 2); DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n"); - AP.OutStreamer.EmitIntValue(CSLocs.size(), 2); - - unsigned operIdx = 0; - for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end(); - LocI != LocE; ++LocI, ++operIdx) { - const Location &Loc = *LocI; - DEBUG( - dbgs() << WSMP << " Loc " << operIdx << ": "; - switch (Loc.LocType) { - case Location::Unprocessed: - dbgs() << "<Unprocessed operand>"; - break; - case Location::Register: - dbgs() << "Register " << MCRI.getName(Loc.Reg); - break; - case Location::Direct: - dbgs() << "Direct " << MCRI.getName(Loc.Reg); - if (Loc.Offset) - dbgs() << " + " << Loc.Offset; - break; - case Location::Indirect: - dbgs() << "Indirect " << MCRI.getName(Loc.Reg) - << " + " << Loc.Offset; - break; - case Location::Constant: - dbgs() << "Constant " << Loc.Offset; - break; - case Location::ConstantIndex: - dbgs() << "Constant Index " << Loc.Offset; - break; - } - dbgs() << "\n"; - ); + OS.EmitIntValue(CSLocs.size(), 2); + unsigned OperIdx = 0; + for (const auto &Loc : CSLocs) { unsigned RegNo = 0; int Offset = Loc.Offset; if(Loc.Reg) { - RegNo = MCRI.getDwarfRegNum(Loc.Reg, false); - for (MCSuperRegIterator SR(Loc.Reg, TRI); - SR.isValid() && (int)RegNo < 0; ++SR) { - RegNo = TRI->getDwarfRegNum(*SR, false); - } + RegNo = getDwarfRegNum(Loc.Reg, TRI); + // If this is a register location, put the subregister byte offset in // the location offset. if (Loc.LocType == Location::Register) { assert(!Loc.Offset && "Register location should have zero offset"); - unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false); - unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg); + unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg); if (SubRegIdx) - Offset = MCRI.getSubRegIdxOffset(SubRegIdx); + Offset = TRI->getSubRegIdxOffset(SubRegIdx); } } else { assert(Loc.LocType != Location::Register && "Missing location register"); } - AP.OutStreamer.EmitIntValue(Loc.LocType, 1); - AP.OutStreamer.EmitIntValue(Loc.Size, 1); - AP.OutStreamer.EmitIntValue(RegNo, 2); - AP.OutStreamer.EmitIntValue(Offset, 4); + + DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": "; + switch (Loc.LocType) { + case Location::Unprocessed: + dbgs() << "<Unprocessed operand>"; + break; + case Location::Register: + dbgs() << "Register " << TRI->getName(Loc.Reg); + break; + case Location::Direct: + dbgs() << "Direct " << TRI->getName(Loc.Reg); + if (Loc.Offset) + dbgs() << " + " << Loc.Offset; + break; + case Location::Indirect: + dbgs() << "Indirect " << TRI->getName(Loc.Reg) + << " + " << Loc.Offset; + break; + case Location::Constant: + dbgs() << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + dbgs() << "Constant Index " << Loc.Offset; + break; + } + dbgs() << " [encoding: .byte " << Loc.LocType + << ", .byte " << Loc.Size + << ", .short " << RegNo + << ", .int " << Offset << "]\n"; + ); + + OS.EmitIntValue(Loc.LocType, 1); + OS.EmitIntValue(Loc.Size, 1); + OS.EmitIntValue(RegNo, 2); + OS.EmitIntValue(Offset, 4); + OperIdx++; + } + + DEBUG(dbgs() << WSMP << " has " << LiveOuts.size() + << " live-out registers\n"); + + // Num live-out registers and padding to align to 4 byte. 
+ OS.EmitIntValue(0, 2); + OS.EmitIntValue(LiveOuts.size(), 2); + + OperIdx = 0; + for (const auto &LO : LiveOuts) { + DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": " + << TRI->getName(LO.Reg) + << " [encoding: .short " << LO.RegNo + << ", .byte 0, .byte " << LO.Size << "]\n"); + OS.EmitIntValue(LO.RegNo, 2); + OS.EmitIntValue(0, 1); + OS.EmitIntValue(LO.Size, 1); } + // Emit alignment to 8 byte. + OS.EmitValueToAlignment(8); } +} - AP.OutStreamer.AddBlankLine(); +/// Serialize the stackmap data. +void StackMaps::serializeToStackMapSection() { + (void) WSMP; + // Bail out if there's no stack map data. + assert((!CSInfos.empty() || (CSInfos.empty() && ConstPool.empty())) && + "Expected empty constant pool too!"); + assert((!CSInfos.empty() || (CSInfos.empty() && FnStackSize.empty())) && + "Expected empty function record too!"); + if (CSInfos.empty()) + return; + + MCContext &OutContext = AP.OutStreamer.getContext(); + MCStreamer &OS = AP.OutStreamer; + const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + + // Create the section. + const MCSection *StackMapSection = + OutContext.getObjectFileInfo()->getStackMapSection(); + OS.SwitchSection(StackMapSection); + + // Emit a dummy symbol to force section inclusion. + OS.EmitLabel(OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps"))); + + // Serialize data. + DEBUG(dbgs() << "********** Stack Map Output **********\n"); + emitStackmapHeader(OS); + emitFunctionFrameRecords(OS); + emitConstantPoolEntries(OS); + emitCallsiteEntries(OS, TRI); + OS.AddBlankLine(); + // Clean up. CSInfos.clear(); + ConstPool.clear(); } diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index 9020449..accfe7b 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -14,14 +14,12 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stack-protector" #include "llvm/CodeGen/StackProtector.h" -#include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -38,6 +36,8 @@ #include <cstdlib> using namespace llvm; +#define DEBUG_TYPE "stack-protector" + STATISTIC(NumFunProtected, "Number of functions protected"); STATISTIC(NumAddrTaken, "Number of local variables that have their address" " taken."); @@ -58,19 +58,43 @@ StackProtector::getSSPLayout(const AllocaInst *AI) const { return AI ? Layout.lookup(AI) : SSPLK_None; } +void StackProtector::adjustForColoring(const AllocaInst *From, + const AllocaInst *To) { + // When coloring replaces one alloca with another, transfer the SSPLayoutKind + // tag from the remapped to the target alloca. The remapped alloca should + // have a size smaller than or equal to the replacement alloca. + SSPLayoutMap::iterator I = Layout.find(From); + if (I != Layout.end()) { + SSPLayoutKind Kind = I->second; + Layout.erase(I); + + // Transfer the tag, but make sure that SSPLK_AddrOf does not overwrite + // SSPLK_SmallArray or SSPLK_LargeArray, and make sure that + // SSPLK_SmallArray does not overwrite SSPLK_LargeArray. 
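Stated compactly, the overwrite rule in that comment is "the stronger protection kind wins" under the ordering AddrOf < SmallArray < LargeArray, which the code resuming below implements with two explicit comparisons. A minimal sketch of that lattice (the enum ordering here is the editor's assumption, chosen so the comparison works; it is not necessarily LLVM's declaration order):

    #include <cassert>

    enum SSPLayoutKind { SSPLK_None, SSPLK_AddrOf, SSPLK_SmallArray,
                         SSPLK_LargeArray };

    // Merge an incoming tag into an existing one: the stronger protection
    // requirement wins (LargeArray > SmallArray > AddrOf > None).
    static SSPLayoutKind mergeSSPKind(SSPLayoutKind Old, SSPLayoutKind New) {
      return New > Old ? New : Old;
    }

    int main() {
      assert(mergeSSPKind(SSPLK_LargeArray, SSPLK_AddrOf) == SSPLK_LargeArray);
      assert(mergeSSPKind(SSPLK_AddrOf, SSPLK_SmallArray) == SSPLK_SmallArray);
    }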
+ I = Layout.find(To); + if (I == Layout.end()) + Layout.insert(std::make_pair(To, Kind)); + else if (I->second != SSPLK_LargeArray && Kind != SSPLK_AddrOf) + I->second = Kind; + } +} + bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); - DT = getAnalysisIfAvailable<DominatorTree>(); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; TLI = TM->getTargetLowering(); - if (!RequiresStackProtector()) - return false; - Attribute Attr = Fn.getAttributes().getAttribute( AttributeSet::FunctionIndex, "stack-protector-buffer-size"); - if (Attr.isStringAttribute()) - Attr.getValueAsString().getAsInteger(10, SSPBufferSize); + if (Attr.isStringAttribute() && + Attr.getValueAsString().getAsInteger(10, SSPBufferSize)) + return false; // Invalid integer string + + if (!RequiresStackProtector()) + return false; ++NumFunProtected; return InsertStackProtectors(); @@ -127,9 +151,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, } bool StackProtector::HasAddressTaken(const Instruction *AI) { - for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { - const User *U = *UI; + for (const User *U : AI->users()) { if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (AI == SI->getValueOperand()) return true; @@ -261,8 +283,7 @@ static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, const unsigned MaxSearch = 4; bool NoInterposingChain = true; - for (BasicBlock::reverse_iterator I = llvm::next(BB->rbegin()), - E = BB->rend(); + for (BasicBlock::reverse_iterator I = std::next(BB->rbegin()), E = BB->rend(); I != E && SearchCounter < MaxSearch; ++I) { Instruction *Inst = &*I; @@ -299,7 +320,7 @@ static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, SearchCounter++; } - return 0; + return nullptr; } /// Insert code into the entry block that stores the __stack_chk_guard @@ -334,7 +355,7 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, } IRBuilder<> B(&F->getEntryBlock().front()); - AI = B.CreateAlloca(PtrTy, 0, "StackGuardSlot"); + AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot"); LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard"); B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI, AI); @@ -352,8 +373,8 @@ bool StackProtector::InsertStackProtectors() { bool HasPrologue = false; bool SupportsSelectionDAGSP = EnableSelectionDAGSP && !TM->Options.EnableFastISel; - AllocaInst *AI = 0; // Place on stack that stores the stack guard. - Value *StackGuardVar = 0; // The stack guard variable. + AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. + Value *StackGuardVar = nullptr; // The stack guard variable. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { BasicBlock *BB = I++; @@ -370,14 +391,14 @@ bool StackProtector::InsertStackProtectors() { if (SupportsSelectionDAGSP) { // Since we have a potential tail call, insert the special stack check // intrinsic. - Instruction *InsertionPt = 0; + Instruction *InsertionPt = nullptr; if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) { InsertionPt = CI; } else { InsertionPt = RI; // At this point we know that BB has a return statement so it *DOES* // have a terminator. 
- assert(InsertionPt != 0 && "BB must have a terminator instruction at " + assert(InsertionPt != nullptr && "BB must have a terminator instruction at " "this point."); } diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index 9f44df8..791168f5 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackslotcoloring" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" @@ -33,6 +32,8 @@ #include <vector> using namespace llvm; +#define DEBUG_TYPE "stackslotcoloring" + static cl::opt<bool> DisableSharing("no-stack-slot-sharing", cl::init(false), cl::Hidden, @@ -87,7 +88,7 @@ namespace { initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); @@ -98,7 +99,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: void InitializeSlots(); @@ -142,7 +143,6 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = &*MBBI; - BlockFrequency Freq = MBFI->getBlockFreq(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); MII != EE; ++MII) { MachineInstr *MI = &*MII; @@ -157,18 +157,17 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { continue; LiveInterval &li = LS->getInterval(FI); if (!MI->isDebugValue()) - li.weight += LiveIntervals::getSpillWeight(false, true, Freq); + li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI); } for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { MachineMemOperand *MMO = *MMOI; - if (const Value *V = MMO->getValue()) { - if (const FixedStackPseudoSourceValue *FSV = - dyn_cast<FixedStackPseudoSourceValue>(V)) { - int FI = FSV->getFrameIndex(); - if (FI >= 0) - SSRefs[FI].push_back(MMO); - } + if (const FixedStackPseudoSourceValue *FSV = + dyn_cast_or_null<FixedStackPseudoSourceValue>( + MMO->getPseudoValue())) { + int FI = FSV->getFrameIndex(); + if (FI >= 0) + SSRefs[FI].push_back(MMO); } } } @@ -311,7 +310,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { if (NewFI == -1 || (NewFI == (int)SS)) continue; - const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + const PseudoSourceValue *NewSV = PseudoSourceValue::getFixedStack(NewFI); SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS]; for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) RefMMOs[i]->setValue(NewSV); @@ -386,8 +385,8 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { toErase.push_back(I); continue; } - - MachineBasicBlock::iterator NextMI = llvm::next(I); + + MachineBasicBlock::iterator NextMI = std::next(I); if (NextMI == MBB->end()) continue; unsigned LoadReg = 0; @@ -399,7 +398,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { ++NumDead; changed = true; - if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) { + if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 
nullptr) != -1) { ++NumDead; toErase.push_back(I); } diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index ff0181e..723a629 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -12,13 +12,12 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tailduplication" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -34,6 +33,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "tailduplication" + STATISTIC(NumTails , "Number of tails duplicated"); STATISTIC(NumTailDups , "Number of tail duplicated blocks"); STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); @@ -61,9 +62,10 @@ namespace { class TailDuplicatePass : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MachineBranchProbabilityInfo *MBPI; MachineModuleInfo *MMI; MachineRegisterInfo *MRI; - OwningPtr<RegScavenger> RS; + std::unique_ptr<RegScavenger> RS; bool PreRegAlloc; // SSAUpdateVRs - A list of virtual registers for which to update SSA form. @@ -78,7 +80,9 @@ namespace { explicit TailDuplicatePass() : MachineFunctionPass(ID), PreRegAlloc(false) {} - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; private: void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, @@ -128,10 +132,15 @@ INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false, false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + PreRegAlloc = MRI->isSSA(); RS.reset(); if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) @@ -144,6 +153,11 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } +void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = I; @@ -168,7 +182,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; dbgs() << " missing input from predecessor BB#" << PredBB->getNumber() << '\n'; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -179,12 +193,12 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { << ": " << *MI; dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() << '\n'; - llvm_unreachable(0); + llvm_unreachable(nullptr); } if (PHIBB->getNumber() < 0) { dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; dbgs() << " non-existing BB#" << 
PHIBB->getNumber() << '\n'; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } ++MI; @@ -234,7 +248,7 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, // If the original definition is still around, add it as an available // value. MachineInstr *DefMI = MRI->getVRegDef(VReg); - MachineBasicBlock *DefBB = 0; + MachineBasicBlock *DefBB = nullptr; if (DefMI) { DefBB = DefMI->getParent(); SSAUpdate.AddAvailableValue(DefBB, VReg); @@ -252,8 +266,8 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, // Rewrite uses that are outside of the original def's block. MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); while (UI != MRI->use_end()) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; + MachineOperand &UseMO = *UI; + MachineInstr *UseMI = UseMO.getParent(); ++UI; if (UseMI->isDebugValue()) { // SSAUpdate can replace the use with an undef. That creates @@ -328,12 +342,10 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->isDebugValue()) + for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { + if (UseMI.isDebugValue()) continue; - if (UseMI->getParent() != BB) + if (UseMI.getParent() != BB) return true; } return false; @@ -352,9 +364,7 @@ static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { // block (which is why we need to copy the information). static void getRegsUsedByPHIs(const MachineBasicBlock &BB, DenseSet<unsigned> *UsedByPhi) { - for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end(); - I != E; ++I) { - const MachineInstr &MI = *I; + for (const auto &MI : BB) { if (!MI.isPHI()) break; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { @@ -645,7 +655,7 @@ TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { if (PredBB->succ_size() > 1) return false; - MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) return false; @@ -676,7 +686,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, if (bothUsedInPHI(*PredBB, Succs)) continue; - MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) continue; @@ -686,7 +696,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); - MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB)); + MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB)); // Make PredFBB explicit. if (PredCond.empty()) @@ -707,25 +717,26 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, // Make the branch unconditional if possible if (PredTBB == PredFBB) { PredCond.clear(); - PredFBB = NULL; + PredFBB = nullptr; } // Avoid adding fall through branches. 
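The cleanup being finished here canonicalizes the analyzed branch pair (PredTBB, PredFBB) against the layout successor. Pulled out on its own, the rule set looks like this (an opaque block stand-in and a plain boolean condition replace the real types; nullptr means "fall through"):

    #include <cassert>

    using MBB = int; // stand-in for MachineBasicBlock

    // Drop branch targets that the block layout already provides.
    static void normalizeBranch(MBB *&TBB, MBB *&FBB, bool &HasCond,
                                const MBB *NextBB) {
      if (TBB == FBB) { // both edges hit one target: make it unconditional
        HasCond = false;
        FBB = nullptr;
      }
      if (FBB == NextBB) // false edge is the fall-through
        FBB = nullptr;
      if (TBB == NextBB && !FBB) // everything falls through: no branch at all
        TBB = nullptr;
    }

    int main() {
      MBB A = 0, B = 1;
      MBB *TBB = &A, *FBB = &A;
      bool HasCond = true;
      normalizeBranch(TBB, FBB, HasCond, &B);
      assert(!HasCond && TBB == &A && FBB == nullptr);
    }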
if (PredFBB == NextBB) - PredFBB = NULL; - if (PredTBB == NextBB && PredFBB == NULL) - PredTBB = NULL; + PredFBB = nullptr; + if (PredTBB == NextBB && PredFBB == nullptr) + PredTBB = nullptr; TII->RemoveBranch(*PredBB); if (PredTBB) TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB); PredBB->removeSuccessor(TailBB); unsigned NumSuccessors = PredBB->succ_size(); assert(NumSuccessors <= 1); if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) - PredBB->addSuccessor(NewTarget); + PredBB->addSuccessor(NewTarget, Weight); TDBBs.push_back(PredBB); } @@ -786,7 +797,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Update PredBB livein. RS->enterBasicBlock(PredBB); if (!PredBB->empty()) - RS->forward(prior(PredBB->end())); + RS->forward(std::prev(PredBB->end())); BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), @@ -836,7 +847,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I); + PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I)); Changed = true; ++NumTailDups; @@ -845,8 +856,8 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. - MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); - MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index bf4fd65..83966bd 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -13,10 +13,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" @@ -41,7 +43,7 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const { if (OpNum >= MCID.getNumOperands()) - return 0; + return nullptr; short RegClass = MCID.OpInfo[OpNum].RegClass; if (MCID.OpInfo[OpNum].isLookupPtrRegClass()) @@ -49,7 +51,7 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, // Instructions like INSERT_SUBREG do not have fixed register classes. if (RegClass < 0) - return 0; + return nullptr; // Otherwise just look it up normally. return TRI->getRegClass(RegClass); @@ -109,7 +111,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // If MBB isn't immediately before MBB, insert a branch to it. 
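One reading aid for the test that follows: despite the comment's wording, it checks whether MBB is immediately before NewDest in the function's block list, i.e. simple list adjacency via one iterator increment. The idiom in isolation (std::list standing in for the machine function's block list):

    #include <cassert>
    #include <iterator>
    #include <list>

    // True if the element after A is B, i.e. B is A's layout successor.
    template <typename It>
    static bool isLayoutSuccessor(It A, It B) {
      return std::next(A) == B;
    }

    int main() {
      std::list<int> Blocks{10, 20, 30};
      auto A = Blocks.begin();
      assert(isLayoutSuccessor(A, std::next(A)));
      assert(!isLayoutSuccessor(A, std::next(A, 2)));
    }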
if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) - InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(), + InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), Tail->getDebugLoc()); MBB->addSuccessor(NewDest); } @@ -122,13 +124,11 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, bool HasDef = MCID.getNumDefs(); if (HasDef && !MI->getOperand(0).isReg()) // No idea how to commute this instruction. Target should implement its own. - return 0; + return nullptr; unsigned Idx1, Idx2; if (!findCommutedOpIndices(MI, Idx1, Idx2)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Don't know how to commute: " << *MI; - report_fatal_error(Msg.str()); + assert(MI->isCommutable() && "Precondition violation: MI must be commutable."); + return nullptr; } assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && @@ -248,13 +248,15 @@ bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, oe = MI->memoperands_end(); o != oe; ++o) { - if ((*o)->isLoad() && (*o)->getValue()) + if ((*o)->isLoad()) { if (const FixedStackPseudoSourceValue *Value = - dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + dyn_cast_or_null<FixedStackPseudoSourceValue>( + (*o)->getPseudoValue())) { FrameIndex = Value->getFrameIndex(); MMO = *o; return true; } + } } return false; } @@ -266,13 +268,15 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, oe = MI->memoperands_end(); o != oe; ++o) { - if ((*o)->isStore() && (*o)->getValue()) + if ((*o)->isStore()) { if (const FixedStackPseudoSourceValue *Value = - dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + dyn_cast_or_null<FixedStackPseudoSourceValue>( + (*o)->getPseudoValue())) { FrameIndex = Value->getFrameIndex(); MMO = *o; return true; } + } } return false; } @@ -338,14 +342,14 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, unsigned FoldIdx) { assert(MI->isCopy() && "MI must be a COPY instruction"); if (MI->getNumOperands() != 2) - return 0; + return nullptr; assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand"); const MachineOperand &FoldOp = MI->getOperand(FoldIdx); const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx); if (FoldOp.getSubReg() || LiveOp.getSubReg()) - return 0; + return nullptr; unsigned FoldReg = FoldOp.getReg(); unsigned LiveReg = LiveOp.getReg(); @@ -357,13 +361,13 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) - return RC->contains(LiveOp.getReg()) ? RC : 0; + return RC->contains(LiveOp.getReg()) ? RC : nullptr; if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) return RC; // FIXME: Allow folding when register classes are memory compatible. - return 0; + return nullptr; } bool TargetInstrInfo:: @@ -372,6 +376,65 @@ canFoldMemoryOperand(const MachineInstr *MI, return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); } +static MachineInstr* foldPatchpoint(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex, + const TargetInstrInfo &TII) { + unsigned StartIdx = 0; + switch (MI->getOpcode()) { + case TargetOpcode::STACKMAP: + StartIdx = 2; // Skip ID, nShadowBytes. + break; + case TargetOpcode::PATCHPOINT: { + // For PatchPoint, the call args are not foldable. 
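Only operands at or beyond the stackmap's variable-operand start index may be folded, which is what the loop after this comment enforces. A condensed statement of that gate (the indices follow the patch; the helper itself is illustrative):

    #include <cassert>
    #include <vector>

    // STACKMAP: operands 0..1 are ID and shadow bytes; PATCHPOINT additionally
    // carries target, calling convention, and call arguments before the live
    // values, so its StartIdx is larger.
    static bool canFoldOps(const std::vector<unsigned> &Ops, unsigned StartIdx) {
      for (unsigned Idx : Ops)
        if (Idx < StartIdx)
          return false; // a fixed/meta operand was requested: bail out
      return true;
    }

    int main() {
      assert(canFoldOps({4, 7}, 2)); // stackmap live values: foldable
      assert(!canFoldOps({1}, 2));   // the ID operand: not foldable
    }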
+ PatchPointOpers opers(MI); + StartIdx = opers.getVarIdx(); + break; + } + default: + llvm_unreachable("unexpected stackmap opcode"); + } + + // Return nullptr if any operands requested for folding are not foldable (not + // part of the stackmap's live values). + for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end(); + I != E; ++I) { + if (*I < StartIdx) + return nullptr; + } + + MachineInstr *NewMI = + MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true); + MachineInstrBuilder MIB(MF, NewMI); + + // No need to fold the return, the metadata, and function arguments + for (unsigned i = 0; i < StartIdx; ++i) + MIB.addOperand(MI->getOperand(i)); + + for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) { + unsigned SpillSize; + unsigned SpillOffset; + // Compute the spill slot size and offset. + const TargetRegisterClass *RC = + MF.getRegInfo().getRegClass(MO.getReg()); + bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, + SpillOffset, &MF.getTarget()); + if (!Valid) + report_fatal_error("cannot spill patchpoint subregister operand"); + MIB.addImm(StackMaps::IndirectMemRefOp); + MIB.addImm(SpillSize); + MIB.addFrameIndex(FrameIndex); + MIB.addImm(SpillOffset); + } + else + MIB.addOperand(MO); + } + return NewMI; +} + /// foldMemoryOperand - Attempt to fold a load or store of the specified stack /// slot into the specified machine instruction for the specified operand(s). /// If this is possible, a new instruction is returned with the specified @@ -393,8 +456,18 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, assert(MBB && "foldMemoryOperand needs an inserted instruction"); MachineFunction &MF = *MBB->getParent(); - // Ask the target to do the actual folding. - if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + MachineInstr *NewMI = nullptr; + + if (MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FI, *this); + } else { + // Ask the target to do the actual folding. + NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI); + } + + if (NewMI) { NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || @@ -417,11 +490,11 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, // Straight COPY may fold as load/store. if (!MI->isCopy() || Ops.size() != 1) - return 0; + return nullptr; const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); if (!RC) - return 0; + return nullptr; const MachineOperand &MO = MI->getOperand(1-Ops[0]); MachineBasicBlock::iterator Pos = MI; @@ -450,8 +523,20 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, MachineFunction &MF = *MBB.getParent(); // Ask the target to do the actual folding. - MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); - if (!NewMI) return 0; + MachineInstr *NewMI = nullptr; + int FrameIndex = 0; + + if ((MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) && + isLoadFromStackSlot(LoadMI, FrameIndex)) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); + } else { + // Ask the target to do the actual folding.
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + } + + if (!NewMI) return nullptr; NewMI = MBB.insert(MI, NewMI); @@ -562,7 +647,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const { // Terminators and labels can't be scheduled around. - if (MI->isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isPosition()) return true; // Don't attempt to schedule around any instruction that defines @@ -586,7 +671,7 @@ bool TargetInstrInfo::usePreRAHazardRecognizer() const { // Default implementation of CreateTargetRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfo:: -CreateTargetHazardRecognizer(const TargetMachine *TM, +CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const { // Dummy hazard recognizer allows all instructions to issue. return new ScheduleHazardRecognizer(); diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index 30305af..e80ef71 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -18,11 +18,15 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Mangler.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -35,7 +39,7 @@ using namespace llvm; /// InitLibcallNames - Set default libcall names. /// -static void InitLibcallNames(const char **Names, const TargetMachine &TM) { +static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::SHL_I16] = "__ashlhi3"; Names[RTLIB::SHL_I32] = "__ashlsi3"; Names[RTLIB::SHL_I64] = "__ashldi3"; @@ -78,16 +82,16 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::UREM_I128] = "__umodti3"; // These are generally not available. 
- Names[RTLIB::SDIVREM_I8] = 0; - Names[RTLIB::SDIVREM_I16] = 0; - Names[RTLIB::SDIVREM_I32] = 0; - Names[RTLIB::SDIVREM_I64] = 0; - Names[RTLIB::SDIVREM_I128] = 0; - Names[RTLIB::UDIVREM_I8] = 0; - Names[RTLIB::UDIVREM_I16] = 0; - Names[RTLIB::UDIVREM_I32] = 0; - Names[RTLIB::UDIVREM_I64] = 0; - Names[RTLIB::UDIVREM_I128] = 0; + Names[RTLIB::SDIVREM_I8] = nullptr; + Names[RTLIB::SDIVREM_I16] = nullptr; + Names[RTLIB::SDIVREM_I32] = nullptr; + Names[RTLIB::SDIVREM_I64] = nullptr; + Names[RTLIB::SDIVREM_I128] = nullptr; + Names[RTLIB::UDIVREM_I8] = nullptr; + Names[RTLIB::UDIVREM_I16] = nullptr; + Names[RTLIB::UDIVREM_I32] = nullptr; + Names[RTLIB::UDIVREM_I64] = nullptr; + Names[RTLIB::UDIVREM_I128] = nullptr; Names[RTLIB::NEG_I32] = "__negsi2"; Names[RTLIB::NEG_I64] = "__negdi2"; @@ -201,6 +205,11 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::FLOOR_F80] = "floorl"; Names[RTLIB::FLOOR_F128] = "floorl"; Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::ROUND_F32] = "roundf"; + Names[RTLIB::ROUND_F64] = "round"; + Names[RTLIB::ROUND_F80] = "roundl"; + Names[RTLIB::ROUND_F128] = "roundl"; + Names[RTLIB::ROUND_PPCF128] = "roundl"; Names[RTLIB::COPYSIGN_F32] = "copysignf"; Names[RTLIB::COPYSIGN_F64] = "copysign"; Names[RTLIB::COPYSIGN_F80] = "copysignl"; @@ -211,6 +220,10 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2"; + Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2"; + Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2"; + Names[RTLIB::FPROUND_PPCF128_F16] = "__trunctfhf2"; Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; @@ -375,7 +388,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16"; - if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { + if (TT.getEnvironment() == Triple::GNU) { Names[RTLIB::SINCOS_F32] = "sincosf"; Names[RTLIB::SINCOS_F64] = "sincos"; Names[RTLIB::SINCOS_F80] = "sincosl"; @@ -383,18 +396,18 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SINCOS_PPCF128] = "sincosl"; } else { // These are generally not available. - Names[RTLIB::SINCOS_F32] = 0; - Names[RTLIB::SINCOS_F64] = 0; - Names[RTLIB::SINCOS_F80] = 0; - Names[RTLIB::SINCOS_F128] = 0; - Names[RTLIB::SINCOS_PPCF128] = 0; + Names[RTLIB::SINCOS_F32] = nullptr; + Names[RTLIB::SINCOS_F64] = nullptr; + Names[RTLIB::SINCOS_F80] = nullptr; + Names[RTLIB::SINCOS_F128] = nullptr; + Names[RTLIB::SINCOS_PPCF128] = nullptr; } - if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) { + if (TT.getOS() != Triple::OpenBSD) { Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; } else { // These are generally not available. - Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = 0; + Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr; } } @@ -409,7 +422,10 @@ static void InitLibcallCallingConvs(CallingConv::ID *CCs) { /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. 
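/// [Editor's note: worked example using values from the tables initialized
/// above. getFPEXT(MVT::f16, MVT::f32) yields FPEXT_F16_F32, whose default
/// libcall name is "__gnu_h2f_ieee"; an unhandled pair such as (f64, f32)
/// falls through to UNKNOWN_LIBCALL.]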
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { + if (OpVT == MVT::f16) { + if (RetVT == MVT::f32) + return FPEXT_F16_F32; + } else if (OpVT == MVT::f32) { if (RetVT == MVT::f64) return FPEXT_F32_F64; if (RetVT == MVT::f128) @@ -425,7 +441,18 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { /// getFPROUND - Return the FPROUND_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { - if (RetVT == MVT::f32) { + if (RetVT == MVT::f16) { + if (OpVT == MVT::f32) + return FPROUND_F32_F16; + if (OpVT == MVT::f64) + return FPROUND_F64_F16; + if (OpVT == MVT::f80) + return FPROUND_F80_F16; + if (OpVT == MVT::f128) + return FPROUND_F128_F16; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F16; + } else if (RetVT == MVT::f32) { if (OpVT == MVT::f64) return FPROUND_F64_F32; if (OpVT == MVT::f80) @@ -659,25 +686,29 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The constructor takes ownership of TLOF. TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { + : TM(tm), DL(TM.getDataLayout()), TLOF(*tlof) { initActions(); // Perform these initializations only once. - IsLittleEndian = TD->isLittleEndian(); + IsLittleEndian = DL->isLittleEndian(); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; SelectIsExpensive = false; + HasMultipleConditionRegisters = false; + HasExtractBitsInsn = false; IntDivIsCheap = false; Pow2DivIsCheap = false; JumpIsExpensive = false; PredictableSelectIsExpensive = false; + MaskAndBranchFoldingIsLegal = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; + BooleanFloatContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::ILP; JumpBufSize = 0; @@ -690,7 +721,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, SupportJumpTables = true; MinimumJumpTableEntries = 4; - InitLibcallNames(LibcallRoutineNames, TM); + InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple())); InitCmpLibcallCCs(CmpLibcallCCs); InitLibcallCallingConvs(LibcallCallingConvs); } @@ -718,6 +749,10 @@ void TargetLoweringBase::initActions() { setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); } + // Most backends expect to see the node which just returns the value loaded. + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, + (MVT::SimpleValueType)VT, Expand); + // These operations default to expand. setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); @@ -727,8 +762,15 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. 
if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
- VT <= MVT::LAST_VECTOR_VALUETYPE)
+ VT <= MVT::LAST_VECTOR_VALUETYPE) {
 setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,
+ (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG,
+ (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG,
+ (MVT::SimpleValueType)VT, Expand);
+ }
 }
 // Most targets ignore the @llvm.prefetch intrinsic.
@@ -754,6 +796,7 @@ void TargetLoweringBase::initActions() {
 setOperationAction(ISD::FCEIL, MVT::f16, Expand);
 setOperationAction(ISD::FRINT, MVT::f16, Expand);
 setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::f16, Expand);
 setOperationAction(ISD::FLOG , MVT::f32, Expand);
 setOperationAction(ISD::FLOG2, MVT::f32, Expand);
 setOperationAction(ISD::FLOG10, MVT::f32, Expand);
@@ -764,6 +807,7 @@ void TargetLoweringBase::initActions() {
 setOperationAction(ISD::FCEIL, MVT::f32, Expand);
 setOperationAction(ISD::FRINT, MVT::f32, Expand);
 setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FROUND, MVT::f32, Expand);
 setOperationAction(ISD::FLOG , MVT::f64, Expand);
 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
@@ -774,6 +818,7 @@ void TargetLoweringBase::initActions() {
 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
 setOperationAction(ISD::FRINT, MVT::f64, Expand);
 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FROUND, MVT::f64, Expand);
 setOperationAction(ISD::FLOG , MVT::f128, Expand);
 setOperationAction(ISD::FLOG2, MVT::f128, Expand);
 setOperationAction(ISD::FLOG10, MVT::f128, Expand);
@@ -784,6 +829,7 @@ void TargetLoweringBase::initActions() {
 setOperationAction(ISD::FCEIL, MVT::f128, Expand);
 setOperationAction(ISD::FRINT, MVT::f128, Expand);
 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+ setOperationAction(ISD::FROUND, MVT::f128, Expand);
 // Default ISD::TRAP to expand (which turns it into abort).
 setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -799,7 +845,7 @@ MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
 }
 unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
- return TD->getPointerSizeInBits(AS);
+ return DL->getPointerSizeInBits(AS);
 }
 unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
@@ -808,7 +854,7 @@ unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
 }
 MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*TD->getPointerSize(0));
+ return MVT::getIntegerVT(8*DL->getPointerSize(0));
 }
 EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
@@ -894,6 +940,58 @@ bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
 return false;
 }
+/// Replace/modify any TargetFrameIndex operands with a target-dependent
+/// sequence of memory operands that is recognized by PrologEpilogInserter.
+MachineBasicBlock*
+TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+
+ // MI changes inside this loop as we grow operands.
+ for (unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
+ MachineOperand &MO = MI->getOperand(OperIdx);
+ if (!MO.isFI())
+ continue;
+
+ // foldMemoryOperand builds a new MI after replacing a single FI operand
+ // with the canonical set of five x86 addressing-mode operands.
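// [Editor's note: sketch of the rewrite performed below. Here, by contrast,
// the single FI operand is expanded in place into the three-operand form the
// StackMaps emitter decodes,
//   <StackMaps::DirectMemRefOp>, <frame index>, <offset = 0>
// which is exactly the addImm/addOperand/addImm sequence that follows; the
// bracketed operand names are informal.]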
+ int FI = MO.getIndex(); + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc()); + + // Copy operands before the frame-index. + for (unsigned i = 0; i < OperIdx; ++i) + MIB.addOperand(MI->getOperand(i)); + // Add frame index operands: direct-mem-ref tag, #FI, offset. + MIB.addImm(StackMaps::DirectMemRefOp); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + // Copy the operands after the frame index. + for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + + // Inherit previous memory operands. + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!"); + + // Add a new memory operand for this FI. + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FI) != -1); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad, + TM.getDataLayout()->getPointerSize(), + MFI.getObjectAlignment(FI)); + MIB->addMemOperand(MF, MMO); + + // Replace the instruction and update the operand index. + MBB->insert(MachineBasicBlock::iterator(MI), MIB); + OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1; + MI->eraseFromParent(); + MI = MIB; + } + return MBB; +} + /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". std::pair<const TargetRegisterClass*, uint8_t> @@ -938,7 +1036,7 @@ void TargetLoweringBase::computeRegisterProperties() { // Find the largest integer register class. unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; - for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg) assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); // Every integer value type larger than this largest register takes twice as @@ -1009,27 +1107,35 @@ void TargetLoweringBase::computeRegisterProperties() { } } + if (!isTypeLegal(MVT::f16)) { + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; + TransformToType[MVT::f16] = MVT::i16; + ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); + } + // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; - if (isTypeLegal(VT)) continue; + MVT VT = (MVT::SimpleValueType) i; + if (isTypeLegal(VT)) + continue; - // Determine if there is a legal wider type. If so, we should promote to - // that wider vector type. MVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); - if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) { - bool IsLegalWiderType = false; - // First try to promote the elements of integer vectors. If no legal - // promotion was found, fallback to the widen-vector method. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; + bool IsLegalWiderType = false; + LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); + switch (PreferredAction) { + case TypePromoteInteger: { + // Try to promote the elements of integer vectors. If no legal + // promotion was found, fall through to the widen-vector method. 
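// [Editor's note: hypothetical example of this search. On a target where
// v4i16 is illegal but v4i32 is legal, the loop below finds v4i32 (same
// element count, wider integer elements) and records it as the transform
// and register type for v4i16.]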
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() - && SVT.getVectorNumElements() == NElts && - isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT) + && SVT.getScalarType().isInteger()) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1038,15 +1144,15 @@ void TargetLoweringBase::computeRegisterProperties() { break; } } - - if (IsLegalWiderType) continue; - + if (IsLegalWiderType) + break; + } + case TypeWidenVector: { // Try to widen the vector. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; - if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeLegal(SVT)) { + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; + if (SVT.getVectorElementType() == EltVT + && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1055,27 +1161,34 @@ void TargetLoweringBase::computeRegisterProperties() { break; } } - if (IsLegalWiderType) continue; + if (IsLegalWiderType) + break; } - - MVT IntermediateVT; - MVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - MVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. The default action is to split. - TransformToType[i] = MVT::Other; - unsigned NumElts = VT.getVectorNumElements(); - ValueTypeActions.setTypeAction(VT, - NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); + case TypeSplitVector: + case TypeScalarizeVector: { + MVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT, + NumIntermediates, RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + if (PreferredAction == TypeScalarizeVector) + ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); + else + ValueTypeActions.setTypeAction(VT, TypeSplitVector); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + } + break; + } + default: + llvm_unreachable("Unknown vector legalization action!"); } } @@ -1087,7 +1200,7 @@ void TargetLoweringBase::computeRegisterProperties() { for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { const TargetRegisterClass* RRC; uint8_t Cost; - tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); + std::tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); RepRegClassForVT[i] = RRC; RepRegClassCostForVT[i] = Cost; } @@ -1230,7 +1343,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. 
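/// [Editor's note: e.g. for a byval struct of four i32 fields on a typical
/// 32-bit target, the DataLayout ABI alignment, and hence the value returned
/// below, would be 4; the exact number always comes from the target's
/// DataLayout string. Illustrative, not normative.]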
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const { - return TD->getCallFrameTypeAlignment(Ty); + return DL->getABITypeAlignment(Ty); } //===----------------------------------------------------------------------===// @@ -1258,7 +1371,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case Mul: return ISD::MUL; case FMul: return ISD::FMUL; case UDiv: return ISD::UDIV; - case SDiv: return ISD::UDIV; + case SDiv: return ISD::SDIV; case FDiv: return ISD::FDIV; case URem: return ISD::UREM; case SRem: return ISD::SREM; @@ -1364,6 +1477,8 @@ bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM, return false; // Allow 2*r as r+r. break; + default: // Don't allow n * r + return false; } return true; diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 59d7b57..f59efa3 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -34,7 +35,7 @@ #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; using namespace dwarf; @@ -43,21 +44,16 @@ using namespace dwarf; // ELF //===----------------------------------------------------------------------===// -MCSymbol * -TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, - Mangler *Mang, - MachineModuleInfo *MMI) const { +MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( + const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + MachineModuleInfo *MMI) const { unsigned Encoding = getPersonalityEncoding(); - switch (Encoding & 0x70) { - default: - report_fatal_error("We do not support this DWARF encoding yet!"); - case dwarf::DW_EH_PE_absptr: - return getSymbol(*Mang, GV); - case dwarf::DW_EH_PE_pcrel: { + if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect) return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + - getSymbol(*Mang, GV)->getName()); - } - } + TM.getSymbol(GV, Mang)->getName()); + if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr) + return TM.getSymbol(GV, Mang); + report_fatal_error("We do not support this DWARF encoding yet!"); } void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, @@ -87,24 +83,21 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Streamer.EmitSymbolValue(Sym, Size); } -const MCExpr *TargetLoweringObjectFileELF:: -getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, - MCStreamer &Streamer) const { +const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( + const GlobalValue *GV, unsigned Encoding, Mangler &Mang, + const TargetMachine &TM, MachineModuleInfo *MMI, + MCStreamer &Streamer) const { if (Encoding & dwarf::DW_EH_PE_indirect) { MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += ".DW.stub"; + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM); // Add information about the stub reference to ELFMMI so that 
the stub
 // gets emitted by the asmprinter.
- MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
 MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
- if (StubSym.getPointer() == 0) {
- MCSymbol *Sym = getSymbol(*Mang, GV);
+ if (!StubSym.getPointer()) {
+ MCSymbol *Sym = TM.getSymbol(GV, Mang);
 StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
 }
@@ -114,7 +107,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
 }
 return TargetLoweringObjectFile::
- getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+ getTTypeGlobalReference(GV, Encoding, Mang, TM, MMI, Streamer);
 }
 static SectionKind
@@ -199,23 +192,40 @@ getELFSectionFlags(SectionKind K) {
 return Flags;
 }
+static const Comdat *getELFComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return nullptr;
-const MCSection *TargetLoweringObjectFileELF::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
+ if (C->getSelectionKind() != Comdat::Any)
+ report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" +
+ C->getName() + "' cannot be lowered.");
+
+ return C;
+}
+
+const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
 StringRef SectionName = GV->getSection();
 // Infer section flags from the section name if we can.
 Kind = getELFKindForNamedSection(SectionName, Kind);
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (const Comdat *C = getELFComdat(GV)) {
+ Group = C->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
 return getContext().getELFSection(SectionName,
- getELFSectionType(SectionName, Kind),
- getELFSectionFlags(Kind), Kind);
+ getELFSectionType(SectionName, Kind), Flags,
+ Kind, /*EntrySize=*/0, Group);
 }
 /// getSectionPrefixForGlobal - Return the section prefix name used by options
/// FunctionSections and DataSections.
-static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
 if (Kind.isText()) return ".text.";
 if (Kind.isReadOnly()) return ".rodata.";
 if (Kind.isBSS()) return ".bss.";
@@ -232,10 +242,9 @@ static const char *getSectionPrefixForGlobal(SectionKind Kind) {
 return ".data.rel.ro.";
 }
-
const MCSection *TargetLoweringObjectFileELF::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
+ Mangler &Mang, const TargetMachine &TM) const {
 // If we have -ffunction-section or -fdata-section then we should emit the
 // global value to a uniqued section specifically for it.
 bool EmitUniquedSection;
@@ -246,18 +255,20 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
 // If this global is linkonce/weak and the target handles this by emitting it
 // into a 'uniqued' section name, create and return the section now.
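// [Editor's note: hypothetical example. With function sections enabled, a
// linkonce_odr function whose mangled name is _Z3foov lands in a section
// named ".text._Z3foov"; when the global is weak or in a COMDAT, the code
// below additionally sets ELF::SHF_GROUP, with the group signature taken
// from the comdat name or from the symbol-derived suffix.]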
- if ((GV->isWeakForLinker() || EmitUniquedSection) && + if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && !Kind.isCommon()) { - const char *Prefix; - Prefix = getSectionPrefixForGlobal(Kind); + StringRef Prefix = getSectionPrefixForGlobal(Kind); + + SmallString<128> Name(Prefix); + TM.getNameWithPrefix(Name, GV, Mang, true); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - MCSymbol *Sym = getSymbol(*Mang, GV); - Name.append(Sym->getName().begin(), Sym->getName().end()); StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); - if (GV->isWeakForLinker()) { - Group = Sym->getName(); + if (GV->isWeakForLinker() || GV->hasComdat()) { + if (const Comdat *C = getELFComdat(GV)) + Group = C->getName(); + else + Group = Name.substr(Prefix.size()); Flags |= ELF::SHF_GROUP; } @@ -327,8 +338,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, /// getSectionForConstant - Given a mergeable constant with the /// specified size and relocation information, return a section that it /// should be placed in. -const MCSection *TargetLoweringObjectFileELF:: -getSectionForConstant(SectionKind Kind) const { +const MCSection * +TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, + const Constant *C) const { if (Kind.isMergeableConst4() && MergeableConst4Section) return MergeableConst4Section; if (Kind.isMergeableConst8() && MergeableConst8Section) @@ -343,8 +355,8 @@ getSectionForConstant(SectionKind Kind) const { return DataRelROSection; } -const MCSection * -TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { +const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym) const { // The default scheme is .ctor / .dtor, so we have to invert the priority // numbering. if (Priority == 65535) @@ -363,8 +375,8 @@ TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { } } -const MCSection * -TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { +const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { // The default scheme is .ctor / .dtor, so we have to invert the priority // numbering. if (Priority == 65535) @@ -405,14 +417,24 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { // MachO //===----------------------------------------------------------------------===// +/// getDepLibFromLinkerOpt - Extract the dependent library name from a linker +/// option string. Returns StringRef() if the option does not specify a library. +StringRef TargetLoweringObjectFileMachO:: +getDepLibFromLinkerOpt(StringRef LinkerOption) const { + const char *LibCmd = "-l"; + if (LinkerOption.startswith(LibCmd)) + return LinkerOption.substr(strlen(LibCmd)); + return StringRef(); +} + /// emitModuleFlags - Perform code emission for module flags. 
void TargetLoweringObjectFileMachO:: emitModuleFlags(MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - Mangler *Mang, const TargetMachine &TM) const { + Mangler &Mang, const TargetMachine &TM) const { unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; - MDNode *LinkerOptions = 0; + MDNode *LinkerOptions = nullptr; StringRef SectionVal; for (ArrayRef<Module::ModuleFlagEntry>::iterator @@ -481,13 +503,25 @@ emitModuleFlags(MCStreamer &Streamer, Streamer.AddBlankLine(); } -const MCSection *TargetLoweringObjectFileMachO:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { +static void checkMachOComdat(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + if (!C) + return; + + report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() + + "' cannot be lowered."); +} + +const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { // Parse the section specifier and create it if valid. StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; bool TAAParsed; + + checkMachOComdat(GV); + std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, TAA, TAAParsed, StubSize); @@ -520,9 +554,45 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, return S; } +bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols( + const MCSection &Section) const { + const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); + + // Sections holding 1 byte strings are atomized based on the data + // they contain. + // Sections holding 2 byte strings require symbols in order to be + // atomized. + // There is no dedicated section for 4 byte strings. + if (SMO.getKind().isMergeable1ByteCString()) + return false; + + if (SMO.getSegmentName() == "__DATA" && + SMO.getSectionName() == "__cfstring") + return false; + + switch (SMO.getType()) { + default: + return true; + + // These sections are atomized at the element boundaries without using + // symbols. + case MachO::S_4BYTE_LITERALS: + case MachO::S_8BYTE_LITERALS: + case MachO::S_16BYTE_LITERALS: + case MachO::S_LITERAL_POINTERS: + case MachO::S_NON_LAZY_SYMBOL_POINTERS: + case MachO::S_LAZY_SYMBOL_POINTERS: + case MachO::S_MOD_INIT_FUNC_POINTERS: + case MachO::S_MOD_TERM_FUNC_POINTERS: + case MachO::S_INTERPOSING: + return false; + } +} + const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { + Mangler &Mang, const TargetMachine &TM) const { + checkMachOComdat(GV); // Handle thread local data. if (Kind.isThreadBSS()) return TLSBSSSection; @@ -556,7 +626,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return FourByteConstantSection; if (Kind.isMergeableConst8()) return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) + if (Kind.isMergeableConst16()) return SixteenByteConstantSection; } @@ -585,7 +655,8 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, } const MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { +TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind, + const Constant *C) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. 
if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) @@ -595,55 +666,31 @@ TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { return FourByteConstantSection; if (Kind.isMergeableConst8()) return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) + if (Kind.isMergeableConst16()) return SixteenByteConstantSection; return ReadOnlySection; // .const } -/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide -/// not to emit the UsedDirective for some symbols in llvm.used. -// FIXME: REMOVE this (rdar://7071300) -bool TargetLoweringObjectFileMachO:: -shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { - /// On Darwin, internally linked data beginning with "L" or "l" does not have - /// the directive emitted (this occurs in ObjC metadata). - if (!GV) return false; - - // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. - if (GV->hasLocalLinkage() && !isa<Function>(GV)) { - // FIXME: ObjC metadata is currently emitted as internal symbols that have - // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and - // this horrible hack can go away. - MCSymbol *Sym = getSymbol(*Mang, GV); - if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l') - return false; - } - - return true; -} - -const MCExpr *TargetLoweringObjectFileMachO:: -getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, - MCStreamer &Streamer) const { +const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( + const GlobalValue *GV, unsigned Encoding, Mangler &Mang, + const TargetMachine &TM, MachineModuleInfo *MMI, + MCStreamer &Streamer) const { // The mach-o version of this method defaults to returning a stub reference. if (Encoding & DW_EH_PE_indirect) { MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; + MCSymbol *SSym = + getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. - MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : MachOMMI.getGVStubEntry(SSym); - if (StubSym.getPointer() == 0) { - MCSymbol *Sym = getSymbol(*Mang, GV); + if (!StubSym.getPointer()) { + MCSymbol *Sym = TM.getSymbol(GV, Mang); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -652,27 +699,24 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } - return TargetLoweringObjectFile:: - getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer); + return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, Mang, + TM, MMI, Streamer); } -MCSymbol *TargetLoweringObjectFileMachO:: -getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI) const { +MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( + const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + MachineModuleInfo *MMI) const { // The mach-o version of this method defaults to returning a stub reference. 
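// [Editor's note: illustrative. For a personality symbol such as
// ___gxx_personality_v0 the code below produces the stub
// "___gxx_personality_v0$non_lazy_ptr"; the base name is whatever
// TM.getSymbol and the Mangler yield, so the exact spelling is
// target-dependent.]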
MachineModuleInfoMachO &MachOMMI =
 MMI->getObjFileInfo<MachineModuleInfoMachO>();
- SmallString<128> Name;
- Mang->getNameWithPrefix(Name, GV, true);
- Name += "$non_lazy_ptr";
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM);
 // Add information about the stub reference to MachOMMI so that the stub
 // gets emitted by the asmprinter.
- MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
 MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
- if (StubSym.getPointer() == 0) {
- MCSymbol *Sym = getSymbol(*Mang, GV);
+ if (!StubSym.getPointer()) {
+ MCSymbol *Sym = TM.getSymbol(GV, Mang);
 StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
 }
@@ -718,58 +762,129 @@ getCOFFSectionFlags(SectionKind K) {
 return Flags;
 }
-const MCSection *TargetLoweringObjectFileCOFF::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
+static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ assert(C && "expected GV to have a Comdat!");
+
+ StringRef ComdatGVName = C->getName();
+ const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName);
+ if (!ComdatGV)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' does not exist.");
+
+ if (ComdatGV->getComdat() != C)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' is not a key for its COMDAT.");
+
+ return ComdatGV;
+}
+
+static int getSelectionForCOFF(const GlobalValue *GV) {
+ if (const Comdat *C = GV->getComdat()) {
+ const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);
+ if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey))
+ ComdatKey = GA->getBaseObject();
+ if (ComdatKey == GV) {
+ switch (C->getSelectionKind()) {
+ case Comdat::Any:
+ return COFF::IMAGE_COMDAT_SELECT_ANY;
+ case Comdat::ExactMatch:
+ return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH;
+ case Comdat::Largest:
+ return COFF::IMAGE_COMDAT_SELECT_LARGEST;
+ case Comdat::NoDuplicates:
+ return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ case Comdat::SameSize:
+ return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE;
+ }
+ } else {
+ return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
+ }
+ } else if (GV->isWeakForLinker()) {
+ return COFF::IMAGE_COMDAT_SELECT_ANY;
+ }
+ return 0;
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
 int Selection = 0;
 unsigned Characteristics = getCOFFSectionFlags(Kind);
- SmallString<128> Name(GV->getSection().c_str());
- if (GV->isWeakForLinker()) {
- Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
- Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
- Name.append("$");
- Mang->getNameWithPrefix(Name, GV, false, false);
+ StringRef Name = GV->getSection();
+ StringRef COMDATSymName = "";
+ if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) {
+ Selection = getSelectionForCOFF(GV);
+ const GlobalValue *ComdatGV;
+ if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+ ComdatGV = getComdatGVForCOFF(GV);
+ else
+ ComdatGV = GV;
+
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ COMDATSymName = Sym->getName();
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ } else {
+ Selection = 0;
+ }
 }
 return getContext().getCOFFSection(Name,
 Characteristics,
 Kind,
- "",
+ COMDATSymName,
 Selection);
 }
-static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
 if (Kind.isText())
- return ".text$";
- if (Kind.isBSS ())
- return ".bss$";
- if (Kind.isThreadLocal()) {
- // 'LLVM' is just an arbitary string to ensure that the section name gets
- // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
- return ".tls$LLVM";
- }
+ return ".text";
+ if (Kind.isBSS())
+ return ".bss";
+ if (Kind.isThreadLocal())
+ return ".tls$";
 if (Kind.isWriteable())
- return ".data$";
- return ".rdata$";
+ return ".data";
+ return ".rdata";
 }
 const MCSection *TargetLoweringObjectFileCOFF::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
+ Mangler &Mang, const TargetMachine &TM) const {
+ // If we have -ffunction-sections then we should emit the global value to a
+ // uniqued section specifically for it.
+ bool EmitUniquedSection;
+ if (Kind.isText())
+ EmitUniquedSection = TM.getFunctionSections();
+ else
+ EmitUniquedSection = TM.getDataSections();
 // If this global is linkonce/weak and the target handles this by emitting it
 // into a 'uniqued' section name, create and return the section now.
- if (GV->isWeakForLinker()) {
- const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
- SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
- Mang->getNameWithPrefix(Name, GV, false, false);
-
+ // Section names depend on the name of the symbol, which is not feasible if
+ // the symbol has private linkage.
+ if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
+ !Kind.isCommon()) {
+ const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
 unsigned Characteristics = getCOFFSectionFlags(Kind);
 Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ int Selection = getSelectionForCOFF(GV);
+ if (!Selection)
+ Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ const GlobalValue *ComdatGV;
+ if (GV->hasComdat())
+ ComdatGV = getComdatGVForCOFF(GV);
+ else
+ ComdatGV = GV;
- return getContext().getCOFFSection(Name.str(), Characteristics,
- Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY);
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ StringRef COMDATSymName = Sym->getName();
+ return getContext().getCOFFSection(Name, Characteristics, Kind,
+ COMDATSymName, Selection);
+ }
 }
 if (Kind.isText())
@@ -781,17 +896,28 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
 if (Kind.isReadOnly())
 return ReadOnlySection;
- if (Kind.isBSS())
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon())
 return BSSSection;
 return DataSection;
 }
+StringRef TargetLoweringObjectFileCOFF::
+getDepLibFromLinkerOpt(StringRef LinkerOption) const {
+ const char *LibCmd = "/DEFAULTLIB:";
+ if (LinkerOption.startswith(LibCmd))
+ return LinkerOption.substr(strlen(LibCmd));
+ return StringRef();
+}
+
 void TargetLoweringObjectFileCOFF::
emitModuleFlags(MCStreamer &Streamer,
 ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- Mangler *Mang, const TargetMachine &TM) const {
- MDNode *LinkerOptions = 0;
+ Mangler &Mang, const TargetMachine &TM) const {
+ MDNode *LinkerOptions = nullptr;
 // Look for the "Linker Options" flag, since it's the only one we support.
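// [Editor's note: behavior of getDepLibFromLinkerOpt above, by construction.
// "/DEFAULTLIB:secur32.lib" yields "secur32.lib", while any option not
// starting with "/DEFAULTLIB:" yields an empty StringRef; the library name
// is a made-up example.]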
for (ArrayRef<Module::ModuleFlagEntry>::iterator
@@ -833,3 +959,30 @@ emitModuleFlags(MCStreamer &Streamer,
 }
 }
 }
+
+static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
+ const MCSection *Sec,
+ const MCSymbol *KeySym) {
+ // Return the normal section if we don't have to be associative.
+ if (!KeySym)
+ return Sec;
+
+ // Make an associative section with the same name and kind as the normal
+ // section.
+ const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec);
+ unsigned Characteristics =
+ SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT;
+ return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics,
+ SecCOFF->getKind(), KeySym->getName(),
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym);
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym);
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index f7bf86b..3ca2017 100644
--- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
 #include "llvm/IR/Function.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 5a15243..a3a4fb3 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -101,7 +101,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
 Idx += Offset + 1;
 }
 }
- return NULL;
+ return nullptr;
 }
 /// getMinimalPhysRegClass - Returns the Register Class of a physical
@@ -113,7 +113,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
 // Pick the most specific subregister class of the right type that contains
 // this physreg.
- const TargetRegisterClass* BestRC = 0;
+ const TargetRegisterClass* BestRC = nullptr;
 for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
 const TargetRegisterClass* RC = *I;
 if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
@@ -130,7 +130,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
 static void getAllocatableSetForRC(const MachineFunction &MF,
 const TargetRegisterClass *RC, BitVector &R){
 assert(RC->isAllocatable() && "invalid for nonallocatable sets");
- ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
+ ArrayRef<MCPhysReg> Order = RC->getRawAllocationOrder(MF);
 for (unsigned i = 0; i != Order.size(); ++i)
 R.set(Order[i]);
 }
@@ -164,7 +164,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A,
 for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
 if (unsigned Common = *A++ & *B++)
 return TRI->getRegClass(I + countTrailingZeros(Common));
- return 0;
+ return nullptr;
 }
 const TargetRegisterClass *
@@ -174,7 +174,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
 if (A == B)
 return A;
 if (!A || !B)
- return 0;
+ return nullptr;
 // Register classes are ordered topologically, so the largest common
 // sub-class is the common sub-class with the smallest ID.
@@ -194,7 +194,7 @@ TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
 // The bit mask contains all register classes that are projected into B
 // by Idx. Find a class that is also a sub-class of A.
 return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
- return 0;
+ return nullptr;
 }
 const TargetRegisterClass *TargetRegisterInfo::
@@ -215,7 +215,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
 // Arrange for RCA to be the larger register class so the answer will be
 // found in the first iteration. This makes the search linear for the most
 // common case.
- const TargetRegisterClass *BestRC = 0;
+ const TargetRegisterClass *BestRC = nullptr;
 unsigned *BestPreA = &PreA;
 unsigned *BestPreB = &PreB;
 if (RCA->getSize() < RCB->getSize()) {
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index b9a6b47..f42d47b 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -27,7 +27,6 @@
 //
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "twoaddrinstr"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
@@ -51,6 +50,8 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
+#define DEBUG_TYPE "twoaddrinstr"
+
 STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
 STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
 STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
@@ -144,7 +145,7 @@ public:
 initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
 }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
 AU.setPreservesCFG();
 AU.addRequired<AliasAnalysis>();
 AU.addPreserved<LiveVariables>();
@@ -156,7 +157,7 @@ public:
 }
 /// runOnMachineFunction - Pass entry point.
- bool runOnMachineFunction(MachineFunction&); + bool runOnMachineFunction(MachineFunction&) override; }; } // end anonymous namespace @@ -211,7 +212,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, } // Find the instruction that kills SavedReg. - MachineInstr *KillMI = NULL; + MachineInstr *KillMI = nullptr; if (LIS) { LiveInterval &LI = LIS->getInterval(SavedReg); assert(LI.end() != LI.begin() && @@ -229,7 +230,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SavedReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); + MachineOperand &UseMO = *UI; if (!UseMO.isKill()) continue; KillMI = UseMO.getParent(); @@ -250,12 +251,12 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, // FIXME: This can be sped up if there is an easy way to query whether an // instruction is before or after another instruction. Then we can use // MachineRegisterInfo def / use instead. - MachineOperand *KillMO = NULL; + MachineOperand *KillMO = nullptr; MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; unsigned NumVisited = 0; - for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { + for (MachineBasicBlock::iterator I = std::next(OldPos); I != KillPos; ++I) { MachineInstr *OtherMI = I; // DBG_VALUE cannot be counted against the limit. if (OtherMI->isDebugValue()) @@ -315,9 +316,7 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef) { LastDef = 0; unsigned LastUse = Dist; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); + for (MachineOperand &MO : MRI->reg_operands(Reg)) { MachineInstr *MI = MO.getParent(); if (MI->getParent() != MBB || MI->isDebugValue()) continue; @@ -417,9 +416,9 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); // If there are multiple defs, we can't do a simple analysis, so just // go with what the kill flag says. - if (llvm::next(Begin) != MRI->def_end()) + if (std::next(Begin) != MRI->def_end()) return true; - DefMI = &*Begin; + DefMI = Begin->getParent(); bool IsSrcPhys, IsDstPhys; unsigned SrcReg, DstReg; // If the def is something other than a copy, then it isn't going to @@ -456,10 +455,10 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, unsigned &DstReg, bool &IsDstPhys) { if (!MRI->hasOneNonDBGUse(Reg)) // None or more than one use. 
- return 0; - MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg); + return nullptr; + MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg); if (UseMI.getParent() != MBB) - return 0; + return nullptr; unsigned SrcReg; bool IsSrcPhys; if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) { @@ -471,7 +470,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); return &UseMI; } - return 0; + return nullptr; } /// getMappedReg - Return the physical register the specified virtual register @@ -578,7 +577,7 @@ commuteInstruction(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); - if (NewMI == 0) { + if (NewMI == nullptr) { DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); return false; } @@ -647,7 +646,7 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, if (!Sunk) { DistanceMap.insert(std::make_pair(NewMI, Dist)); mi = NewMI; - nmi = llvm::next(mi); + nmi = std::next(mi); } // Update source and destination register maps. @@ -757,7 +756,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = 0; + MachineInstr *KillMI = nullptr; if (LIS) { LiveInterval &LI = LIS->getInterval(Reg); assert(LI.end() != LI.begin() && @@ -816,7 +815,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Move the copies connected to MI down as well. MachineBasicBlock::iterator Begin = MI; - MachineBasicBlock::iterator AfterMI = llvm::next(Begin); + MachineBasicBlock::iterator AfterMI = std::next(Begin); MachineBasicBlock::iterator End = AfterMI; while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) { @@ -876,7 +875,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, } // Move debug info as well. - while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue()) + while (Begin != MBB->begin() && std::prev(Begin)->isDebugValue()) --Begin; nmi = End; @@ -891,7 +890,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, LIS->handleMove(CopyMI); InsertPos = CopyMI; } - End = llvm::next(MachineBasicBlock::iterator(MI)); + End = std::next(MachineBasicBlock::iterator(MI)); } // Copies following MI may have been moved as well. @@ -914,19 +913,17 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, /// instruction too close to the defs of its register dependencies. 
bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI) { - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), - DE = MRI->def_end(); DI != DE; ++DI) { - MachineInstr *DefMI = &*DI; - if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike()) + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { + if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike()) continue; - if (DefMI == MI) + if (&DefMI == MI) return true; // MI is defining something KillMI uses - DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI); + DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(&DefMI); if (DDI == DistanceMap.end()) return true; // Below MI unsigned DefDist = DDI->second; assert(Dist > DefDist && "Visited def already?"); - if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist)) + if (TII->getInstrLatency(InstrItins, &DefMI) > (Dist - DefDist)) return true; } return false; @@ -951,7 +948,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = 0; + MachineInstr *KillMI = nullptr; if (LIS) { LiveInterval &LI = LIS->getInterval(Reg); assert(LI.end() != LI.begin() && @@ -1060,15 +1057,15 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Move the old kill above MI, don't forget to move debug info as well. MachineBasicBlock::iterator InsertPos = mi; - while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue()) + while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugValue()) --InsertPos; MachineBasicBlock::iterator From = KillMI; - MachineBasicBlock::iterator To = llvm::next(From); - while (llvm::prior(From)->isDebugValue()) + MachineBasicBlock::iterator To = std::next(From); + while (std::prev(From)->isDebugValue()) --From; MBB->splice(InsertPos, MBB, From, To); - nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. + nmi = std::prev(InsertPos); // Backtrack so we process the moved instr. DistanceMap.erase(DI); // Update live variables @@ -1317,13 +1314,14 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); // Deal with <undef> uses immediately - simply rewrite the src operand. - if (SrcMO.isUndef()) { + if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. if (TargetRegisterInfo::isVirtualRegister(DstReg)) if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, TRI, *MF)) MRI->constrainRegClass(DstReg, RC); SrcMO.setReg(DstReg); + SrcMO.setSubReg(0); DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); continue; } @@ -1349,6 +1347,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, unsigned LastCopiedReg = 0; SlotIndex LastCopyIdx; unsigned RegB = 0; + unsigned SubRegB = 0; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; unsigned DstIdx = TiedPairs[tpi].second; @@ -1359,6 +1358,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Grab RegB from the instruction because it may have changed if the // instruction was commuted. 
RegB = MI->getOperand(SrcIdx).getReg();
+ SubRegB = MI->getOperand(SrcIdx).getSubReg();
 if (RegA == RegB) {
 // The register is tied to multiple destinations (or else we would
@@ -1383,8 +1383,25 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
 #endif
 // Emit a copy.
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+ MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA);
+ // If this operand is folding a truncation, the truncation now moves to the
+ // copy so that the register classes remain valid for the operands.
+ MIB.addReg(RegB, 0, SubRegB);
+ const TargetRegisterClass *RC = MRI->getRegClass(RegB);
+ if (SubRegB) {
+ if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
+ SubRegB) &&
+ "tied subregister must be a truncation");
+ // The superreg class will not be used to constrain the subreg class.
+ RC = nullptr;
+ } else {
+ assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
+ && "tied subregister must be a truncation");
+ }
+ }
 // Update DistanceMap.
 MachineBasicBlock::iterator PrevMI = MI;
@@ -1404,7 +1421,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
 }
 }
- DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+ DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
 MachineOperand &MO = MI->getOperand(SrcIdx);
 assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
@@ -1417,9 +1434,12 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
 // Make sure regA is a legal regclass for the SrcIdx operand.
 if (TargetRegisterInfo::isVirtualRegister(RegA) &&
 TargetRegisterInfo::isVirtualRegister(RegB))
- MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
-
+ MRI->constrainRegClass(RegA, RC);
 MO.setReg(RegA);
+ // The getMatchingSuper asserts guarantee that the register class projected
+ // by SubRegB is compatible with RegA with no subregister. So regardless of
+ // whether the destination operand writes a subreg, the source operand
+ // should not.
+ MO.setSubReg(0);
 // Propagate SrcRegMap.
 SrcRegMap[RegA] = RegB;
@@ -1431,12 +1451,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
 // Replace other (un-tied) uses of regB with LastCopiedReg.
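// [Editor's note: sketch of the situation handled next. Given a two-address
// instruction "%a<tied0> = op %b<tied1>, %b", the tied use of %b has just
// been rewritten to %a via the inserted COPY, and the loop below retargets
// the remaining plain use of %b (with matching subregister) to LastCopiedReg
// so kill flags stay consistent. Register names are hypothetical.]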
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index e0aa405..704736f 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -16,10 +16,10 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "regalloc"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "LiveDebugVariables.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -39,6 +39,8 @@
 #include <algorithm>
 using namespace llvm;

+#define DEBUG_TYPE "regalloc"
+
 STATISTIC(NumSpillSlots, "Number of spill slots allocated");
 STATISTIC(NumIdCopies,   "Number of identity moves eliminated after rewriting");

@@ -160,6 +162,7 @@ class VirtRegRewriter : public MachineFunctionPass {
   SlotIndexes *Indexes;
   LiveIntervals *LIS;
   VirtRegMap *VRM;
+  SparseSet<unsigned> PhysRegs;

   void rewrite();
   void addMBBLiveIns();
@@ -167,9 +170,9 @@ public:
   static char ID;
   VirtRegRewriter() : MachineFunctionPass(ID) {}

-  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;

-  virtual bool runOnMachineFunction(MachineFunction&);
+  bool runOnMachineFunction(MachineFunction&) override;
 };
 } // end anonymous namespace
@@ -267,6 +270,20 @@ void VirtRegRewriter::rewrite() {
   SmallVector<unsigned, 8> SuperKills;
   SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;

+  // Here we have a SparseSet to hold which PhysRegs are actually encountered
+  // in the MF we are about to iterate over so that later when we call
+  // setPhysRegUsed, we are only doing it for physRegs that were actually found
+  // in the program and not for all of the possible physRegs for the given
+  // target architecture. If the target has a lot of physRegs, then for a small
+  // program there will be a significant compile time reduction here.
+  PhysRegs.clear();
+  PhysRegs.setUniverse(TRI->getNumRegs());
+
+  // The function with uwtable should guarantee that the stack unwinder
+  // can unwind the stack to the previous frame. Thus, we can't apply the
+  // noreturn optimization if the caller function has uwtable attribute.
+  bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable);
+
   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
     DEBUG(MBBI->print(dbgs(), Indexes));
@@ -276,9 +293,12 @@ void VirtRegRewriter::rewrite() {
       MachineInstr *MI = MII;
       ++MII;

-      // Check if this instruction is a call to a noreturn function.
-      // If so, all the definitions set by this instruction can be ignored.
-      if (IsExitBB && MI->isCall())
+      // Check if this instruction is a call to a noreturn function. If this
+      // is a call to noreturn function and we don't need the stack unwinding
+      // functionality (i.e. this function does not have uwtable attribute and
+      // the callee function has the nounwind attribute), then we can ignore
+      // the definitions set by this instruction.
+      if (!HasUWTable && IsExitBB && MI->isCall()) {
         for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
              MOE = MI->operands_end(); MOI != MOE; ++MOI) {
           MachineOperand &MO = *MOI;
@@ -294,6 +314,7 @@ void VirtRegRewriter::rewrite() {
           NoReturnInsts.insert(MI);
           break;
         }
+      }

       for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
            MOE = MI->operands_end(); MOI != MOE; ++MOI) {
@@ -303,6 +324,15 @@ void VirtRegRewriter::rewrite() {
         if (MO.isRegMask())
           MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());

+        // If we encounter a VirtReg or PhysReg then get at the PhysReg and add
+        // it to the physreg bitset. Later we use only the PhysRegs that were
+        // actually encountered in the MF to populate the MRI's used physregs.
+        if (MO.isReg() && MO.getReg())
+          PhysRegs.insert(
+              TargetRegisterInfo::isVirtualRegister(MO.getReg()) ?
+              VRM->getPhys(MO.getReg()) :
+              MO.getReg());
+
         if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
           continue;
         unsigned VirtReg = MO.getReg();
@@ -376,20 +406,21 @@ void VirtRegRewriter::rewrite() {

   // Tell MRI about physical registers in use.
   if (NoReturnInsts.empty()) {
-    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
-      if (!MRI->reg_nodbg_empty(Reg))
-        MRI->setPhysRegUsed(Reg);
+    for (SparseSet<unsigned>::iterator
+        RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI)
+      if (!MRI->reg_nodbg_empty(*RegI))
+        MRI->setPhysRegUsed(*RegI);
   } else {
-    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) {
+    for (SparseSet<unsigned>::iterator
+        I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) {
+      unsigned Reg = *I;
       if (MRI->reg_nodbg_empty(Reg))
         continue;
       // Check if this register has a use that will impact the rest of the
       // code. Uses in debug and noreturn instructions do not impact the
       // generated code.
-      for (MachineRegisterInfo::reg_nodbg_iterator It =
-           MRI->reg_nodbg_begin(Reg),
-           EndIt = MRI->reg_nodbg_end(); It != EndIt; ++It) {
-        if (!NoReturnInsts.count(&(*It))) {
+      for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) {
+        if (!NoReturnInsts.count(&It)) {
           MRI->setPhysRegUsed(Reg);
           break;
         }
@@ -397,3 +428,4 @@ void VirtRegRewriter::rewrite() {
     }
   }
 }
+
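For illustration, a self-contained sketch of the SparseSet pattern introduced above: size the set once for the whole register universe, insert only the registers actually encountered, then iterate over that much smaller set. The driver function countDistinctRegs is hypothetical; register numbers must be smaller than the universe passed to setUniverse.

    #include "llvm/ADT/SparseSet.h"
    using namespace llvm;

    static unsigned countDistinctRegs(const unsigned *Regs, unsigned N,
                                      unsigned NumTargetRegs) {
      SparseSet<unsigned> Seen;
      Seen.setUniverse(NumTargetRegs);  // cf. TRI->getNumRegs() in the hunk
      for (unsigned i = 0; i != N; ++i)
        Seen.insert(Regs[i]);           // duplicate inserts are no-ops
      unsigned Count = 0;
      for (SparseSet<unsigned>::iterator I = Seen.begin(), E = Seen.end();
           I != E; ++I)
        ++Count;                        // visits each distinct register once
      return Count;
    }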
diff --git a/contrib/llvm/lib/CodeGen/module.modulemap b/contrib/llvm/lib/CodeGen/module.modulemap
new file mode 100644
index 0000000..d4f68bc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/module.modulemap
@@ -0,0 +1 @@
+module CodeGen { requires cplusplus umbrella "." module * { export * } }