| author    | dim <dim@FreeBSD.org> | 2015-05-27 20:26:41 +0000 |
| committer | dim <dim@FreeBSD.org> | 2015-05-27 20:26:41 +0000 |
| commit    | 5ef8fd3549d38e883a31881636be3dc2a275de20 (patch) |
| tree      | bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib/CodeGen |
| parent    | 77794ebe2d5718eb502c93ec32f8ccae4d8a0b7b (diff) |
| parent    | 782067d0278612ee75d024b9b135c221c327e9e8 (diff) |
Merge llvm trunk r238337 from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
164 files changed, 19370 insertions, 12286 deletions
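Much of the mechanical churn in the AsmPrinter hunks below follows from a single ownership change: AsmPrinter's OutStreamer becomes a std::unique_ptr<MCStreamer> (note the new constructor signature and the dropped `delete &OutStreamer` in the destructor), so every `OutStreamer.` access is rewritten to `OutStreamer->`. A minimal sketch of that before/after pattern, using simplified stand-in types rather than the real LLVM classes:

    #include <memory>
    #include <utility>

    struct Streamer { void emit() {} };

    class Printer {
      // Was: Streamer &S;  -- a reference the destructor had to free by hand
      // with "delete &S".
      std::unique_ptr<Streamer> S;  // Now: owned outright, freed automatically.
    public:
      explicit Printer(std::unique_ptr<Streamer> St) : S(std::move(St)) {}
      void run() { S->emit(); }     // Was: S.emit();
    };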
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 69c3685..58b87e1 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -296,6 +296,16 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); + // FIXME: We must leave subregisters of live super registers as live, so that + // we don't clear out the register tracking information for subregisters of + // super registers we're still tracking (and with which we're unioning + // subregister definitions). + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) { + DEBUG(if (!header && footer) dbgs() << footer); + return; + } + if (!State->IsLive(Reg)) { KillIndices[Reg] = KillIdx; DefIndices[Reg] = ~0u; diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h index 12cf95b..18c8bb5 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -63,11 +63,11 @@ class RegisterClassInfo; /// Map registers to all their references within a live range. std::multimap<unsigned, RegisterReference> RegRefs; - /// The index of the most recent kill (proceding bottom-up), + /// The index of the most recent kill (proceeding bottom-up), /// or ~0u if the register is not live. std::vector<unsigned> KillIndices; - /// The index of the most recent complete def (proceding bottom + /// The index of the most recent complete def (proceeding bottom /// up), or ~0u if the register is live. std::vector<unsigned> DefIndices; @@ -127,7 +127,7 @@ class RegisterClassInfo; AggressiveAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI, TargetSubtargetInfo::RegClassVector& CriticalPathRCs); - ~AggressiveAntiDepBreaker(); + ~AggressiveAntiDepBreaker() override; /// Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB) override; diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 2e8af9e..3224fac 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -295,8 +295,8 @@ static const Value *getNoopInput(const Value *V, } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) { // Value may come from either the aggregate or the scalar ArrayRef<unsigned> InsertLoc = IVI->getIndices(); - if (std::equal(InsertLoc.rbegin(), InsertLoc.rend(), - ValLoc.rbegin())) { + if (ValLoc.size() >= InsertLoc.size() && + std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) { // The type being inserted is a nested sub-type of the aggregate; we // have to remove those initial indices to get the location we're // interested in for the operand. @@ -312,8 +312,7 @@ static const Value *getNoopInput(const Value *V, // previous aggregate. Combine the two paths to obtain the true address of // our element. ArrayRef<unsigned> ExtractLoc = EVI->getIndices(); - std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(), - std::back_inserter(ValLoc)); + ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend()); NoopInput = Op; } // Terminate if we couldn't find anything to look through. 
@@ -518,8 +517,9 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { return false; } + const Function *F = ExitBB->getParent(); return returnTypeIsEligibleForTailCall( - ExitBB->getParent(), I, Ret, *TM.getSubtargetImpl()->getTargetLowering()); + F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering()); } bool llvm::returnTypeIsEligibleForTailCall(const Function *F, @@ -600,10 +600,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // The manipulations performed when we're looking through an insertvalue or // an extractvalue would happen at the front of the RetPath list, so since // we have to copy it anyway it's more efficient to create a reversed copy. - using std::copy; - SmallVector<unsigned, 4> TmpRetPath, TmpCallPath; - copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath)); - copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath)); + SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend()); + SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend()); // Finally, we can check whether the value produced by the tail call at this // index is compatible with the value we return. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 66c6c63..4cb460a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -36,13 +36,12 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -ARMException::ARMException(AsmPrinter *A) - : EHStreamer(A), shouldEmitCFI(false) {} +ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} ARMException::~ARMException() {} ARMTargetStreamer &ARMException::getTargetStreamer() { - MCTargetStreamer &TS = *Asm->OutStreamer.getTargetStreamer(); + MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer(); return static_cast<ARMTargetStreamer &>(TS); } @@ -50,48 +49,35 @@ ARMTargetStreamer &ARMException::getTargetStreamer() { /// content. void ARMException::endModule() { if (shouldEmitCFI) - Asm->OutStreamer.EmitCFISections(false, true); + Asm->OutStreamer->EmitCFISections(false, true); } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void ARMException::beginFunction(const MachineFunction *MF) { if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) getTargetStreamer().emitFnStart(); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); // See if we need call frame info. AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); assert(MoveType != AsmPrinter::CFI_M_EH && "non-EH CFI not yet supported in prologue with EHABI lowering"); if (MoveType == AsmPrinter::CFI_M_Debug) { shouldEmitCFI = true; - Asm->OutStreamer.EmitCFIStartProc(false); + Asm->OutStreamer->EmitCFIStartProc(false); } } /// endFunction - Gather and emit post-function exception information. /// -void ARMException::endFunction(const MachineFunction *) { - if (shouldEmitCFI) - Asm->OutStreamer.EmitCFIEndProc(); - - // Map all labels and get rid of any dead landing pads. 
- MMI->TidyLandingPads(); - +void ARMException::endFunction(const MachineFunction *MF) { ARMTargetStreamer &ATS = getTargetStreamer(); if (!Asm->MF->getFunction()->needsUnwindTableEntry() && MMI->getLandingPads().empty()) ATS.emitCantUnwind(); else { - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); if (!MMI->getLandingPads().empty()) { // Emit references to personality. - if (const Function * Personality = - MMI->getPersonalities()[MMI->getPersonalityIndex()]) { + if (const Function *Personality = MMI->getPersonality()) { MCSymbol *PerSym = Asm->getSymbol(Personality); - Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); + Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global); ATS.emitPersonality(PerSym); } @@ -111,13 +97,13 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); int Entry = 0; // Emit the Catch TypeInfos. if (VerboseAsm && !TypeInfos.empty()) { - Asm->OutStreamer.AddComment(">> Catch TypeInfos <<"); - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); Entry = TypeInfos.size(); } @@ -125,14 +111,14 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalValue *GV = *I; if (VerboseAsm) - Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); + Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); } // Emit the Exception Specifications. if (VerboseAsm && !FilterIds.empty()) { - Asm->OutStreamer.AddComment(">> Filter TypeInfos <<"); - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); Entry = 0; } for (std::vector<unsigned>::const_iterator @@ -141,7 +127,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { if (VerboseAsm) { --Entry; if (TypeID != 0) - Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); + Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); } Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]), diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 8dab5e5..2487aba 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -24,12 +24,12 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { } // Emit addresses into the section given. -void AddressPool::emit(AsmPrinter &Asm, const MCSection *AddrSection) { +void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { if (Pool.empty()) return; // Start the dwarf addr section. 
- Asm.OutStreamer.SwitchSection(AddrSection); + Asm.OutStreamer->SwitchSection(AddrSection); // Order the address pool entries by ID SmallVector<const MCExpr *, 64> Entries(Pool.size()); @@ -41,5 +41,5 @@ void AddressPool::emit(AsmPrinter &Asm, const MCSection *AddrSection) { : MCSymbolRefExpr::Create(I.first, Asm.OutContext); for (const MCExpr *Entry : Entries) - Asm.OutStreamer.EmitValue(Entry, Asm.getDataLayout().getPointerSize()); + Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize()); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h index 802e050..211fc98 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -40,7 +40,7 @@ public: /// label/symbol. unsigned getIndex(const MCSymbol *Sym, bool TLS = false); - void emit(AsmPrinter &Asm, const MCSection *AddrSection); + void emit(AsmPrinter &Asm, MCSection *AddrSection); bool isEmpty() { return Pool.empty(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index bbed808..206be70 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -28,7 +28,7 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Mangler.h" @@ -41,9 +41,11 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" @@ -75,11 +77,11 @@ static gcp_map_type &getGCMap(void *&P) { /// getGVAlignmentLog2 - Return the alignment to use for the specified global /// value in log2 form. This rounds up to the preferred alignment if possible /// and legal. -static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL, unsigned InBits = 0) { unsigned NumBits = 0; if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - NumBits = TD.getPreferredAlignmentLog(GVar); + NumBits = DL.getPreferredAlignmentLog(GVar); // If InBits is specified, round it to it. 
if (InBits > NumBits) @@ -98,15 +100,19 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, return NumBits; } -AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) +AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), - MII(tm.getSubtargetImpl()->getInstrInfo()), - OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(nullptr), - LastFn(0), Counter(~0U), SetCounter(0) { - DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr; - CurrentFnSym = CurrentFnSymForSize = nullptr; + OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)), + LastMI(nullptr), LastFn(0), Counter(~0U) { + DD = nullptr; + MMI = nullptr; + LI = nullptr; + MF = nullptr; + CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr; + CurrentFnBegin = nullptr; + CurrentFnEnd = nullptr; GCMetadataPrinters = nullptr; - VerboseAsm = Streamer.isVerboseAsm(); + VerboseAsm = OutStreamer->isVerboseAsm(); } AsmPrinter::~AsmPrinter() { @@ -118,8 +124,6 @@ AsmPrinter::~AsmPrinter() { delete &GCMap; GCMetadataPrinters = nullptr; } - - delete &OutStreamer; } /// getFunctionNumber - Return a unique ID for the current function. @@ -129,16 +133,17 @@ unsigned AsmPrinter::getFunctionNumber() const { } const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { - return TM.getSubtargetImpl()->getTargetLowering()->getObjFileLowering(); + return *TM.getObjFileLowering(); } /// getDataLayout - Return information about data layout. const DataLayout &AsmPrinter::getDataLayout() const { - return *TM.getSubtargetImpl()->getDataLayout(); + return *TM.getDataLayout(); } const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { - return TM.getSubtarget<MCSubtargetInfo>(); + assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); + return MF->getSubtarget<MCSubtargetInfo>(); } void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { @@ -151,7 +156,7 @@ StringRef AsmPrinter::getTargetTriple() const { /// getCurrentSection() - Return the current section we are emitting to. const MCSection *AsmPrinter::getCurrentSection() const { - return OutStreamer.getCurrentSection().first; + return OutStreamer->getCurrentSection().first; } @@ -173,9 +178,9 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - OutStreamer.InitSections(false); + OutStreamer->InitSections(false); - Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout()); + Mang = new Mangler(TM.getDataLayout()); // Emit the version-min deplyment target directive if needed. // @@ -191,9 +196,9 @@ bool AsmPrinter::doInitialization(Module &M) { TT.getOSVersion(Major, Minor, Update); // If there is a version specified, Major will be non-zero. if (Major) - OutStreamer.EmitVersionMin((TT.isMacOSX() ? - MCVM_OSXVersionMin : MCVM_IOSVersionMin), - Major, Minor, Update); + OutStreamer->EmitVersionMin((TT.isMacOSX() ? + MCVM_OSXVersionMin : MCVM_IOSVersionMin), + Major, Minor, Update); } // Allow the target to emit any magic that it wants at the start of the file. @@ -203,7 +208,7 @@ bool AsmPrinter::doInitialization(Module &M) { // don't, this at least helps the user find where a global came from. 
if (MAI->hasSingleParameterDotFile()) { // .file "foo.c" - OutStreamer.EmitFileDirective(M.getModuleIdentifier()); + OutStreamer->EmitFileDirective(M.getModuleIdentifier()); } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -214,11 +219,15 @@ bool AsmPrinter::doInitialization(Module &M) { // Emit module-level inline asm if it exists. if (!M.getModuleInlineAsm().empty()) { - OutStreamer.AddComment("Start of file scope inline assembly"); - OutStreamer.AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()+"\n"); - OutStreamer.AddComment("End of file scope inline assembly"); - OutStreamer.AddBlankLine(); + // We're at the module level. Construct MCSubtarget from the default CPU + // and target triple. + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); + OutStreamer->AddComment("Start of file scope inline assembly"); + OutStreamer->AddBlankLine(); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI, TM.Options.MCOptions); + OutStreamer->AddComment("End of file scope inline assembly"); + OutStreamer->AddBlankLine(); } if (MAI->doesSupportDebugInformation()) { @@ -254,10 +263,11 @@ bool AsmPrinter::doInitialization(Module &M) { case ExceptionHandling::ARM: ES = new ARMException(this); break; - case ExceptionHandling::ItaniumWinEH: - case ExceptionHandling::MSVC: + case ExceptionHandling::WinEH: switch (MAI->getWinEHEncodingType()) { default: llvm_unreachable("unsupported unwinding information encoding"); + case WinEH::EncodingType::Invalid: + break; case WinEH::EncodingType::Itanium: ES = new Win64Exception(this); break; @@ -286,20 +296,20 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { case GlobalValue::WeakODRLinkage: if (MAI->hasWeakDefDirective()) { // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); if (!canBeHidden(GV, *MAI)) // .weak_definition _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); } else if (MAI->hasLinkOnceDirective()) { // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); //NOTE: linkonce is handled by the section the symbol was assigned to. } else { // .weak _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak); } return; case GlobalValue::AppendingLinkage: @@ -308,7 +318,7 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { case GlobalValue::ExternalLinkage: // If external or appending, declare as a global symbol. // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); return; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: @@ -337,10 +347,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (EmitSpecialLLVMGlobal(GV)) return; + // Skip the emission of global equivalents. The symbol can be emitted later + // on by emitGlobalGOTEquivs in case it turns out to be needed. 
+ if (GlobalGOTEquivs.count(getSymbol(GV))) + return; + if (isVerbose()) { - GV->printAsOperand(OutStreamer.GetCommentOS(), + GV->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; + OutStreamer->GetCommentOS() << '\n'; } } @@ -356,11 +371,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { "' is already defined"); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global @@ -384,16 +399,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { Align = 0; // .comm _foo, 42, 4 - OutStreamer.EmitCommonSymbol(GVSym, Size, Align); + OutStreamer->EmitCommonSymbol(GVSym, Size, Align); return; } // Handle local BSS symbols. if (MAI->hasMachoZeroFillDirective()) { - const MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); + MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // .zerofill __DATA, __bss, _foo, 400, 5 - OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align); + OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align); return; } @@ -405,7 +420,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Prefer to simply fall back to .local / .comm in this case. if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 - OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align); + OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align); return; } @@ -413,14 +428,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { Align = 0; // .local _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 - OutStreamer.EmitCommonSymbol(GVSym, Size, Align); + OutStreamer->EmitCommonSymbol(GVSym, Size, Align); return; } - const MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); + MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // Handle the zerofill directive on darwin, which is a special form of BSS // emission. @@ -428,9 +443,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. 
// .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); // .zerofill __DATA, __common, _foo, 400, 5 - OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); + OutStreamer->EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); return; } @@ -447,59 +462,58 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol MCSymbol *MangSym = - OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); + OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); if (GVKind.isThreadBSS()) { TheSection = getObjFileLowering().getTLSBSSSection(); - OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); + OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); } else if (GVKind.isThreadData()) { - OutStreamer.SwitchSection(TheSection); + OutStreamer->SwitchSection(TheSection); EmitAlignment(AlignLog, GV); - OutStreamer.EmitLabel(MangSym); + OutStreamer->EmitLabel(MangSym); EmitGlobalConstant(GV->getInitializer()); } - OutStreamer.AddBlankLine(); + OutStreamer->AddBlankLine(); // Emit the variable struct for the runtime. - const MCSection *TLVSect - = getObjFileLowering().getTLSExtraDataSection(); + MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection(); - OutStreamer.SwitchSection(TLVSect); + OutStreamer->SwitchSection(TLVSect); // Emit the linkage here. EmitLinkage(GV, GVSym); - OutStreamer.EmitLabel(GVSym); + OutStreamer->EmitLabel(GVSym); // Three pointers in size: // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer unsigned PtrSize = DL->getPointerTypeSize(GV->getType()); - OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), + OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize); - OutStreamer.EmitIntValue(0, PtrSize); - OutStreamer.EmitSymbolValue(MangSym, PtrSize); + OutStreamer->EmitIntValue(0, PtrSize); + OutStreamer->EmitSymbolValue(MangSym, PtrSize); - OutStreamer.AddBlankLine(); + OutStreamer->AddBlankLine(); return; } - OutStreamer.SwitchSection(TheSection); + OutStreamer->SwitchSection(TheSection); EmitLinkage(GV, GVSym); EmitAlignment(AlignLog, GV); - OutStreamer.EmitLabel(GVSym); + OutStreamer->EmitLabel(GVSym); EmitGlobalConstant(GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); + OutStreamer->EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); - OutStreamer.AddBlankLine(); + OutStreamer->AddBlankLine(); } /// EmitFunctionHeader - This method emits the header for the current @@ -511,20 +525,21 @@ void AsmPrinter::EmitFunctionHeader() { // Print the 'header' of function. 
const Function *F = MF->getFunction(); - OutStreamer.SwitchSection( + OutStreamer->SwitchSection( getObjFileLowering().SectionForGlobal(F, *Mang, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); EmitLinkage(F, CurrentFnSym); - EmitAlignment(MF->getAlignment(), F); + if (MAI->hasFunctionAlignment()) + EmitAlignment(MF->getAlignment(), F); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); if (isVerbose()) { - F->printAsOperand(OutStreamer.GetCommentOS(), + F->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, F->getParent()); - OutStreamer.GetCommentOS() << '\n'; + OutStreamer->GetCommentOS() << '\n'; } // Emit the prefix data. @@ -541,8 +556,19 @@ void AsmPrinter::EmitFunctionHeader() { std::vector<MCSymbol*> DeadBlockSyms; MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms); for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { - OutStreamer.AddComment("Address taken block that was later removed"); - OutStreamer.EmitLabel(DeadBlockSyms[i]); + OutStreamer->AddComment("Address taken block that was later removed"); + OutStreamer->EmitLabel(DeadBlockSyms[i]); + } + + if (CurrentFnBegin) { + if (MAI->useAssignmentForEHBegin()) { + MCSymbol *CurPos = OutContext.createTempSymbol(); + OutStreamer->EmitLabel(CurPos); + OutStreamer->EmitAssignment(CurrentFnBegin, + MCSymbolRefExpr::Create(CurPos, OutContext)); + } else { + OutStreamer->EmitLabel(CurrentFnBegin); + } } // Emit pre-function debug and/or EH information. @@ -570,13 +596,13 @@ void AsmPrinter::EmitFunctionEntryLabel() { report_fatal_error("'" + Twine(CurrentFnSym->getName()) + "' label emitted multiple times to assembly file"); - return OutStreamer.EmitLabel(CurrentFnSym); + return OutStreamer->EmitLabel(CurrentFnSym); } /// emitComments - Pretty-print comments for instructions. static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); - const TargetMachine &TM = MF->getTarget(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); // Check for spills and reloads int FI; @@ -586,24 +612,20 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { // We assume a single instruction only has a spill or reload, not // both. 
const MachineMemOperand *MMO; - if (TM.getSubtargetImpl()->getInstrInfo()->isLoadFromStackSlotPostFE(&MI, - FI)) { + if (TII->isLoadFromStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Reload\n"; } - } else if (TM.getSubtargetImpl()->getInstrInfo()->hasLoadFromStackSlot( - &MI, MMO, FI)) { + } else if (TII->hasLoadFromStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Reload\n"; - } else if (TM.getSubtargetImpl()->getInstrInfo()->isStoreToStackSlotPostFE( - &MI, FI)) { + } else if (TII->isStoreToStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Spill\n"; } - } else if (TM.getSubtargetImpl()->getInstrInfo()->hasStoreToStackSlot( - &MI, MMO, FI)) { + } else if (TII->hasStoreToStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } @@ -617,10 +639,9 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// that is an implicit def. void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - OutStreamer.AddComment( - Twine("implicit-def: ") + - TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo)); - OutStreamer.AddBlankLine(); + OutStreamer->AddComment(Twine("implicit-def: ") + + MMI->getContext().getRegisterInfo()->getName(RegNo)); + OutStreamer->AddBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { @@ -629,11 +650,11 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { const MachineOperand &Op = MI->getOperand(i); assert(Op.isReg() && "KILL instruction must have only register operands"); Str += ' '; - Str += AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Op.getReg()); + Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg()); Str += (Op.isDef() ? "<def>" : "<kill>"); } - AP.OutStreamer.AddComment(Str); - AP.OutStreamer.AddBlankLine(); + AP.OutStreamer->AddComment(Str); + AP.OutStreamer->AddBlankLine(); } /// emitDebugValueComment - This method handles the target-independent form @@ -648,18 +669,18 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { raw_svector_ostream OS(Str); OS << "DEBUG_VALUE: "; - DIVariable V = MI->getDebugVariable(); - if (V.getContext().isSubprogram()) { - StringRef Name = DISubprogram(V.getContext()).getDisplayName(); + const DILocalVariable *V = MI->getDebugVariable(); + if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) { + StringRef Name = SP->getDisplayName(); if (!Name.empty()) OS << Name << ":"; } - OS << V.getName(); + OS << V->getName(); - DIExpression Expr = MI->getDebugExpression(); - if (Expr.isVariablePiece()) - OS << " [piece offset=" << Expr.getPieceOffset() - << " size=" << Expr.getPieceSize() << "]"; + const DIExpression *Expr = MI->getDebugExpression(); + if (Expr->isBitPiece()) + OS << " [bit_piece offset=" << Expr->getBitPieceOffset() + << " size=" << Expr->getBitPieceSize() << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. 
@@ -691,8 +712,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { Reg = MI->getOperand(0).getReg(); } else { assert(MI->getOperand(0).isFI() && "Unknown operand type"); - const TargetFrameLowering *TFI = - AP.TM.getSubtargetImpl()->getFrameLowering(); + const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); Offset += TFI->getFrameIndexReference(*AP.MF, MI->getOperand(0).getIndex(), Reg); Deref = true; @@ -701,19 +721,19 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // Suppress offset, it is not meaningful here. OS << "undef"; // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer.emitRawComment(OS.str()); + AP.OutStreamer->emitRawComment(OS.str()); return true; } if (Deref) OS << '['; - OS << AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Reg); + OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg); } if (Deref) OS << '+' << Offset << ']'; // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer.emitRawComment(OS.str()); + AP.OutStreamer->emitRawComment(OS.str()); return true; } @@ -754,13 +774,15 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { int FrameOffset = MI.getOperand(1).getImm(); // Emit a symbol assignment. - OutStreamer.EmitAssignment(FrameAllocSym, + OutStreamer->EmitAssignment(FrameAllocSym, MCConstantExpr::Create(FrameOffset, OutContext)); } /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { + EmitFunctionHeader(); + // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); @@ -789,7 +811,7 @@ void AsmPrinter::EmitFunctionBody() { } if (isVerbose()) - emitComments(MI, OutStreamer.GetCommentOS()); + emitComments(MI, OutStreamer->GetCommentOS()); switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: @@ -802,7 +824,7 @@ void AsmPrinter::EmitFunctionBody() { case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: - OutStreamer.EmitLabel(MI.getOperand(0).getMCSymbol()); + OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol()); break; case TargetOpcode::INLINEASM: EmitInlineAsm(&MI); @@ -841,13 +863,13 @@ void AsmPrinter::EmitFunctionBody() { // labels from collapsing together. Just emit a noop. if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) { MCInst Noop; - TM.getSubtargetImpl()->getInstrInfo()->getNoopForMachoTarget(Noop); - OutStreamer.AddComment("avoids zero-length function"); + MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop); + OutStreamer->AddComment("avoids zero-length function"); // Targets can opt-out of emitting the noop here by leaving the opcode // unspecified. if (Noop.getOpcode()) - OutStreamer.EmitInstruction(Noop, getSubtargetInfo()); + OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); } const Function *F = MF->getFunction(); @@ -857,29 +879,41 @@ void AsmPrinter::EmitFunctionBody() { MCSymbol *Sym = GetBlockAddressSymbol(&BB); if (Sym->isDefined()) continue; - OutStreamer.AddComment("Address of block that was removed by CodeGen"); - OutStreamer.EmitLabel(Sym); + OutStreamer->AddComment("Address of block that was removed by CodeGen"); + OutStreamer->EmitLabel(Sym); } // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); + if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || + MAI->hasDotTypeDotSizeDirective()) { + // Create a symbol for the end of function. 
+ CurrentFnEnd = createTempSymbol("func_end"); + OutStreamer->EmitLabel(CurrentFnEnd); + } + // If the target wants a .size directive for the size of the function, emit // it. if (MAI->hasDotTypeDotSizeDirective()) { - // Create a symbol for the end of function, so we can get the size as - // difference between the function label and the temp label. - MCSymbol *FnEndLabel = OutContext.CreateTempSymbol(); - OutStreamer.EmitLabel(FnEndLabel); - + // We can get the size as difference between the function label and the + // temp label. const MCExpr *SizeExp = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(CurrentFnEnd, OutContext), MCSymbolRefExpr::Create(CurrentFnSymForSize, OutContext), OutContext); - OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); + OutStreamer->EmitELFSize(CurrentFnSym, SizeExp); } + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->markFunctionEnd(); + } + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(); + // Emit post-function debug and/or EH information. for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); @@ -887,17 +921,106 @@ void AsmPrinter::EmitFunctionBody() { } MMI->EndFunction(); - // Print out jump tables referenced by the function. - EmitJumpTableInfo(); + OutStreamer->AddBlankLine(); +} + +/// \brief Compute the number of Global Variables that uses a Constant. +static unsigned getNumGlobalVariableUses(const Constant *C) { + if (!C) + return 0; + + if (isa<GlobalVariable>(C)) + return 1; + + unsigned NumUses = 0; + for (auto *CU : C->users()) + NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU)); - OutStreamer.AddBlankLine(); + return NumUses; +} + +/// \brief Only consider global GOT equivalents if at least one user is a +/// cstexpr inside an initializer of another global variables. Also, don't +/// handle cstexpr inside instructions. During global variable emission, +/// candidates are skipped and are emitted later in case at least one cstexpr +/// isn't replaced by a PC relative GOT entry access. +static bool isGOTEquivalentCandidate(const GlobalVariable *GV, + unsigned &NumGOTEquivUsers) { + // Global GOT equivalents are unnamed private globals with a constant + // pointer initializer to another global symbol. They must point to a + // GlobalVariable or Function, i.e., as GlobalValue. + if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() || + !GV->isDiscardableIfUnused() || !dyn_cast<GlobalValue>(GV->getOperand(0))) + return false; + + // To be a got equivalent, at least one of its users need to be a constant + // expression used by another global variable. + for (auto *U : GV->users()) + NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U)); + + return NumGOTEquivUsers > 0; +} + +/// \brief Unnamed constant global variables solely contaning a pointer to +/// another globals variable is equivalent to a GOT table entry; it contains the +/// the address of another symbol. Optimize it and replace accesses to these +/// "GOT equivalents" by using the GOT entry for the final global instead. +/// Compute GOT equivalent candidates among all global variables to avoid +/// emitting them if possible later on, after it use is replaced by a GOT entry +/// access. 
+void AsmPrinter::computeGlobalGOTEquivs(Module &M) { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + for (const auto &G : M.globals()) { + unsigned NumGOTEquivUsers = 0; + if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers)) + continue; + + const MCSymbol *GOTEquivSym = getSymbol(&G); + GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers); + } +} + +/// \brief Constant expressions using GOT equivalent globals may not be eligible +/// for PC relative GOT entry conversion, in such cases we need to emit such +/// globals we previously omitted in EmitGlobalVariable. +void AsmPrinter::emitGlobalGOTEquivs() { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + SmallVector<const GlobalVariable *, 8> FailedCandidates; + for (auto &I : GlobalGOTEquivs) { + const GlobalVariable *GV = I.second.first; + unsigned Cnt = I.second.second; + if (Cnt) + FailedCandidates.push_back(GV); + } + GlobalGOTEquivs.clear(); + + for (auto *GV : FailedCandidates) + EmitGlobalVariable(GV); } bool AsmPrinter::doFinalization(Module &M) { + // Set the MachineFunction to nullptr so that we can catch attempted + // accesses to MF specific features at the module level and so that + // we can conditionalize accesses based on whether or not it is nullptr. + MF = nullptr; + + // Gather all GOT equivalent globals in the module. We really need two + // passes over the globals: one to compute and another to avoid its emission + // in EmitGlobalVariable, otherwise we would not be able to handle cases + // where the got equivalent shows up before its use. + computeGlobalGOTEquivs(M); + // Emit global variables. for (const auto &G : M.globals()) EmitGlobalVariable(&G); + // Emit remaining GOT equivalent globals. + emitGlobalGOTEquivs(); + // Emit visibility info for declarations for (const Function &F : M) { if (!F.isDeclaration()) @@ -910,63 +1033,34 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } - // Get information about jump-instruction tables to print. - JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>(); - - if (JITI && !JITI->getTables().empty()) { - unsigned Arch = Triple(getTargetTriple()).getArch(); - bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb); - MCInst TrapInst; - TM.getSubtargetImpl()->getInstrInfo()->getTrap(TrapInst); - unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment()); - - // Emit the right section for these functions. - OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection()); - for (const auto &KV : JITI->getTables()) { - uint64_t Count = 0; - for (const auto &FunPair : KV.second) { - // Emit the function labels to make this be a function entry point. - MCSymbol *FunSym = - OutContext.GetOrCreateSymbol(FunPair.second->getName()); - EmitAlignment(LogAlignment); - if (IsThumb) - OutStreamer.EmitThumbFunc(FunSym); - if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction); - OutStreamer.EmitLabel(FunSym); - - // Emit the jump instruction to transfer control to the original - // function. 
- MCInst JumpToFun; - MCSymbol *TargetSymbol = - OutContext.GetOrCreateSymbol(FunPair.first->getName()); - const MCSymbolRefExpr *TargetSymRef = - MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT, - OutContext); - TM.getSubtargetImpl()->getInstrInfo()->getUnconditionalBranch( - JumpToFun, TargetSymRef); - OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo()); - ++Count; - } - - // Emit enough padding instructions to fill up to the next power of two. - uint64_t Remaining = NextPowerOf2(Count) - Count; - for (uint64_t C = 0; C < Remaining; ++C) { - EmitAlignment(LogAlignment); - OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo()); - } - - } - } + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); // Emit module flags. SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); if (!ModuleFlags.empty()) - getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM); + TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM); + + Triple TT(TM.getTargetTriple()); + if (TT.isOSBinFormatELF()) { + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer->SwitchSection(TLOF.getDataRelSection()); + const DataLayout *DL = TM.getDataLayout(); + + for (const auto &Stub : Stubs) { + OutStreamer->EmitLabel(Stub.first); + OutStreamer->EmitSymbolValue(Stub.second.getPointer(), + DL->getPointerSize()); + } + } + } // Make sure we wrote out everything we need. - OutStreamer.Flush(); + OutStreamer->Flush(); // Finalize debug and EH information. for (const HandlerInfo &HI : Handlers) { @@ -989,31 +1083,31 @@ bool AsmPrinter::doFinalization(Module &M) { for (const auto &G : M.globals()) { if (!G.hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference); + OutStreamer->EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference); } for (const auto &F : M) { if (!F.hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference); + OutStreamer->EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference); } } - OutStreamer.AddBlankLine(); + OutStreamer->AddBlankLine(); for (const auto &Alias : M.aliases()) { MCSymbol *Name = getSymbol(&Alias); if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective()) - OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); + OutStreamer->EmitSymbolAttribute(Name, MCSA_Global); else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage()) - OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference); + OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference); else assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: - OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee())); + OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee())); } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -1027,27 +1121,26 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit __morestack address if needed for indirect calls. 
if (MMI->usesMorestackAddr()) { - const MCSection *ReadOnlySection = + MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), /*C=*/nullptr); - OutStreamer.SwitchSection(ReadOnlySection); + OutStreamer->SwitchSection(ReadOnlySection); MCSymbol *AddrSymbol = - OutContext.GetOrCreateSymbol(StringRef("__morestack_addr")); - OutStreamer.EmitLabel(AddrSymbol); + OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); + OutStreamer->EmitLabel(AddrSymbol); - const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout(); - unsigned PtrSize = DL.getPointerSize(0); - OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), - PtrSize); + unsigned PtrSize = TM.getDataLayout()->getPointerSize(0); + OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), + PtrSize); } // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) - if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext)) - OutStreamer.SwitchSection(S); + if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) + OutStreamer->SwitchSection(S); // Allow the target to emit any magic that it wants at the end of the file, // after everything else has gone out. @@ -1056,29 +1149,44 @@ bool AsmPrinter::doFinalization(Module &M) { delete Mang; Mang = nullptr; MMI = nullptr; - OutStreamer.Finish(); - OutStreamer.reset(); + OutStreamer->Finish(); + OutStreamer->reset(); return false; } +MCSymbol *AsmPrinter::getCurExceptionSym() { + if (!CurExceptionSym) + CurExceptionSym = createTempSymbol("exception"); + return CurExceptionSym; +} + void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. CurrentFnSym = getSymbol(MF.getFunction()); CurrentFnSymForSize = CurrentFnSym; + CurrentFnBegin = nullptr; + CurExceptionSym = nullptr; + bool NeedsLocalForSize = MAI->needsLocalForSize(); + if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || + NeedsLocalForSize) { + CurrentFnBegin = createTempSymbol("func_begin"); + if (NeedsLocalForSize) + CurrentFnSymForSize = CurrentFnBegin; + } if (isVerbose()) LI = &getAnalysis<MachineLoopInfo>(); } namespace { - // SectionCPs - Keep track the alignment, constpool entries per Section. +// Keep track the alignment, constpool entries per Section. struct SectionCPs { - const MCSection *S; + MCSection *S; unsigned Alignment; SmallVector<unsigned, 4> CPEs; - SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {} + SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {} }; } @@ -1100,13 +1208,13 @@ void AsmPrinter::EmitConstantPool() { unsigned Align = CPE.getAlignment(); SectionKind Kind = - CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout()); + CPE.getSectionKind(TM.getDataLayout()); const Constant *C = nullptr; if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; - const MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C); + MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C); // The number of sections are small, just do a linear search from the // last section to the first. 
@@ -1139,7 +1247,7 @@ void AsmPrinter::EmitConstantPool() { continue; if (CurSection != CPSections[i].S) { - OutStreamer.SwitchSection(CPSections[i].S); + OutStreamer->SwitchSection(CPSections[i].S); EmitAlignment(Log2_32(CPSections[i].Alignment)); CurSection = CPSections[i].S; Offset = 0; @@ -1150,13 +1258,13 @@ void AsmPrinter::EmitConstantPool() { // Emit inter-object padding for alignment. unsigned AlignMask = CPE.getAlignment() - 1; unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; - OutStreamer.EmitZeros(NewOffset - Offset); + OutStreamer->EmitZeros(NewOffset - Offset); Type *Ty = CPE.getType(); Offset = NewOffset + - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty); + TM.getDataLayout()->getTypeAllocSize(Ty); - OutStreamer.EmitLabel(Sym); + OutStreamer->EmitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); else @@ -1169,7 +1277,7 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { - const DataLayout *DL = MF->getSubtarget().getDataLayout(); + const DataLayout *DL = MF->getTarget().getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (!MJTI) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; @@ -1179,34 +1287,23 @@ void AsmPrinter::EmitJumpTableInfo() { // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. const Function *F = MF->getFunction(); - bool JTInDiffSection = false; - if (// In PIC mode, we need to emit the jump table to the same section as the - // function body itself, otherwise the label differences won't make sense. - // FIXME: Need a better predicate for this: what about custom entries? - MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || - // We should also do if the section name is NULL or function is declared - // in discardable section - // FIXME: this isn't the right predicate, should be based on the MCSection - // for the function. - F->isWeakForLinker()) { - OutStreamer.SwitchSection( - getObjFileLowering().SectionForGlobal(F, *Mang, TM)); - } else { - // Otherwise, drop it in the readonly section. - const MCSection *ReadOnlySection = - getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), - /*C=*/nullptr); - OutStreamer.SwitchSection(ReadOnlySection); - JTInDiffSection = true; + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection( + MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, + *F); + if (JTInDiffSection) { + // Drop it in the readonly section. + MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, *Mang, TM); + OutStreamer->SwitchSection(ReadOnlySection); } EmitAlignment(Log2_32( - MJTI->getEntryAlignment(*TM.getSubtargetImpl()->getDataLayout()))); + MJTI->getEntryAlignment(*TM.getDataLayout()))); // Jump tables in code sections are marked with a data_region directive // where that's supported. 
if (!JTInDiffSection) - OutStreamer.EmitDataRegion(MCDR_DataRegionJT32); + OutStreamer->EmitDataRegion(MCDR_DataRegionJT32); for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; @@ -1219,7 +1316,7 @@ void AsmPrinter::EmitJumpTableInfo() { if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && MAI->doesSetDirectiveSuppressesReloc()) { SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { const MachineBasicBlock *MBB = JTBBs[ii]; @@ -1229,8 +1326,9 @@ void AsmPrinter::EmitJumpTableInfo() { // .set LJTSet, LBB32-base const MCExpr *LHS = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); - OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), - MCBinaryExpr::CreateSub(LHS, Base, OutContext)); + OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), + MCBinaryExpr::CreateSub(LHS, Base, + OutContext)); } } @@ -1241,15 +1339,15 @@ void AsmPrinter::EmitJumpTableInfo() { if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly // named and numbered 'l' label would work. Simplify GetJTISymbol. - OutStreamer.EmitLabel(GetJTISymbol(JTI, true)); + OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); - OutStreamer.EmitLabel(GetJTISymbol(JTI)); + OutStreamer->EmitLabel(GetJTISymbol(JTI)); for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); } if (!JTInDiffSection) - OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); + OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); } /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the @@ -1263,9 +1361,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, case MachineJumpTableInfo::EK_Inline: llvm_unreachable("Cannot emit EK_Inline jump table entry"); case MachineJumpTableInfo::EK_Custom32: - Value = - TM.getSubtargetImpl()->getTargetLowering()->LowerCustomJumpTableEntry( - MJTI, MBB, UID, OutContext); + Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry( + MJTI, MBB, UID, OutContext); break; case MachineJumpTableInfo::EK_BlockAddress: // EK_BlockAddress - Each entry is a plain address of block, e.g.: @@ -1277,7 +1374,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // with a relocation as gp-relative, e.g.: // .gprel32 LBB123 MCSymbol *MBBSym = MBB->getSymbol(); - OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); return; } @@ -1286,7 +1383,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // with a relocation as gp-relative, e.g.: // .gpdword LBB123 MCSymbol *MBBSym = MBB->getSymbol(); - OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); return; } @@ -1304,7 +1401,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, break; } Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const 
MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext); Value = MCBinaryExpr::CreateSub(Value, Base, OutContext); break; @@ -1314,8 +1411,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); unsigned EntrySize = - MJTI->getEntrySize(*TM.getSubtargetImpl()->getDataLayout()); - OutStreamer.EmitValue(Value, EntrySize); + MJTI->getEntrySize(*TM.getDataLayout()); + OutStreamer->EmitValue(Value, EntrySize); } @@ -1344,8 +1441,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { StringRef Sym(".constructors_used"); - OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym), - MCSA_Reference); + OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), + MCSA_Reference); } return true; } @@ -1356,8 +1453,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { StringRef Sym(".destructors_used"); - OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym), - MCSA_Reference); + OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), + MCSA_Reference); } return true; } @@ -1374,7 +1471,7 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); if (GV) - OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); + OutStreamer->EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); } } @@ -1424,7 +1521,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); unsigned Align = Log2_32(DL->getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), [](const Structor &L, @@ -1440,11 +1537,11 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { KeySym = getSymbol(GV); } - const MCSection *OutputSection = + MCSection *OutputSection = (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym) : Obj.getStaticDtorSection(S.Priority, KeySym)); - OutStreamer.SwitchSection(OutputSection); - if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) + OutStreamer->SwitchSection(OutputSection); + if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) EmitAlignment(Align); EmitXXStructor(S.Func); } @@ -1460,7 +1557,7 @@ void AsmPrinter::EmitModuleIdents(Module &M) { assert(N->getNumOperands() == 1 && "llvm.ident metadata entry can have only one operand"); const MDString *S = cast<MDString>(N->getOperand(0)); - OutStreamer.EmitIdent(S->getString()); + OutStreamer->EmitIdent(S->getString()); } } } @@ -1472,19 +1569,19 @@ void AsmPrinter::EmitModuleIdents(Module &M) { /// EmitInt8 - Emit a byte directive and value. /// void AsmPrinter::EmitInt8(int Value) const { - OutStreamer.EmitIntValue(Value, 1); + OutStreamer->EmitIntValue(Value, 1); } /// EmitInt16 - Emit a short directive and value. /// void AsmPrinter::EmitInt16(int Value) const { - OutStreamer.EmitIntValue(Value, 2); + OutStreamer->EmitIntValue(Value, 2); } /// EmitInt32 - Emit a long directive and value. 
/// void AsmPrinter::EmitInt32(int Value) const { - OutStreamer.EmitIntValue(Value, 4); + OutStreamer->EmitIntValue(Value, 4); } /// Emit something like ".long Hi-Lo" where the size in bytes of the directive @@ -1492,6 +1589,10 @@ void AsmPrinter::EmitInt32(int Value) const { /// .set if it avoids relocations. void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const { + if (!MAI->doesDwarfUseRelocationsAcrossSections()) + if (OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size)) + return; + // Get the Hi-Lo expression. const MCExpr *Diff = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext), @@ -1499,14 +1600,14 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutContext); if (!MAI->doesSetDirectiveSuppressesReloc()) { - OutStreamer.EmitValue(Diff, Size); + OutStreamer->EmitValue(Diff, Size); return; } // Otherwise, emit with .set (aka assignment). - MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); - OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, Size); + MCSymbol *SetLabel = createTempSymbol("set"); + OutStreamer->EmitAssignment(SetLabel, Diff); + OutStreamer->EmitSymbolValue(SetLabel, Size); } /// EmitLabelPlusOffset - Emit something like ".long Label+Offset" @@ -1516,7 +1617,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size, bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { - OutStreamer.EmitCOFFSecRel32(Label); + OutStreamer->EmitCOFFSecRel32(Label); return; } @@ -1526,7 +1627,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, Expr = MCBinaryExpr::CreateAdd( Expr, MCConstantExpr::Create(Offset, OutContext), OutContext); - OutStreamer.EmitValue(Expr, Size); + OutStreamer->EmitValue(Expr, Size); } //===----------------------------------------------------------------------===// @@ -1539,7 +1640,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) - NumBits = getGVAlignmentLog2(GV, *TM.getSubtargetImpl()->getDataLayout(), + NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1548,9 +1649,9 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && "undefined behavior"); if (getCurrentSection()->getKind().isText()) - OutStreamer.EmitCodeAlignment(1u << NumBits); + OutStreamer->EmitCodeAlignment(1u << NumBits); else - OutStreamer.EmitValueToAlignment(1u << NumBits); + OutStreamer->EmitValueToAlignment(1u << NumBits); } //===----------------------------------------------------------------------===// @@ -1586,8 +1687,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. 
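Before the fold below is rewired to take the TargetMachine's DataLayout by reference, the fallback is worth seeing in isolation. A minimal sketch, assuming only a DataLayout DL that matches the target; ConstantFoldConstantExpression is declared in llvm/Analysis/ConstantFolding.h, and the wrapper name foldIfPossible is ours:

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

// Collapse a ConstantExpr the optimizer left unfolded; the caller re-runs
// lowerConstant() on the simpler result.
static const Constant *foldIfPossible(const Constant *CV, const DataLayout &DL) {
  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV))
    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL))
      if (Folded != CE)
        return Folded;
  return CV;
}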
- if (Constant *C = ConstantFoldConstantExpression( - CE, TM.getSubtargetImpl()->getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout())) if (C != CE) return lowerConstant(C); @@ -1601,7 +1701,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *TM.getDataLayout(); + // Generate a symbolic expression for the byte address APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); @@ -1625,7 +1726,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return lowerConstant(CE->getOperand(0)); case Instruction::IntToPtr: { - const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *TM.getDataLayout(); + // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); @@ -1635,7 +1737,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } case Instruction::PtrToInt: { - const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *TM.getDataLayout(); + // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. Constant *Op = CE->getOperand(0); @@ -1685,7 +1788,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } } -static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP); +static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP, + const Constant *BaseCV = nullptr, + uint64_t Offset = 0); /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the @@ -1709,7 +1814,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (CI->getBitWidth() > 64) return -1; uint64_t Size = - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(V->getType()); + TM.getDataLayout()->getTypeAllocSize(V->getType()); uint64_t Value = CI->getZExtValue(); // Make sure the constant is at least 8 bits long and has a power @@ -1754,26 +1859,26 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, int Value = isRepeatedByteSequence(CDS, AP.TM); if (Value != -1) { uint64_t Bytes = - AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( + AP.TM.getDataLayout()->getTypeAllocSize( CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) - return AP.OutStreamer.EmitFill(Bytes, Value); + return AP.OutStreamer->EmitFill(Bytes, Value); } // If this can be emitted with .ascii/.asciz, emit it as such. if (CDS->isString()) - return AP.OutStreamer.EmitBytes(CDS->getAsString()); + return AP.OutStreamer->EmitBytes(CDS->getAsString()); // Otherwise, emit the values in successive locations. 
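For example, a ConstantDataVector of four floats reaches the ElementByteSize == 4 branch below: each element is emitted as a 4-byte integer carrying its IEEE-754 bit pattern (annotated "float <value>" in verbose mode) rather than as a textual .float directive, so no precision is lost on the way through the assembler.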
unsigned ElementByteSize = CDS->getElementByteSize(); if (isa<IntegerType>(CDS->getElementType())) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", - CDS->getElementAsInteger(i)); - AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), - ElementByteSize); + AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", + CDS->getElementAsInteger(i)); + AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), + ElementByteSize); } } else if (ElementByteSize == 4) { // FP Constants are printed as integer constants to avoid losing @@ -1787,8 +1892,8 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsFloat(i); if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 4); + AP.OutStreamer->GetCommentOS() << "float " << F << '\n'; + AP.OutStreamer->EmitIntValue(I, 4); } } else { assert(CDS->getElementType()->isDoubleTy()); @@ -1800,34 +1905,36 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsDouble(i); if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 8); + AP.OutStreamer->GetCommentOS() << "double " << F << '\n'; + AP.OutStreamer->EmitIntValue(I, 8); } } - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding); + AP.OutStreamer->EmitZeros(Padding); } -static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) { +static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
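To make the aggregate shortcut concrete: for a [16 x i8] whose elements are all 42, isRepeatedByteSequence returns 0x2A and the call below collapses the whole array into one EmitFill of 16 bytes, whereas an i16 0x1234 yields -1 (its two bytes differ) and forces element-by-element emission. The integer case shown earlier requires at least 8 bits and a power-of-two width precisely so the value can be sliced into identical bytes.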
int Value = isRepeatedByteSequence(CA, AP.TM); + const DataLayout &DL = *AP.TM.getDataLayout(); if (Value != -1) { - uint64_t Bytes = - AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( - CA->getType()); - AP.OutStreamer.EmitFill(Bytes, Value); + uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); + AP.OutStreamer->EmitFill(Bytes, Value); } else { - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - emitGlobalConstantImpl(CA->getOperand(i), AP); + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset); + Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } } } @@ -1835,36 +1942,37 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) emitGlobalConstantImpl(CV->getOperand(i), AP); - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CV->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding); + AP.OutStreamer->EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { +static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = AP.TM.getDataLayout(); unsigned Size = DL->getTypeAllocSize(CS->getType()); const StructLayout *Layout = DL->getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); + // Print the actual field value. + emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar); + // Check if padding is needed and insert one or more 0s. uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; - // Now print the actual field value. - emitGlobalConstantImpl(Field, AP); - // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. 
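Worked through for struct { i8, i32 } under a layout that places the i32 at offset 4: the first iteration prints the i8, computes PadSize = (4 - 0) - 1 = 3, and the EmitZeros call below writes those three pad bytes so the i32 lands on its ABI offset; SizeSoFar advances by FieldSize + PadSize = 4, which is what keeps the closing assertion against Layout->getSizeInBytes() satisfied.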
- AP.OutStreamer.EmitZeros(PadSize); + AP.OutStreamer->EmitZeros(PadSize); } assert(SizeSoFar == Layout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); @@ -1880,10 +1988,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { CFP->getValueAPF().toString(StrVal); if (CFP->getType()) - CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + CFP->getType()->print(AP.OutStreamer->GetCommentOS()); else - AP.OutStreamer.GetCommentOS() << "Printing <null> Type"; - AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n'; + AP.OutStreamer->GetCommentOS() << "Printing <null> Type"; + AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; } // Now iterate through the APInt chunks, emitting them in endian-correct @@ -1895,32 +2003,32 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. - if (AP.TM.getSubtargetImpl()->getDataLayout()->isBigEndian() && + if (AP.TM.getDataLayout()->isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes); + AP.OutStreamer->EmitIntValue(p[Chunk--], TrailingBytes); for (; Chunk >= 0; --Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); + AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); } else { unsigned Chunk; for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); + AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes); + AP.OutStreamer->EmitIntValue(p[Chunk], TrailingBytes); } // Emit the tail padding for the long double. - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); - AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) - - DL.getTypeStoreSize(CFP->getType())); + const DataLayout &DL = *AP.TM.getDataLayout(); + AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) - + DL.getTypeStoreSize(CFP->getType())); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); // Copy the value as we may massage the layout for constants whose bit width @@ -1959,28 +2067,123 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i]; - AP.OutStreamer.EmitIntValue(Val, 8); + AP.OutStreamer->EmitIntValue(Val, 8); } if (ExtraBitsSize) { // Emit the extra bits after the 64-bits chunks. // Emit a directive that fills the expected size. 
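As a worked case, take an i96 constant under a layout that rounds its alloc size up to 16 bytes: the word loop above emits one full 64-bit chunk, ExtraBitsSize is 96 % 64 = 32, and the directive sized below comes out at 16 - 8 = 8 bytes, so the remaining 32 significant bits plus four bytes of tail padding leave in a single EmitIntValue.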
- uint64_t Size = AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( + uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize( CI->getType()); Size -= (BitWidth / 64) * 8; assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) == ExtraBits && "Directive too small for extra bits."); - AP.OutStreamer.EmitIntValue(ExtraBits, Size); + AP.OutStreamer->EmitIntValue(ExtraBits, Size); } } -static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); +/// \brief Transform a not absolute MCExpr containing a reference to a GOT +/// equivalent global, by a target specific GOT pc relative access to the +/// final symbol. +static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, + const Constant *BaseCst, + uint64_t Offset) { + // The global @foo below illustrates a global that uses a got equivalent. + // + // @bar = global i32 42 + // @gotequiv = private unnamed_addr constant i32* @bar + // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64), + // i64 ptrtoint (i32* @foo to i64)) + // to i32) + // + // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually + // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the + // form: + // + // foo = cstexpr, where + // cstexpr := <gotequiv> - "." + <cst> + // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst> + // + // After canonicalization by EvaluateAsRelocatable `ME` turns into: + // + // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where + // gotpcrelcst := <offset from @foo base> + <cst> + // + MCValue MV; + if (!(*ME)->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) + return; + + const MCSymbol *GOTEquivSym = &MV.getSymA()->getSymbol(); + if (!AP.GlobalGOTEquivs.count(GOTEquivSym)) + return; + + const GlobalValue *BaseGV = dyn_cast<GlobalValue>(BaseCst); + if (!BaseGV) + return; + + const MCSymbol *BaseSym = AP.getSymbol(BaseGV); + if (BaseSym != &MV.getSymB()->getSymbol()) + return; + + // Make sure to match: + // + // gotpcrelcst := <offset from @foo base> + <cst> + // + // If gotpcrelcst is positive it means that we can safely fold the pc rel + // displacement into the GOTPCREL. We can also can have an extra offset <cst> + // if the target knows how to encode it. + // + int64_t GOTPCRelCst = Offset + MV.getConstant(); + if (GOTPCRelCst < 0) + return; + if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0) + return; + + // Emit the GOT PC relative to replace the got equivalent global, i.e.: + // + // bar: + // .long 42 + // gotequiv: + // .quad bar + // foo: + // .long gotequiv - "." 
+ <cst> + // + // is replaced by the target specific equivalent to: + // + // bar: + // .long 42 + // foo: + // .long bar@GOTPCREL+<gotpcrelcst> + // + AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym]; + const GlobalVariable *GV = Result.first; + int NumUses = (int)Result.second; + const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0)); + const MCSymbol *FinalSym = AP.getSymbol(FinalGV); + *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel( + FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer); + + // Update GOT equivalent usage information + --NumUses; + if (NumUses >= 0) + AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); +} + +static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { + const DataLayout *DL = AP.TM.getDataLayout(); uint64_t Size = DL->getTypeAllocSize(CV->getType()); + + // Globals with sub-elements such as combinations of arrays and structs + // are handled recursively by emitGlobalConstantImpl. Keep track of the + // constant symbol base and the current position with BaseCV and Offset. + if (!BaseCV && CV->hasOneUse()) + BaseCV = dyn_cast<Constant>(CV->user_back()); + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) - return AP.OutStreamer.EmitZeros(Size); + return AP.OutStreamer->EmitZeros(Size); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { switch (Size) { @@ -1989,9 +2192,9 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { case 4: case 8: if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", - CI->getZExtValue()); - AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size); + AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", + CI->getZExtValue()); + AP.OutStreamer->EmitIntValue(CI->getZExtValue(), Size); return; default: emitGlobalConstantLargeInt(CI, AP); @@ -2003,7 +2206,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { return emitGlobalConstantFP(CFP, AP); if (isa<ConstantPointerNull>(CV)) { - AP.OutStreamer.EmitIntValue(0, Size); + AP.OutStreamer->EmitIntValue(0, Size); return; } @@ -2011,10 +2214,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { return emitGlobalConstantDataSequential(CDS, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return emitGlobalConstantArray(CVA, AP); + return emitGlobalConstantArray(CVA, AP, BaseCV, Offset); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return emitGlobalConstantStruct(CVS, AP); + return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of @@ -2026,7 +2229,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. - Constant *New = ConstantFoldConstantExpression(CE, DL); + Constant *New = ConstantFoldConstantExpression(CE, *DL); if (New && New != CE) return emitGlobalConstantImpl(New, AP); } @@ -2037,19 +2240,27 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. 
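The GOTPCREL rewrite described above hinges on a single MC-layer canonicalization; a condensed sketch of just the detection step, assuming ME was produced by lowerConstant() (MCValue and EvaluateAsRelocatable come from llvm/MC/MCValue.h and llvm/MC/MCExpr.h; the helper name is ours):

#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"

using namespace llvm;

// True when ME canonicalizes to the SymA - SymB + Cst shape the rewrite
// requires (SymA: the GOT-equivalent slot, SymB: the base symbol).
static bool looksLikeGOTEquivalentUse(const MCExpr *ME) {
  MCValue MV;
  if (!ME->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
    return false;
  return MV.getSymA() && MV.getSymB();
}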
- AP.OutStreamer.EmitValue(AP.lowerConstant(CV), Size); + const MCExpr *ME = AP.lowerConstant(CV); + + // Since lowerConstant already folded and got rid of all IR pointer and + // integer casts, detect GOT equivalent accesses by looking into the MCExpr + // directly. + if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel()) + handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset); + + AP.OutStreamer->EmitValue(ME, Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. void AsmPrinter::EmitGlobalConstant(const Constant *CV) { uint64_t Size = - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType()); + TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) emitGlobalConstantImpl(CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. - OutStreamer.EmitIntValue(0, 1); + OutStreamer->EmitIntValue(0, 1); } } @@ -2069,23 +2280,10 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { // Symbol Lowering Routines. //===----------------------------------------------------------------------===// -/// GetTempSymbol - Return the MCSymbol corresponding to the assembler -/// temporary label with the specified stem and unique ID. -MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); - return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + - Name + Twine(ID)); -} - -/// GetTempSymbol - Return an assembler temporary label with the specified -/// stem. -MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); - return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ - Name); +MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const { + return OutContext.createTempSymbol(Name, true); } - MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { return MMI->getAddrLabelSymbol(BA->getBasicBlock()); } @@ -2096,8 +2294,8 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); - return OutContext.GetOrCreateSymbol + const DataLayout *DL = TM.getDataLayout(); + return OutContext.getOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + Twine(CPID)); } @@ -2110,8 +2308,8 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. 
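To ground the naming scheme used by these routines: with the ELF private-global prefix ".L", GetCPISymbol turns constant-pool entry 2 of function 3 into ".LCPI3_2", and GetJTSetSymbol below yields set symbols of the form ".L3_1_set_5" (function 3, jump table 1, block 5).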
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); - return OutContext.GetOrCreateSymbol + const DataLayout *DL = TM.getDataLayout(); + return OutContext.getOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + Twine(UID) + "_set_" + Twine(MBBID)); } @@ -2127,7 +2325,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; Mang->getNameWithPrefix(NameStr, Sym); - return OutContext.GetOrCreateSymbol(NameStr.str()); + return OutContext.getOrCreateSymbol(NameStr); } @@ -2172,16 +2370,16 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, // If this block is not a loop header, just print out what is the loop header // and return. if (Header != &MBB) { - AP.OutStreamer.AddComment(" in Loop: Header=BB" + - Twine(AP.getFunctionNumber())+"_" + - Twine(Loop->getHeader()->getNumber())+ - " Depth="+Twine(Loop->getLoopDepth())); + AP.OutStreamer->AddComment(" in Loop: Header=BB" + + Twine(AP.getFunctionNumber())+"_" + + Twine(Loop->getHeader()->getNumber())+ + " Depth="+Twine(Loop->getLoopDepth())); return; } // Otherwise, it is a loop header. Print out information about child and // parent loops. - raw_ostream &OS = AP.OutStreamer.GetCommentOS(); + raw_ostream &OS = AP.OutStreamer->GetCommentOS(); PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); @@ -2212,18 +2410,18 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { if (MBB.hasAddressTaken()) { const BasicBlock *BB = MBB.getBasicBlock(); if (isVerbose()) - OutStreamer.AddComment("Block address taken"); + OutStreamer->AddComment("Block address taken"); std::vector<MCSymbol*> Symbols = MMI->getAddrLabelSymbolToEmit(BB); for (auto *Sym : Symbols) - OutStreamer.EmitLabel(Sym); + OutStreamer->EmitLabel(Sym); } // Print some verbose block comments. if (isVerbose()) { if (const BasicBlock *BB = MBB.getBasicBlock()) if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); + OutStreamer->AddComment("%" + BB->getName()); emitBasicBlockLoopComments(MBB, LI, *this); } @@ -2231,10 +2429,10 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. - OutStreamer.emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); + OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); } } else { - OutStreamer.EmitLabel(MBB.getSymbol()); + OutStreamer->EmitLabel(MBB.getSymbol()); } } @@ -2256,7 +2454,7 @@ void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, } if (Attr != MCSA_Invalid) - OutStreamer.EmitSymbolAttribute(Sym, Attr); + OutStreamer->EmitSymbolAttribute(Sym, Attr); } /// isBlockOnlyReachableByFallthough - Return true if the basic block has @@ -2335,3 +2533,5 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { /// Pin vtable to this file. 
AsmPrinterHandler::~AsmPrinterHandler() {} + +void AsmPrinterHandler::markFunctionEnd() {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 00681f6..3258961 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -12,45 +12,29 @@ //===----------------------------------------------------------------------===// #include "ByteStreamer.h" +#include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" -void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { - BS.EmitInt8( - Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) - : dwarf::OperationEncodingString(Op)); -} - -void DebugLocDwarfExpression::EmitSigned(int Value) { - BS.EmitSLEB128(Value, Twine(Value)); -} - -void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) { - BS.EmitULEB128(Value, Twine(Value)); -} - -bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { - // This information is not available while emitting .debug_loc entries. - return false; -} - //===----------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===----------------------------------------------------------------------===// @@ -58,30 +42,30 @@ bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { /// EmitSLEB128 - emit the specified signed leb128 value. void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { if (isVerbose() && Desc) - OutStreamer.AddComment(Desc); + OutStreamer->AddComment(Desc); - OutStreamer.EmitSLEB128IntValue(Value); + OutStreamer->EmitSLEB128IntValue(Value); } /// EmitULEB128 - emit the specified signed leb128 value. void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) - OutStreamer.AddComment(Desc); + OutStreamer->AddComment(Desc); - OutStreamer.EmitULEB128IntValue(Value, PadTo); + OutStreamer->EmitULEB128IntValue(Value, PadTo); } /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. 
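Because the two emitters above simply forward to the streamer, the encoding itself is easy to verify standalone; a self-contained sketch using the same llvm/Support/LEB128.h helpers the buffered streamer later in this patch builds on (624485 is the DWARF specification's worked example; the demo function is ours):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void demoULEB128() {
  SmallString<8> Buf;
  raw_svector_ostream OS(Buf);
  // Seven payload bits per byte, least significant group first, high bit
  // set on every byte but the last.
  encodeULEB128(624485, OS);
  OS.flush(); // Buf now holds 0xE5 0x8E 0x26
}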
void AsmPrinter::EmitCFAByte(unsigned Val) const { if (isVerbose()) { if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64) - OutStreamer.AddComment("DW_CFA_offset + Reg (" + - Twine(Val - dwarf::DW_CFA_offset) + ")"); + OutStreamer->AddComment("DW_CFA_offset + Reg (" + + Twine(Val - dwarf::DW_CFA_offset) + ")"); else - OutStreamer.AddComment(dwarf::CallFrameString(Val)); + OutStreamer->AddComment(dwarf::CallFrameString(Val)); } - OutStreamer.EmitIntValue(Val, 1); + OutStreamer->EmitIntValue(Val, 1); } static const char *DecodeDWARFEncoding(unsigned Encoding) { @@ -132,13 +116,13 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) { void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { if (isVerbose()) { if (Desc) - OutStreamer.AddComment(Twine(Desc) + " Encoding = " + - Twine(DecodeDWARFEncoding(Val))); + OutStreamer->AddComment(Twine(Desc) + " Encoding = " + + Twine(DecodeDWARFEncoding(Val))); else - OutStreamer.AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); + OutStreamer->AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); } - OutStreamer.EmitIntValue(Val, 1); + OutStreamer->EmitIntValue(Val, 1); } /// GetSizeOfEncodedValue - Return the size of the encoding in bytes. @@ -150,7 +134,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: - return TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + return TM.getDataLayout()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: @@ -166,10 +150,11 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, OutStreamer); - OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); + TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, + *OutStreamer); + OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); } else - OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); + OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); } /// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its @@ -179,63 +164,43 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, /// /// SectionLabel is a temporary label emitted at the start of the section that /// Label lives in. -void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, - const MCSymbol *SectionLabel) const { +void AsmPrinter::emitSectionOffset(const MCSymbol *Label) const { // On COFF targets, we have to emit the special .secrel32 directive. if (MAI->needsDwarfSectionOffsetDirective()) { - OutStreamer.EmitCOFFSecRel32(Label); + OutStreamer->EmitCOFFSecRel32(Label); return; } - // Get the section that we're referring to, based on SectionLabel. - const MCSection &Section = SectionLabel->getSection(); - - // If Label has already been emitted, verify that it is in the same section as - // section label for sanity. - assert((!Label->isInSection() || &Label->getSection() == &Section) && - "Section offset using wrong section base for label"); - - // If the section in question will end up with an address of 0 anyway, we can - // just emit an absolute reference to save a relocation. - if (Section.isBaseAddressKnownZero()) { - OutStreamer.EmitSymbolValue(Label, 4); + // If the format uses relocations with dwarf, refer to the symbol directly. 
+ if (MAI->doesDwarfUseRelocationsAcrossSections()) { + OutStreamer->EmitSymbolValue(Label, 4); return; } // Otherwise, emit it as a label difference from the start of the section. - EmitLabelDifference(Label, SectionLabel, 4); + EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); } -// Some targets do not provide a DWARF register number for every -// register. This function attempts to emit a DWARF register by -// emitting a piece of a super-register or by piecing together -// multiple subregisters that alias the register. -void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, - const MachineLocation &MLoc, - unsigned PieceSizeInBits, - unsigned PieceOffsetInBits) const { - assert(MLoc.isReg() && "MLoc must be a register"); - DebugLocDwarfExpression Expr(*this, Streamer); - Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits); -} +void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { + if (MAI->doesDwarfUseRelocationsAcrossSections()) { + emitSectionOffset(S.getSymbol()); + return; + } -void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, - unsigned PieceSizeInBits, - unsigned PieceOffsetInBits) const { - DebugLocDwarfExpression Expr(*this, Streamer); - Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits); + // Just emit the offset directly; no need for symbol math. + EmitInt32(S.getOffset()); } /// EmitDwarfRegOp - Emit dwarf register operation. void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, - const MachineLocation &MLoc, - bool Indirect) const { - DebugLocDwarfExpression Expr(*this, Streamer); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); - int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + const MachineLocation &MLoc) const { + DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(), + getDwarfDebug()->getDwarfVersion(), Streamer); + const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo(); + int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false); if (Reg < 0) { // We assume that pointers are always in an addressable register. - if (Indirect || MLoc.isIndirect()) + if (MLoc.isIndirect()) // FIXME: We have no reasonable way of handling errors in here. The // caller might be in the middle of a dwarf expression. 
We should // probably assert that Reg >= 0 once debug info generation is more @@ -251,9 +216,7 @@ void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, } if (MLoc.isIndirect()) - Expr.AddRegIndirect(Reg, MLoc.getOffset(), Indirect); - else if (Indirect) - Expr.AddRegIndirect(Reg, 0, false); + Expr.AddRegIndirect(Reg, MLoc.getOffset()); else Expr.AddReg(Reg); } @@ -267,25 +230,82 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { default: llvm_unreachable("Unexpected instruction"); case MCCFIInstruction::OpDefCfaOffset: - OutStreamer.EmitCFIDefCfaOffset(Inst.getOffset()); + OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); break; case MCCFIInstruction::OpDefCfa: - OutStreamer.EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); break; case MCCFIInstruction::OpDefCfaRegister: - OutStreamer.EmitCFIDefCfaRegister(Inst.getRegister()); + OutStreamer->EmitCFIDefCfaRegister(Inst.getRegister()); break; case MCCFIInstruction::OpOffset: - OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + OutStreamer->EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); break; case MCCFIInstruction::OpRegister: - OutStreamer.EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); + OutStreamer->EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); break; case MCCFIInstruction::OpWindowSave: - OutStreamer.EmitCFIWindowSave(); + OutStreamer->EmitCFIWindowSave(); break; case MCCFIInstruction::OpSameValue: - OutStreamer.EmitCFISameValue(Inst.getRegister()); + OutStreamer->EmitCFISameValue(Inst.getRegister()); break; } } + +void AsmPrinter::emitDwarfDIE(const DIE &Die) const { + // Get the abbreviation for this DIE. + const DIEAbbrev &Abbrev = Die.getAbbrev(); + + // Emit the code (index) for the abbreviation. + if (isVerbose()) + OutStreamer->AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + + "] 0x" + Twine::utohexstr(Die.getOffset()) + + ":0x" + Twine::utohexstr(Die.getSize()) + " " + + dwarf::TagString(Abbrev.getTag())); + EmitULEB128(Abbrev.getNumber()); + + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + + // Emit the DIE attribute values. + for (unsigned i = 0, N = Values.size(); i < N; ++i) { + dwarf::Attribute Attr = AbbrevData[i].getAttribute(); + dwarf::Form Form = AbbrevData[i].getForm(); + assert(Form && "Too many attributes for DIE (check abbreviation)"); + + if (isVerbose()) { + OutStreamer->AddComment(dwarf::AttributeString(Attr)); + if (Attr == dwarf::DW_AT_accessibility) + OutStreamer->AddComment(dwarf::AccessibilityString( + cast<DIEInteger>(Values[i])->getValue())); + } + + // Emit an attribute using the defined form. + Values[i]->EmitValue(this, Form); + } + + // Emit the DIE children if any. + if (Abbrev.hasChildren()) { + for (auto &Child : Die.getChildren()) + emitDwarfDIE(*Child); + + OutStreamer->AddComment("End Of Children Mark"); + EmitInt8(0); + } +} + +void +AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const { + // For each abbrevation. + for (const DIEAbbrev *Abbrev : Abbrevs) { + // Emit the abbrevations code (base 1 index.) + EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); + + // Emit the abbreviations data. + Abbrev->Emit(this); + } + + // Mark end of abbreviations. 
+ EmitULEB128(0, "EOM(3)"); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index 31867dd..f1efe9d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -41,6 +41,10 @@ public: /// call. virtual void beginFunction(const MachineFunction *MF) = 0; + // \brief Emit any of function marker (like .cfi_endproc). This is called + // before endFunction and cannot switch sections. + virtual void markFunctionEnd(); + /// \brief Gather post-function debug information. /// Please note that some AsmPrinter implementations may not call /// beginFunction at all. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index f6ce4a0..e7631dd 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -72,7 +73,9 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { } /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, +void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, + const MCTargetOptions &MCOptions, + const MDNode *LocMDNode, InlineAsm::AsmDialect Dialect) const { assert(!Str.empty() && "Can't emit empty inline asm block"); @@ -89,10 +92,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, const MCAsmInfo *MCAI = TM.getMCAsmInfo(); assert(MCAI && "No MCAsmInfo"); if (!MCAI->useIntegratedAssembler() && - !OutStreamer.isIntegratedAssemblerRequired()) { - emitInlineAsmStart(TM.getSubtarget<MCSubtargetInfo>()); - OutStreamer.EmitRawText(Str); - emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), nullptr); + !OutStreamer->isIntegratedAssemblerRequired()) { + emitInlineAsmStart(); + OutStreamer->EmitRawText(Str); + emitInlineAsmEnd(STI, nullptr); return; } @@ -122,27 +125,21 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); std::unique_ptr<MCAsmParser> Parser( - createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI)); - - // Initialize the parser with a fresh subtarget info. It is better to use a - // new STI here because the parser may modify it and we do not want those - // modifications to persist after parsing the inlineasm. The modifications - // made by the parser will be seen by the code emitters because it passes - // the current STI down to the EncodeInstruction() method. - std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( - TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); - - // Preserve a copy of the original STI because the parser may modify it. For - // example, when switching between arm and thumb mode. If the target needs to - // emit code to return to the original state it can do so in - // emitInlineAsmEnd(). 
- MCSubtargetInfo STIOrig = *STI; + createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI)); - MCTargetOptions MCOptions; - if (MF) - MCOptions = MF->getTarget().Options.MCOptions; - std::unique_ptr<MCTargetAsmParser> TAP( - TM.getTarget().createMCAsmParser(*STI, *Parser, *MII, MCOptions)); + // Create a temporary copy of the original STI because the parser may modify + // it. For example, when switching between arm and thumb mode. If the target + // needs to emit code to return to the original state it can do so in + // emitInlineAsmEnd(). + MCSubtargetInfo TmpSTI = STI; + + // We create a new MCInstrInfo here since we might be at the module level + // and not have a MachineFunction to initialize the TargetInstrInfo from and + // we only need MCInstrInfo for asm parsing. We create one unconditionally + // because it's not subtarget dependent. + std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo()); + std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser( + TmpSTI, *Parser, *MII, MCOptions)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -153,11 +150,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); } - emitInlineAsmStart(STIOrig); + emitInlineAsmStart(); // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); - emitInlineAsmEnd(STIOrig, STI.get()); + emitInlineAsmEnd(STI, &TmpSTI); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); } @@ -452,14 +449,14 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // If this asmstr is empty, just print the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { - OutStreamer.emitRawComment(MAI->getInlineAsmStart()); - OutStreamer.emitRawComment(MAI->getInlineAsmEnd()); + OutStreamer->emitRawComment(MAI->getInlineAsmStart()); + OutStreamer->emitRawComment(MAI->getInlineAsmEnd()); return; } // Emit the #APP start marker. This has to happen even if verbose-asm isn't // enabled, so we use emitRawComment. - OutStreamer.emitRawComment(MAI->getInlineAsmStart()); + OutStreamer->emitRawComment(MAI->getInlineAsmStart()); // Get the !srcloc metadata node if we have it, and decode the loc cookie from // it. @@ -492,11 +489,17 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { else EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS); - EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); + // Reset SanitizeAddress based on the function's attribute. + MCTargetOptions MCOptions = TM.Options.MCOptions; + MCOptions.SanitizeAddress = + MF->getFunction()->hasFnAttribute(Attribute::SanitizeAddress); + + EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD, + MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use emitRawComment. - OutStreamer.emitRawComment(MAI->getInlineAsmEnd()); + OutStreamer->emitRawComment(MAI->getInlineAsmEnd()); } @@ -508,7 +511,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. 
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); if (!strcmp(Code, "private")) { OS << DL->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { @@ -569,7 +572,7 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } -void AsmPrinter::emitInlineAsmStart(const MCSubtargetInfo &StartInfo) const {} +void AsmPrinter::emitInlineAsmStart() const {} void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo) const {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index 42be114..0cc829f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -19,6 +19,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/LEB128.h" +#include <string> namespace llvm { class ByteStreamer { @@ -38,15 +40,15 @@ private: public: APByteStreamer(AsmPrinter &Asm) : AP(Asm) {} void EmitInt8(uint8_t Byte, const Twine &Comment) override { - AP.OutStreamer.AddComment(Comment); + AP.OutStreamer->AddComment(Comment); AP.EmitInt8(Byte); } void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { - AP.OutStreamer.AddComment(Comment); + AP.OutStreamer->AddComment(Comment); AP.EmitSLEB128(DWord); } void EmitULEB128(uint64_t DWord, const Twine &Comment) override { - AP.OutStreamer.AddComment(Comment); + AP.OutStreamer->AddComment(Comment); AP.EmitULEB128(DWord); } }; @@ -66,6 +68,41 @@ class HashingByteStreamer : public ByteStreamer { Hash.addULEB128(DWord); } }; + +class BufferByteStreamer : public ByteStreamer { +private: + SmallVectorImpl<char> &Buffer; + SmallVectorImpl<std::string> &Comments; + + /// \brief Only verbose textual output needs comments. This will be set to + /// true for that case, and false otherwise. If false, comments passed in to + /// the emit methods will be ignored. 
+ bool GenerateComments; + +public: + BufferByteStreamer(SmallVectorImpl<char> &Buffer, + SmallVectorImpl<std::string> &Comments, + bool GenerateComments) + : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + Buffer.push_back(Byte); + if (GenerateComments) + Comments.push_back(Comment.str()); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + raw_svector_ostream OSE(Buffer); + encodeSLEB128(DWord, OSE); + if (GenerateComments) + Comments.push_back(Comment.str()); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + raw_svector_ostream OSE(Buffer); + encodeULEB128(DWord, OSE); + if (GenerateComments) + Comments.push_back(Comment.str()); + } +}; + } #endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 64ba56b..1ccffe9 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -60,7 +61,7 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { /// Emit - Print the abbreviation using the specified asm printer. /// -void DIEAbbrev::Emit(AsmPrinter *AP) const { +void DIEAbbrev::Emit(const AsmPrinter *AP) const { // Emit its Dwarf tag type. AP->EmitULEB128(Tag, dwarf::TagString(Tag)); @@ -190,9 +191,67 @@ void DIE::dump() { } #endif -void DIEValue::anchor() { } +void DIEValue::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Ty) { +#define EMIT_VALUE_IMPL(Kind) \ + case is##Kind: \ + cast<DIE##Kind>(this)->EmitValueImpl(AP, Form); \ + break; + EMIT_VALUE_IMPL(Integer) + EMIT_VALUE_IMPL(String) + EMIT_VALUE_IMPL(Expr) + EMIT_VALUE_IMPL(Label) + EMIT_VALUE_IMPL(Delta) + EMIT_VALUE_IMPL(Entry) + EMIT_VALUE_IMPL(TypeSignature) + EMIT_VALUE_IMPL(Block) + EMIT_VALUE_IMPL(Loc) + EMIT_VALUE_IMPL(LocList) +#undef EMIT_VALUE_IMPL + } +} + +unsigned DIEValue::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Ty) { +#define SIZE_OF_IMPL(Kind) \ + case is##Kind: \ + return cast<DIE##Kind>(this)->SizeOfImpl(AP, Form); + SIZE_OF_IMPL(Integer) + SIZE_OF_IMPL(String) + SIZE_OF_IMPL(Expr) + SIZE_OF_IMPL(Label) + SIZE_OF_IMPL(Delta) + SIZE_OF_IMPL(Entry) + SIZE_OF_IMPL(TypeSignature) + SIZE_OF_IMPL(Block) + SIZE_OF_IMPL(Loc) + SIZE_OF_IMPL(LocList) +#undef SIZE_OF_IMPL + } + llvm_unreachable("Unknown DIE kind"); +} #ifndef NDEBUG +void DIEValue::print(raw_ostream &O) const { + switch (Ty) { +#define PRINT_IMPL(Kind) \ + case is##Kind: \ + cast<DIE##Kind>(this)->printImpl(O); \ + break; + PRINT_IMPL(Integer) + PRINT_IMPL(String) + PRINT_IMPL(Expr) + PRINT_IMPL(Label) + PRINT_IMPL(Delta) + PRINT_IMPL(Entry) + PRINT_IMPL(TypeSignature) + PRINT_IMPL(Block) + PRINT_IMPL(Loc) + PRINT_IMPL(LocList) +#undef PRINT_IMPL + } +} + void DIEValue::dump() const { print(dbgs()); } @@ -204,13 +263,13 @@ void DIEValue::dump() const { /// EmitValue - Emit integer of appropriate size. /// -void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIEInteger::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { unsigned Size = ~0U; switch (Form) { case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. 
// FIXME: Is there a better way to do this? - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddBlankLine(); return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru @@ -218,6 +277,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_ref2: // Fall thru case dwarf::DW_FORM_data2: Size = 2; break; case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_strp: // Fall thru case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: Size = 4; break; case dwarf::DW_FORM_ref8: // Fall thru @@ -229,14 +289,17 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; case dwarf::DW_FORM_addr: Size = Asm->getDataLayout().getPointerSize(); break; + case dwarf::DW_FORM_ref_addr: + Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr); + break; default: llvm_unreachable("DIE Value form not supported yet"); } - Asm->OutStreamer.EmitIntValue(Integer, Size); + Asm->OutStreamer->EmitIntValue(Integer, Size); } /// SizeOf - Determine size of integer value in bytes. /// -unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEInteger::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru @@ -245,6 +308,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref2: // Fall thru case dwarf::DW_FORM_data2: return sizeof(int16_t); case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_strp: // Fall thru case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: return sizeof(int32_t); case dwarf::DW_FORM_ref8: // Fall thru @@ -255,12 +319,16 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_udata: return getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); + case dwarf::DW_FORM_ref_addr: + if (AP->OutStreamer->getContext().getDwarfVersion() == 2) + return AP->getDataLayout().getPointerSize(); + return sizeof(int32_t); default: llvm_unreachable("DIE Value form not supported yet"); } } #ifndef NDEBUG -void DIEInteger::print(raw_ostream &O) const { +void DIEInteger::printImpl(raw_ostream &O) const { O << "Int: " << (int64_t)Integer << " 0x"; O.write_hex(Integer); } @@ -272,13 +340,13 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// -void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { - AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form)); +void DIEExpr::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { + AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. 
/// -unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEExpr::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -286,10 +354,7 @@ unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEExpr::print(raw_ostream &O) const { - O << "Expr: "; - Expr->print(O); -} +void DIEExpr::printImpl(raw_ostream &O) const { O << "Expr: " << *Expr; } #endif //===----------------------------------------------------------------------===// @@ -298,7 +363,7 @@ void DIEExpr::print(raw_ostream &O) const { /// EmitValue - Emit label value. /// -void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIELabel::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelReference(Label, SizeOf(AP, Form), Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset || @@ -307,7 +372,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { /// SizeOf - Determine size of label value in bytes. /// -unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELabel::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -315,7 +380,7 @@ unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIELabel::print(raw_ostream &O) const { +void DIELabel::printImpl(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } #endif @@ -326,13 +391,13 @@ void DIELabel::print(raw_ostream &O) const { /// EmitValue - Emit delta value. /// -void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIEDelta::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEDelta::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -340,7 +405,7 @@ unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEDelta::print(raw_ostream &O) const { +void DIEDelta::printImpl(raw_ostream &O) const { O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); } #endif @@ -351,20 +416,50 @@ void DIEDelta::print(raw_ostream &O) const { /// EmitValue - Emit string value. /// -void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { - Access->EmitValue(AP, Form); +void DIEString::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { + assert( + (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && + "Expected valid string form"); + + // Index of string in symbol table. + if (Form == dwarf::DW_FORM_GNU_str_index) { + DIEInteger(S.getIndex()).EmitValue(AP, Form); + return; + } + + // Relocatable symbol. + assert(Form == dwarf::DW_FORM_strp); + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { + DIELabel(S.getSymbol()).EmitValue(AP, Form); + return; + } + + // Offset into symbol table. + DIEInteger(S.getOffset()).EmitValue(AP, Form); } /// SizeOf - Determine size of delta value in bytes. 
/// -unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { - return Access->SizeOf(AP, Form); +unsigned DIEString::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { + assert( + (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && + "Expected valid string form"); + + // Index of string in symbol table. + if (Form == dwarf::DW_FORM_GNU_str_index) + return DIEInteger(S.getIndex()).SizeOf(AP, Form); + + // Relocatable symbol. + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + return DIELabel(S.getSymbol()).SizeOf(AP, Form); + + // Offset into symbol table. + return DIEInteger(S.getOffset()).SizeOf(AP, Form); } #ifndef NDEBUG -void DIEString::print(raw_ostream &O) const { - O << "String: " << Str << "\tSymbol: "; - Access->print(O); +void DIEString::printImpl(raw_ostream &O) const { + O << "String: " << S.getString(); } #endif @@ -372,32 +467,9 @@ void DIEString::print(raw_ostream &O) const { // DIEEntry Implementation //===----------------------------------------------------------------------===// -/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the -/// directive is specified by Size and Hi/Lo specify the labels. -static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi, - uint64_t Offset, const MCSymbol *Lo, - unsigned Size) { - MCContext &Context = Streamer.getContext(); - - // Emit Hi+Offset - Lo - // Get the Hi+Offset expression. - const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context), - MCConstantExpr::Create(Offset, Context), Context); - - // Get the Hi+Offset-Lo expression. - const MCExpr *Diff = MCBinaryExpr::CreateSub( - Plus, MCSymbolRefExpr::Create(Lo, Context), Context); - - // Otherwise, emit with .set (aka assignment). - MCSymbol *SetLabel = Context.CreateTempSymbol(); - Streamer.EmitAssignment(SetLabel, Diff); - Streamer.EmitSymbolValue(SetLabel, Size); -} - /// EmitValue - Emit debug information entry offset. 
/// -void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIEEntry::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_ref_addr) { const DwarfDebug *DD = AP->getDwarfDebug(); @@ -413,14 +485,12 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, DIEEntry::getRefAddrSize(AP)); else - emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr, - CU->getSectionSym(), - DIEEntry::getRefAddrSize(AP)); + AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP)); } else AP->EmitInt32(Entry.getOffset()); } -unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { +unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) { // DWARF4: References that use the attribute form DW_FORM_ref_addr are // specified to be four bytes in the DWARF 32-bit format and eight bytes // in the DWARF 64-bit format, while DWARF Version 2 specifies that such @@ -433,7 +503,7 @@ unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { } #ifndef NDEBUG -void DIEEntry::print(raw_ostream &O) const { +void DIEEntry::printImpl(raw_ostream &O) const { O << format("Die: 0x%lx", (long)(intptr_t)&Entry); } #endif @@ -441,17 +511,15 @@ void DIEEntry::print(raw_ostream &O) const { //===----------------------------------------------------------------------===// // DIETypeSignature Implementation //===----------------------------------------------------------------------===// -void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIETypeSignature::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { assert(Form == dwarf::DW_FORM_ref_sig8); - Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8); + Asm->OutStreamer->EmitIntValue(Unit.getTypeSignature(), 8); } #ifndef NDEBUG -void DIETypeSignature::print(raw_ostream &O) const { +void DIETypeSignature::printImpl(raw_ostream &O) const { O << format("Type Unit: 0x%lx", Unit.getTypeSignature()); } - -void DIETypeSignature::dump() const { print(dbgs()); } #endif //===----------------------------------------------------------------------===// @@ -460,7 +528,7 @@ void DIETypeSignature::dump() const { print(dbgs()); } /// ComputeSize - calculate the size of the location expression. /// -unsigned DIELoc::ComputeSize(AsmPrinter *AP) const { +unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { if (!Size) { const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -472,7 +540,7 @@ unsigned DIELoc::ComputeSize(AsmPrinter *AP) const { /// EmitValue - Emit location data. /// -void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIELoc::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -490,7 +558,7 @@ void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of location data in bytes. 
/// -unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELoc::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -503,7 +571,7 @@ unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIELoc::print(raw_ostream &O) const { +void DIELoc::printImpl(raw_ostream &O) const { O << "ExprLoc: "; DIE::print(O, 5); } @@ -515,7 +583,7 @@ void DIELoc::print(raw_ostream &O) const { /// ComputeSize - calculate the size of the block. /// -unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const { +unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { if (!Size) { const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -527,7 +595,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const { /// EmitValue - Emit block data. /// -void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { +void DIEBlock::EmitValueImpl(const AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -543,7 +611,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { /// SizeOf - Determine size of block data in bytes. /// -unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIEBlock::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); @@ -554,7 +622,7 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { } #ifndef NDEBUG -void DIEBlock::print(raw_ostream &O) const { +void DIEBlock::printImpl(raw_ostream &O) const { O << "Blk: "; DIE::print(O, 5); } @@ -564,7 +632,7 @@ void DIEBlock::print(raw_ostream &O) const { // DIELocList Implementation //===----------------------------------------------------------------------===// -unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { +unsigned DIELocList::SizeOfImpl(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; @@ -574,18 +642,18 @@ unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { /// EmitValue - Emit location list value.
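The DIELoc and DIEBlock SizeOf overloads above all apply the same rule: a DWARF block's on-disk size is its payload plus the length prefix selected by the form. A small self-contained sketch of that arithmetic, with hypothetical names:

#include <cstdint>
#include <iostream>

enum class BlockForm { Block1, Block2, Block4 };

unsigned blockSize(unsigned PayloadBytes, BlockForm F) {
  switch (F) {
  case BlockForm::Block1: return PayloadBytes + sizeof(int8_t);  // 1-byte length prefix
  case BlockForm::Block2: return PayloadBytes + sizeof(int16_t); // 2-byte length prefix
  case BlockForm::Block4: return PayloadBytes + sizeof(int32_t); // 4-byte length prefix
  }
  return 0;
}

int main() {
  std::cout << blockSize(10, BlockForm::Block1) << "\n"; // 11
  std::cout << blockSize(10, BlockForm::Block4) << "\n"; // 14
}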
/// -void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { +void DIELocList::EmitValueImpl(const AsmPrinter *AP, dwarf::Form Form) const { DwarfDebug *DD = AP->getDwarfDebug(); - MCSymbol *Label = DD->getDebugLocEntries()[Index].Label; + MCSymbol *Label = DD->getDebugLocs().getList(Index).Label; if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf()) - AP->EmitSectionOffset(Label, DD->getDebugLocSym()); + AP->emitSectionOffset(Label); else - AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4); + AP->EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); } #ifndef NDEBUG -void DIELocList::print(raw_ostream &O) const { +void DIELocList::printImpl(raw_ostream &O) const { O << "LocList: " << Index; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 1e2ba2c..a2e5aad 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -285,8 +285,8 @@ void DIEHash::hashBlockData(const SmallVectorImpl<DIEValue *> &Values) { void DIEHash::hashLocList(const DIELocList &LocList) { HashingByteStreamer Streamer(*this); DwarfDebug &DD = *AP->getDwarfDebug(); - for (const auto &Entry : - DD.getDebugLocEntries()[LocList.getValue()].List) + const DebugLocStream &Locs = DD.getDebugLocs(); + for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue()))) DD.emitDebugLocEntry(Streamer, Entry); } @@ -510,7 +510,7 @@ uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) { // ... take the least significant 8 bytes and return those. Our MD5 // implementation always returns its results in little endian, swap bytes // appropriately. - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } /// This is based on the type signature computation given in section 7.27 of the @@ -531,7 +531,7 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) { // ... take the least significant 8 bytes and return those. Our MD5 // implementation always returns its results in little endian, swap bytes // appropriately. - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } /// This is based on the type signature computation given in section 7.27 of the @@ -555,5 +555,5 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) { // ... take the least significant 8 bytes and return those. Our MD5 // implementation always returns its results in little endian, swap bytes // appropriately. - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 0c2a5e5..3c46a99 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <map> @@ -32,7 +33,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) { return MI.getOperand(0).isReg() ? 
MI.getOperand(0).getReg() : 0; } -void DbgValueHistoryMap::startInstrRange(const MDNode *Var, +void DbgValueHistoryMap::startInstrRange(InlinedVariable Var, const MachineInstr &MI) { // Instruction range should start with a DBG_VALUE instruction for the // variable. @@ -47,7 +48,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var, Ranges.push_back(std::make_pair(&MI, nullptr)); } -void DbgValueHistoryMap::endInstrRange(const MDNode *Var, +void DbgValueHistoryMap::endInstrRange(InlinedVariable Var, const MachineInstr &MI) { auto &Ranges = VarInstrRanges[Var]; // Verify that the current instruction range is not yet closed. @@ -58,7 +59,7 @@ void DbgValueHistoryMap::endInstrRange(const MDNode *Var, Ranges.back().second = &MI; } -unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { +unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const { const auto &I = VarInstrRanges.find(Var); if (I == VarInstrRanges.end()) return 0; @@ -70,12 +71,13 @@ unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { namespace { // Maps physreg numbers to the variables they describe. -typedef std::map<unsigned, SmallVector<const MDNode *, 1>> RegDescribedVarsMap; +typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; +typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap; } // \brief Claim that @Var is not described by @RegNo anymore. -static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, - unsigned RegNo, const MDNode *Var) { +static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { const auto &I = RegVars.find(RegNo); assert(RegNo != 0U && I != RegVars.end()); auto &VarSet = I->second; @@ -88,8 +90,8 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, } // \brief Claim that @Var is now described by @RegNo. -static void addRegDescribedVar(RegDescribedVarsMap &RegVars, - unsigned RegNo, const MDNode *Var) { +static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { assert(RegNo != 0U); auto &VarSet = RegVars[RegNo]; assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); @@ -202,7 +204,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // Use the base variable (without any DW_OP_piece expressions) // as index into History. The full variables including the // piece expressions are attached to the MI. 
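The switch above from const MDNode * keys to InlinedVariable pairs matters because one source variable inlined at two different call sites must get two distinct location histories. A minimal sketch of that keying, using stand-in types for DILocalVariable and DILocation:

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-ins for DILocalVariable / DILocation.
struct Var { std::string Name; };
struct Loc { int Line; };

using InlinedVariable = std::pair<const Var *, const Loc *>;

int main() {
  Var X{"x"};
  Loc CallA{10}, CallB{20};
  // The same source variable inlined at two call sites gets two
  // independent histories because the key includes the inlined-at node.
  std::map<InlinedVariable, std::vector<int>> History;
  History[{&X, &CallA}].push_back(1);
  History[{&X, &CallB}].push_back(2);
  std::cout << History.size() << "\n"; // prints 2
}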
- DIVariable Var = MI.getDebugVariable(); + const DILocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); if (unsigned PrevReg = Result.getRegisterForVar(Var)) dropRegDescribedVar(RegVars, PrevReg, Var); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index 4b62007..546d1b4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -17,7 +17,8 @@ namespace llvm { class MachineFunction; class MachineInstr; -class MDNode; +class DILocalVariable; +class DILocation; class TargetRegisterInfo; // For each user variable, keep a list of instruction ranges where this variable @@ -31,16 +32,19 @@ class DbgValueHistoryMap { public: typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange; typedef SmallVector<InstrRange, 4> InstrRanges; - typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap; + typedef std::pair<const DILocalVariable *, const DILocation *> + InlinedVariable; + typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap; + private: InstrRangesMap VarInstrRanges; public: - void startInstrRange(const MDNode *Var, const MachineInstr &MI); - void endInstrRange(const MDNode *Var, const MachineInstr &MI); + void startInstrRange(InlinedVariable Var, const MachineInstr &MI); + void endInstrRange(InlinedVariable Var, const MachineInstr &MI); // Returns register currently describing @Var. If @Var is currently // unaccessible or is not described by a register, returns 0. - unsigned getRegisterForVar(const MDNode *Var) const; + unsigned getRegisterForVar(InlinedVariable Var) const; bool empty() const { return VarInstrRanges.empty(); } void clear() { VarInstrRanges.clear(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index b4fcada..6a943c6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -9,52 +9,51 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H +#include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" namespace llvm { -class MDNode; +class AsmPrinter; +class DebugLocStream; + /// \brief This struct describes location entries emitted in the .debug_loc /// section. class DebugLocEntry { - // Begin and end symbols for the address range that this location is valid. + /// Begin and end symbols for the address range that this location is valid. const MCSymbol *Begin; const MCSymbol *End; public: - /// A single location or constant. + /// \brief A single location or constant. 
struct Value { - Value(const MDNode *Var, const MDNode *Expr, int64_t i) - : Variable(Var), Expression(Expr), EntryKind(E_Integer) { + Value(const DIExpression *Expr, int64_t i) + : Expression(Expr), EntryKind(E_Integer) { Constant.Int = i; } - Value(const MDNode *Var, const MDNode *Expr, const ConstantFP *CFP) - : Variable(Var), Expression(Expr), EntryKind(E_ConstantFP) { + Value(const DIExpression *Expr, const ConstantFP *CFP) + : Expression(Expr), EntryKind(E_ConstantFP) { Constant.CFP = CFP; } - Value(const MDNode *Var, const MDNode *Expr, const ConstantInt *CIP) - : Variable(Var), Expression(Expr), EntryKind(E_ConstantInt) { + Value(const DIExpression *Expr, const ConstantInt *CIP) + : Expression(Expr), EntryKind(E_ConstantInt) { Constant.CIP = CIP; } - Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc) - : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) { - assert(DIVariable(Var).Verify()); - assert(DIExpression(Expr).Verify()); + Value(const DIExpression *Expr, MachineLocation Loc) + : Expression(Expr), EntryKind(E_Location), Loc(Loc) { + assert(cast<DIExpression>(Expr)->isValid()); } - // The variable to which this location entry corresponds. - const MDNode *Variable; - - // Any complex address location expression for this Value. - const MDNode *Expression; + /// Any complex address location expression for this Value. + const DIExpression *Expression; - // Type of entry that this represents. + /// Type of entry that this represents. enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; enum EntryType EntryKind; - // Either a constant, + /// Either a constant, union { int64_t Int; const ConstantFP *CFP; @@ -72,10 +71,8 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } - const MDNode *getVariableNode() const { return Variable; } - DIVariable getVariable() const { return DIVariable(Variable); } - bool isVariablePiece() const { return getExpression().isVariablePiece(); } - DIExpression getExpression() const { return DIExpression(Expression); } + bool isBitPiece() const { return getExpression()->isBitPiece(); } + const DIExpression *getExpression() const { return Expression; } friend bool operator==(const Value &, const Value &); friend bool operator<(const Value &, const Value &); }; @@ -92,17 +89,14 @@ public: } /// \brief If this and Next are describing different pieces of the same - // variable, merge them by appending Next's values to the current - // list of values. - // Return true if the merge was successful. + /// variable, merge them by appending Next's values to the current + /// list of values. + /// Return true if the merge was successful. 
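DebugLocEntry::Value above is a classic tagged union: EntryKind records which union member is active, and each accessor is only meaningful for the matching kind. A compilable miniature of that layout, with opaque stand-ins for ConstantFP and ConstantInt:

#include <cassert>
#include <cstdint>

struct ConstantFP;
struct ConstantInt;

struct LocValue {
  enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
  EntryType EntryKind;
  union {
    int64_t Int;
    const ConstantFP *CFP;
    const ConstantInt *CIP;
  } Constant;

  explicit LocValue(int64_t I) : EntryKind(E_Integer) { Constant.Int = I; }

  bool isInt() const { return EntryKind == E_Integer; }
  int64_t getInt() const {
    assert(isInt() && "reading an inactive union member");
    return Constant.Int;
  }
};

int main() {
  LocValue V(42);
  assert(V.getInt() == 42);
}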
bool MergeValues(const DebugLocEntry &Next) { if (Begin == Next.Begin) { - DIExpression Expr(Values[0].Expression); - DIVariable Var(Values[0].Variable); - DIExpression NextExpr(Next.Values[0].Expression); - DIVariable NextVar(Next.Values[0].Variable); - if (Var == NextVar && Expr.isVariablePiece() && - NextExpr.isVariablePiece()) { + auto *Expr = cast_or_null<DIExpression>(Values[0].Expression); + auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression); + if (Expr->isBitPiece() && NextExpr->isBitPiece()) { addValues(Next.Values); End = Next.End; return true; @@ -131,24 +125,28 @@ public: Values.append(Vals.begin(), Vals.end()); sortUniqueValues(); assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){ - return V.isVariablePiece(); + return V.isBitPiece(); }) && "value must be a piece"); } - // Sort the pieces by offset. + // \brief Sort the pieces by offset. // Remove any duplicate entries by dropping all but the first. void sortUniqueValues() { std::sort(Values.begin(), Values.end()); - Values.erase(std::unique(Values.begin(), Values.end(), - [](const Value &A, const Value &B) { - return A.getVariable() == B.getVariable() && - A.getExpression() == B.getExpression(); - }), - Values.end()); + Values.erase( + std::unique( + Values.begin(), Values.end(), [](const Value &A, const Value &B) { + return A.getExpression() == B.getExpression(); + }), + Values.end()); } + + /// \brief Lower this entry into a DWARF expression. + void finalize(const AsmPrinter &AP, DebugLocStream &Locs, + const DIBasicType *BT); }; -/// Compare two Values for equality. +/// \brief Compare two Values for equality. inline bool operator==(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { if (A.EntryKind != B.EntryKind) @@ -157,9 +155,6 @@ inline bool operator==(const DebugLocEntry::Value &A, if (A.Expression != B.Expression) return false; - if (A.Variable != B.Variable) - return false; - switch (A.EntryKind) { case DebugLocEntry::Value::E_Location: return A.Loc == B.Loc; @@ -173,11 +168,11 @@ inline bool operator==(const DebugLocEntry::Value &A, llvm_unreachable("unhandled EntryKind"); } -/// Compare two pieces based on their offset. +/// \brief Compare two pieces based on their offset. inline bool operator<(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { - return A.getExpression().getPieceOffset() < - B.getExpression().getPieceOffset(); + return A.getExpression()->getBitPieceOffset() < + B.getExpression()->getBitPieceOffset(); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h deleted file mode 100644 index 0f1d2ed..0000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocList.h +++ /dev/null @@ -1,25 +0,0 @@ -//===--- lib/CodeGen/DebugLocList.h - DWARF debug_loc list ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
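sortUniqueValues() above orders the pieces by offset and then drops later duplicates that share an expression. The same sort/unique pattern in standalone form; Piece is a hypothetical stand-in:

#include <algorithm>
#include <iostream>
#include <vector>

struct Piece {
  unsigned BitOffset;
  const void *Expr; // stand-in for the DIExpression* identity
};

void sortUnique(std::vector<Piece> &Vals) {
  // Order by offset so duplicates become adjacent...
  std::sort(Vals.begin(), Vals.end(),
            [](const Piece &A, const Piece &B) { return A.BitOffset < B.BitOffset; });
  // ...then keep only the first of each run with the same expression.
  Vals.erase(std::unique(Vals.begin(), Vals.end(),
                         [](const Piece &A, const Piece &B) {
                           return A.Expr == B.Expr;
                         }),
             Vals.end());
}

int main() {
  int E0, E1;
  std::vector<Piece> V{{32, &E1}, {0, &E0}, {0, &E0}};
  sortUnique(V);
  std::cout << V.size() << "\n"; // 2, sorted by offset
}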
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H -#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H - -#include "DebugLocEntry.h" -#include "llvm/ADT/SmallVector.h" - -namespace llvm { -class DwarfCompileUnit; -class MCSymbol; -struct DebugLocList { - MCSymbol *Label; - DwarfCompileUnit *CU; - SmallVector<DebugLocEntry, 4> List; -}; -} -#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h new file mode 100644 index 0000000..3001da2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -0,0 +1,133 @@ +//===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "ByteStreamer.h" + +namespace llvm { +class DwarfCompileUnit; +class MCSymbol; + +/// \brief Byte stream of .debug_loc entries. +/// +/// Stores a unified stream of .debug_loc entries. There's \a List for each +/// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry. +/// +/// FIXME: Do we need all these temp symbols? +/// FIXME: Why not output directly to the output stream? +class DebugLocStream { +public: + struct List { + DwarfCompileUnit *CU; + MCSymbol *Label; + size_t EntryOffset; + List(DwarfCompileUnit *CU, MCSymbol *Label, size_t EntryOffset) + : CU(CU), Label(Label), EntryOffset(EntryOffset) {} + }; + struct Entry { + const MCSymbol *BeginSym; + const MCSymbol *EndSym; + size_t ByteOffset; + size_t CommentOffset; + Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset, + size_t CommentOffset) + : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset), + CommentOffset(CommentOffset) {} + }; + +private: + SmallVector<List, 4> Lists; + SmallVector<Entry, 32> Entries; + SmallString<256> DWARFBytes; + SmallVector<std::string, 32> Comments; + + /// \brief Only verbose textual output needs comments. This will be set to + /// true for that case, and false otherwise. + bool GenerateComments; + +public: + DebugLocStream(bool GenerateComments) : GenerateComments(GenerateComments) { } + size_t getNumLists() const { return Lists.size(); } + const List &getList(size_t LI) const { return Lists[LI]; } + ArrayRef<List> getLists() const { return Lists; } + + /// \brief Start a new .debug_loc entry list. + /// + /// Start a new .debug_loc entry list. Return the new list's index so it can + /// be retrieved later via \a getList(). + /// + /// Until the next call, \a startEntry() will add entries to this list. + size_t startList(DwarfCompileUnit *CU, MCSymbol *Label) { + size_t LI = Lists.size(); + Lists.emplace_back(CU, Label, Entries.size()); + return LI; + } + + /// \brief Start a new .debug_loc entry. + /// + /// Until the next call, bytes added to the stream will be added to this + /// entry. 
+ void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) { + Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size()); + } + + BufferByteStreamer getStreamer() { + return BufferByteStreamer(DWARFBytes, Comments, GenerateComments); + } + + ArrayRef<Entry> getEntries(const List &L) const { + size_t LI = getIndex(L); + return makeArrayRef(Entries) + .slice(Lists[LI].EntryOffset, getNumEntries(LI)); + } + + ArrayRef<char> getBytes(const Entry &E) const { + size_t EI = getIndex(E); + return makeArrayRef(DWARFBytes.begin(), DWARFBytes.end()) + .slice(Entries[EI].ByteOffset, getNumBytes(EI)); + } + ArrayRef<std::string> getComments(const Entry &E) const { + size_t EI = getIndex(E); + return makeArrayRef(Comments) + .slice(Entries[EI].CommentOffset, getNumComments(EI)); + } + +private: + size_t getIndex(const List &L) const { + assert(&Lists.front() <= &L && &L <= &Lists.back() && + "Expected valid list"); + return &L - &Lists.front(); + } + size_t getIndex(const Entry &E) const { + assert(&Entries.front() <= &E && &E <= &Entries.back() && + "Expected valid entry"); + return &E - &Entries.front(); + } + size_t getNumEntries(size_t LI) const { + if (LI + 1 == Lists.size()) + return Entries.size() - Lists[LI].EntryOffset; + return Lists[LI + 1].EntryOffset - Lists[LI].EntryOffset; + } + size_t getNumBytes(size_t EI) const { + if (EI + 1 == Entries.size()) + return DWARFBytes.size() - Entries[EI].ByteOffset; + return Entries[EI + 1].ByteOffset - Entries[EI].ByteOffset; + } + size_t getNumComments(size_t EI) const { + if (EI + 1 == Entries.size()) + return Comments.size() - Entries[EI].CommentOffset; + return Entries[EI + 1].CommentOffset - Entries[EI].CommentOffset; + } +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index a71f35e..58b406b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -30,14 +30,14 @@ DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) : Header(8 + (atomList.size() * 4)), HeaderData(atomList), Entries(Allocator) {} -void DwarfAccelTable::AddName(StringRef Name, MCSymbol *StrSym, const DIE *die, +void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die, char Flags) { assert(Data.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. - DataArray &DIEs = Entries[Name]; - assert(!DIEs.StrSym || DIEs.StrSym == StrSym); - DIEs.StrSym = StrSym; + DataArray &DIEs = Entries[Name.getString()]; + assert(!DIEs.Name || DIEs.Name == Name); + DIEs.Name = Name; DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags)); } @@ -54,7 +54,7 @@ void DwarfAccelTable::ComputeBucketCount(void) { // Then compute the bucket size, minimum of 1 bucket. if (num > 1024) Header.bucket_count = num / 4; - if (num > 16) + else if (num > 16) Header.bucket_count = num / 2; else Header.bucket_count = num > 0 ? num : 1; @@ -70,6 +70,7 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { // Create the individual hash data outputs. 
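The new DebugLocStream above never stores per-list vectors; each List records only a start offset into flat Entries/DWARFBytes/Comments storage, and a list's length is recovered by subtracting adjacent offsets, with the last list running to the end. That bookkeeping in isolation:

#include <cstddef>
#include <iostream>
#include <vector>

// List i owns entries [Starts[i], Starts[i+1]); the last list runs to the
// end of the flat storage, exactly like getNumEntries() above.
size_t numEntries(const std::vector<size_t> &Starts, size_t TotalEntries,
                  size_t ListIdx) {
  if (ListIdx + 1 == Starts.size())
    return TotalEntries - Starts[ListIdx];
  return Starts[ListIdx + 1] - Starts[ListIdx];
}

int main() {
  std::vector<size_t> Starts = {0, 3, 3, 7}; // four lists; list 1 is empty
  std::cout << numEntries(Starts, 10, 0) << "\n"; // 3
  std::cout << numEntries(Starts, 10, 1) << "\n"; // 0
  std::cout << numEntries(Starts, 10, 3) << "\n"; // 3 (runs to the end)
}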
+ Data.reserve(Entries.size()); for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { @@ -95,33 +96,42 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { for (size_t i = 0, e = Data.size(); i < e; ++i) { uint32_t bucket = Data[i]->HashValue % Header.bucket_count; Buckets[bucket].push_back(Data[i]); - Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); + Data[i]->Sym = Asm->createTempSymbol(Prefix); } + + // Sort the contents of the buckets by hash value so that hash + // collisions end up together. Stable sort makes testing easier and + // doesn't cost much more. + for (size_t i = 0; i < Buckets.size(); ++i) + std::stable_sort(Buckets[i].begin(), Buckets[i].end(), + [] (HashData *LHS, HashData *RHS) { + return LHS->HashValue < RHS->HashValue; + }); } // Emits the header for the table via the AsmPrinter. void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { - Asm->OutStreamer.AddComment("Header Magic"); + Asm->OutStreamer->AddComment("Header Magic"); Asm->EmitInt32(Header.magic); - Asm->OutStreamer.AddComment("Header Version"); + Asm->OutStreamer->AddComment("Header Version"); Asm->EmitInt16(Header.version); - Asm->OutStreamer.AddComment("Header Hash Function"); + Asm->OutStreamer->AddComment("Header Hash Function"); Asm->EmitInt16(Header.hash_function); - Asm->OutStreamer.AddComment("Header Bucket Count"); + Asm->OutStreamer->AddComment("Header Bucket Count"); Asm->EmitInt32(Header.bucket_count); - Asm->OutStreamer.AddComment("Header Hash Count"); + Asm->OutStreamer->AddComment("Header Hash Count"); Asm->EmitInt32(Header.hashes_count); - Asm->OutStreamer.AddComment("Header Data Length"); + Asm->OutStreamer->AddComment("Header Data Length"); Asm->EmitInt32(Header.header_data_len); - Asm->OutStreamer.AddComment("HeaderData Die Offset Base"); + Asm->OutStreamer->AddComment("HeaderData Die Offset Base"); Asm->EmitInt32(HeaderData.die_offset_base); - Asm->OutStreamer.AddComment("HeaderData Atom Count"); + Asm->OutStreamer->AddComment("HeaderData Atom Count"); Asm->EmitInt32(HeaderData.Atoms.size()); for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { Atom A = HeaderData.Atoms[i]; - Asm->OutStreamer.AddComment(dwarf::AtomTypeString(A.type)); + Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.type)); Asm->EmitInt16(A.type); - Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); + Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.form)); Asm->EmitInt16(A.form); } } @@ -131,24 +141,37 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { unsigned index = 0; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { - Asm->OutStreamer.AddComment("Bucket " + Twine(i)); + Asm->OutStreamer->AddComment("Bucket " + Twine(i)); if (Buckets[i].size() != 0) Asm->EmitInt32(index); else Asm->EmitInt32(UINT32_MAX); - index += Buckets[i].size(); + // Buckets point in the list of hashes, not to the data. Do not + // increment the index multiple times in case of hash collisions. + uint64_t PrevHash = UINT64_MAX; + for (auto *HD : Buckets[i]) { + uint32_t HashValue = HD->HashValue; + if (PrevHash != HashValue) + ++index; + PrevHash = HashValue; + } } } // Walk through the buckets and emit the individual hashes for each // bucket. 
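The one-character ComputeBucketCount fix above (if to else if) is easy to miss: previously a table with more than 1024 unique hashes was first assigned num/4 buckets and then immediately overwritten by the num/2 branch. A sketch of the corrected cascade:

#include <iostream>

unsigned bucketCount(unsigned NumUniqueHashes) {
  // With independent 'if's, the >16 branch would clobber the >1024 result.
  if (NumUniqueHashes > 1024)
    return NumUniqueHashes / 4;
  if (NumUniqueHashes > 16)
    return NumUniqueHashes / 2;
  return NumUniqueHashes > 0 ? NumUniqueHashes : 1; // minimum of 1 bucket
}

int main() {
  std::cout << bucketCount(4096) << "\n"; // 1024, not 2048
  std::cout << bucketCount(100) << "\n";  // 50
  std::cout << bucketCount(0) << "\n";    // 1
}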
void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { - Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); - Asm->EmitInt32((*HI)->HashValue); + uint32_t HashValue = (*HI)->HashValue; + if (PrevHash == HashValue) + continue; + Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(i)); + Asm->EmitInt32(HashValue); + PrevHash = HashValue; } } } @@ -157,17 +180,22 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { // element in each bucket. This is done via a symbol subtraction from the // beginning of the section. The non-section symbol will be output later // when we emit the actual data. -void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { +void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) { + uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { - Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); - MCContext &Context = Asm->OutStreamer.getContext(); + uint32_t HashValue = (*HI)->HashValue; + if (PrevHash == HashValue) + continue; + PrevHash = HashValue; + Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); + MCContext &Context = Asm->OutStreamer->getContext(); const MCExpr *Sub = MCBinaryExpr::CreateSub( MCSymbolRefExpr::Create((*HI)->Sym, Context), MCSymbolRefExpr::Create(SecBegin, Context), Context); - Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t)); + Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t)); } } } @@ -175,18 +203,21 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { // Walk through the buckets and emit the full data for each element in // the bucket. For the string case emit the dies and the various offsets. // Terminate each HashData bucket with 0. -void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D, - MCSymbol *StrSym) { - uint64_t PrevHash = UINT64_MAX; +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + uint64_t PrevHash = UINT64_MAX; for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { + // Terminate the previous entry if there is no hash collision + // with the current one. + if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); // Remember to emit the label for our offset. - Asm->OutStreamer.EmitLabel((*HI)->Sym); - Asm->OutStreamer.AddComment((*HI)->Str); - Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym); - Asm->OutStreamer.AddComment("Num DIEs"); + Asm->OutStreamer->EmitLabel((*HI)->Sym); + Asm->OutStreamer->AddComment((*HI)->Str); + Asm->emitDwarfStringOffset((*HI)->Data.Name); + Asm->OutStreamer->AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.Values.size()); for (HashDataContents *HD : (*HI)->Data.Values) { // Emit the DIE offset @@ -200,17 +231,17 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D, Asm->EmitInt8(HD->Flags); } } - // Emit a 0 to terminate the data unless we have a hash collision. - if (PrevHash != (*HI)->HashValue) - Asm->EmitInt32(0); PrevHash = (*HI)->HashValue; } + // Emit the final end marker for the bucket. + if (!Buckets[i].empty()) + Asm->EmitInt32(0); } } // Emit the entire data structure to the output file. 
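The PrevHash bookkeeping introduced above depends on the earlier stable sort: colliding hashes sit next to each other in a bucket, so each distinct hash (and its offset) is emitted exactly once while the colliding DIE data is merged under it. The pattern in isolation:

#include <cstdint>
#include <iostream>
#include <vector>

void emitUniqueHashes(const std::vector<uint32_t> &SortedBucket) {
  uint64_t PrevHash = UINT64_MAX; // sentinel wider than any 32-bit hash
  for (uint32_t HashValue : SortedBucket) {
    if (HashValue == PrevHash)
      continue; // hash collision: merged under the previous entry
    std::cout << "hash 0x" << std::hex << HashValue << std::dec << "\n";
    PrevHash = HashValue;
  }
}

int main() {
  emitUniqueHashes({1, 1, 2, 5, 5, 5}); // emits 1, 2, 5
}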
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D, - MCSymbol *StrSym) { +void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin, + DwarfDebug *D) { // Emit the header. EmitHeader(Asm); @@ -221,10 +252,10 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D, EmitHashes(Asm); // Emit the offsets. - EmitOffsets(Asm, SecBegin); + emitOffsets(Asm, SecBegin); // Emit the hash data. - EmitData(Asm, D, StrSym); + EmitData(Asm, D); } #ifndef NDEBUG diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 4a6085b..4d81441 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -181,9 +181,8 @@ public: private: // String Data struct DataArray { - MCSymbol *StrSym; + DwarfStringPoolEntryRef Name; std::vector<HashDataContents *> Values; - DataArray() : StrSym(nullptr) {} }; friend struct HashData; struct HashData { @@ -201,7 +200,7 @@ private: O << " Hash Value: " << format("0x%x", HashValue) << "\n"; O << " Symbol: "; if (Sym) - Sym->print(O); + O << *Sym; else O << "<none>"; O << "\n"; @@ -215,15 +214,15 @@ private: #endif }; - DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; - void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; + DwarfAccelTable(const DwarfAccelTable &) = delete; + void operator=(const DwarfAccelTable &) = delete; // Internal Functions void EmitHeader(AsmPrinter *); void EmitBuckets(AsmPrinter *); void EmitHashes(AsmPrinter *); - void EmitOffsets(AsmPrinter *, MCSymbol *); - void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym); + void emitOffsets(AsmPrinter *, const MCSymbol *); + void EmitData(AsmPrinter *, DwarfDebug *D); // Allocator for HashData and HashDataContents. BumpPtrAllocator Allocator; @@ -245,10 +244,9 @@ private: // Public Implementation public: DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); - void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die, - char Flags = 0); + void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0); void FinalizeTable(AsmPrinter *, StringRef); - void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym); + void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 0dc52da..0bc873e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -39,9 +39,24 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) + : EHStreamer(A), shouldEmitCFI(false) {} + +void DwarfCFIExceptionBase::markFunctionEnd() { + if (shouldEmitCFI) + Asm->OutStreamer->EmitCFIEndProc(); + + if (MMI->getLandingPads().empty()) + return; + + // Map all labels and get rid of any dead landing pads. 
+ MMI->TidyLandingPads(); +} + DwarfCFIException::DwarfCFIException(AsmPrinter *A) - : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false), - shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {} + : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), + shouldEmitLSDA(false), shouldEmitMoves(false), + moveTypeModule(AsmPrinter::CFI_M_None) {} DwarfCFIException::~DwarfCFIException() {} @@ -49,9 +64,10 @@ DwarfCFIException::~DwarfCFIException() {} /// content. void DwarfCFIException::endModule() { if (moveTypeModule == AsmPrinter::CFI_M_Debug) - Asm->OutStreamer.EmitCFISections(false, true); + Asm->OutStreamer->EmitCFISections(false, true); - if (!Asm->MAI->usesItaniumLSDAForExceptions()) + // SjLj uses this pass and it doesn't need this info. + if (!Asm->MAI->usesCFIForEH()) return; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); @@ -67,12 +83,10 @@ void DwarfCFIException::endModule() { if (!Personalities[i]) continue; MCSymbol *Sym = Asm->getSymbol(Personalities[i]); - TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); + TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->TM, Sym); } } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; @@ -90,7 +104,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + const Function *Per = MMI->getPersonality(); shouldEmitPersonality = hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit && Per; @@ -99,10 +113,11 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - if (!shouldEmitPersonality && !shouldEmitMoves) + shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves; + if (!shouldEmitCFI) return; - Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false); + Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false); // Indicate personality routine, if any. if (!shouldEmitPersonality) @@ -110,45 +125,20 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); - - MCSymbol *EHBegin = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); - if (Asm->MAI->useAssignmentForEHBegin()) { - MCContext &Ctx = Asm->OutContext; - MCSymbol *CurPos = Ctx.CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(CurPos); - Asm->OutStreamer.EmitAssignment(EHBegin, - MCSymbolRefExpr::Create(CurPos, Ctx)); - } else { - Asm->OutStreamer.EmitLabel(EHBegin); - } + Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); // Provide LSDA information. if (!shouldEmitLSDA) return; - Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber()), - LSDAEncoding); + Asm->OutStreamer->EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding); } /// endFunction - Gather and emit post-function exception information. 
/// void DwarfCFIException::endFunction(const MachineFunction *) { - if (!shouldEmitPersonality && !shouldEmitMoves) - return; - - Asm->OutStreamer.EmitCFIEndProc(); - if (!shouldEmitPersonality) return; - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - emitExceptionTable(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index b4dba9c..c10e703 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1,6 +1,7 @@ #include "DwarfCompileUnit.h" #include "DwarfExpression.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -15,11 +16,11 @@ namespace llvm { -DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node, +DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), - Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) { + Skeleton(nullptr), BaseAddress(nullptr) { insertDIE(Node, &getUnitDie()); } @@ -63,9 +64,9 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, // FIXME: add a better feature test than hasRawTextSupport. Even better, // extend .file to support this. - return Asm->OutStreamer.EmitDwarfFileDirective( + return Asm->OutStreamer->EmitDwarfFileDirective( 0, DirName, FileName, - Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID()); + Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID()); } // Return const expression if value is a GEP to access merged global @@ -96,56 +97,57 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) { } /// getOrCreateGlobalVariableDIE - get or create global variable DIE. -DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { +DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( + const DIGlobalVariable *GV) { // Check for pre-existence. if (DIE *Die = getDIE(GV)) return Die; - assert(GV.isGlobalVariable()); + assert(GV); - DIScope GVContext = GV.getContext(); - DIType GTy = DD->resolve(GV.getType()); + auto *GVContext = GV->getScope(); + auto *GTy = DD->resolve(GV->getType()); // Construct the context before querying for the existence of the DIE in // case such construction creates the DIE. DIE *ContextDIE = getOrCreateContextDIE(GVContext); // Add to map. - DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV); - DIScope DeclContext; - - if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) { - DeclContext = resolve(SDMDecl.getContext()); - assert(SDMDecl.isStaticMember() && "Expected static member decl"); - assert(GV.isDefinition()); + DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV); + DIScope *DeclContext; + if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) { + DeclContext = resolve(SDMDecl->getScope()); + assert(SDMDecl->isStaticMember() && "Expected static member decl"); + assert(GV->isDefinition()); // We need the declaration DIE that is in the static member's class. 
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); } else { - DeclContext = GV.getContext(); + DeclContext = GV->getScope(); // Add name and type. - addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); addType(*VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) + if (!GV->isLocalToUnit()) addFlag(*VariableDIE, dwarf::DW_AT_external); // Add line number info. addSourceLine(*VariableDIE, GV); } - if (!GV.isDefinition()) + if (!GV->isDefinition()) addFlag(*VariableDIE, dwarf::DW_AT_declaration); + else + addGlobalName(GV->getName(), *VariableDIE, DeclContext); // Add location. bool addToAccelTable = false; - bool isGlobalVariable = GV.getGlobal() != nullptr; - if (isGlobalVariable) { + if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) { addToAccelTable = true; DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); - if (GV.getGlobal()->isThreadLocal()) { + const MCSymbol *Sym = Asm->getSymbol(Global); + if (Global->isThreadLocal()) { // FIXME: Make this work with -gsplit-dwarf. unsigned PointerSize = Asm->getDataLayout().getPointerSize(); assert((PointerSize == 4 || PointerSize == 8) && @@ -164,28 +166,21 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { addUInt(*Loc, dwarf::DW_FORM_udata, DD->getAddressPool().getIndex(Sym, /* TLS */ true)); } - // 3) followed by a custom OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } else { DD->addArangeLabel(SymbolCU(this, Sym)); addOpAddress(*Loc, Sym); } addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - // Add the linkage name. - StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) - // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: - // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and - // TAG_variable. - addString(*VariableDIE, - DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name - : dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + addLinkageName(*VariableDIE, GV->getLinkageName()); } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + dyn_cast_or_null<ConstantInt>(GV->getVariable())) { addConstantValue(*VariableDIE, CI, GTy); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) { + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) { addToAccelTable = true; // GV is a merged global. DIELoc *Loc = new (DIEValueAllocator) DIELoc(); @@ -202,15 +197,14 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { } if (addToAccelTable) { - DD->addAccelName(GV.getName(), *VariableDIE); + DD->addAccelName(GV->getName(), *VariableDIE); // If the linkage name is different than the name, go ahead and output // that as well into the name table. 
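For thread-local globals, the location expression above now ends in either the GNU vendor opcode or the standard DWARF 3 one, depending on what the consumer expects. A sketch of just that choice; the opcode values are taken from the DWARF 3 specification and the GNU extension range:

#include <cstdint>
#include <cstdio>

constexpr uint8_t DW_OP_form_tls_address = 0x9b;     // DWARF 3 standard op
constexpr uint8_t DW_OP_GNU_push_tls_address = 0xe0; // GNU vendor extension

uint8_t tlsLookupOp(bool UseGNUTLSOpcode) {
  return UseGNUTLSOpcode ? DW_OP_GNU_push_tls_address
                         : DW_OP_form_tls_address;
}

int main() {
  std::printf("0x%x\n", tlsLookupOp(true));  // 0xe0
  std::printf("0x%x\n", tlsLookupOp(false)); // 0x9b
}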
- if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) - DD->addAccelName(GV.getLinkageName(), *VariableDIE); + if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName()) + DD->addAccelName(GV->getLinkageName(), *VariableDIE); } - addGlobalName(GV.getName(), *VariableDIE, DeclContext); return VariableDIE; } @@ -243,10 +237,10 @@ void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, addSectionDelta(Die, Attribute, Label, Sec); } -void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { +void DwarfCompileUnit::initStmtList() { // Define start line table label for each Compile Unit. MCSymbol *LineTableStartSym = - Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); + Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID()); stmtListIndex = UnitDie.getValues().size(); @@ -255,8 +249,9 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { // left in the skeleton CU and so not included. // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, - DwarfLineSectionSym); + TLOF.getDwarfLineSection()->getBeginSymbol()); } void DwarfCompileUnit::applyStmtList(DIE &D) { @@ -282,18 +277,17 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. -DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) { +DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); - attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym()); + attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd()); if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( *DD->getCurrentFunction())) addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr); // Only include DW_AT_frame_base in full debug info if (!includeMinimalInlineScopes()) { - const TargetRegisterInfo *RI = - Asm->TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); if (RI->isPhysicalRegister(Location.getReg())) addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); @@ -312,9 +306,9 @@ void DwarfCompileUnit::constructScopeDIE( if (!Scope || !Scope->getScopeNode()) return; - DIScope DS(Scope->getScopeNode()); + auto *DS = Scope->getScopeNode(); - assert((Scope->getInlinedAt() || !DS.isSubprogram()) && + assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) && "Only handle inlined subprograms here, use " "constructSubprogramScopeDIE for non-inlined " "subprograms"); @@ -325,7 +319,7 @@ void DwarfCompileUnit::constructScopeDIE( // avoid creating un-used children then removing them later when we find out // the scope DIE is null. std::unique_ptr<DIE> ScopeDIE; - if (Scope->getParent() && DS.isSubprogram()) { + if (Scope->getParent() && isa<DISubprogram>(DS)) { ScopeDIE = constructInlinedScopeDIE(Scope); if (!ScopeDIE) return; @@ -347,7 +341,7 @@ void DwarfCompileUnit::constructScopeDIE( // There is no need to emit empty lexical block DIE. 
for (const auto &E : DD->findImportedEntitiesForScope(DS)) Children.push_back( - constructImportedEntityDIE(DIImportedEntity(E.second))); + constructImportedEntityDIE(cast<DIImportedEntity>(E.second))); } // If there are only other scopes as children, put them directly in the @@ -379,13 +373,14 @@ void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Emit offset in .debug_range as a relocatable label. emitDIE will handle // emitting it appropriately. - auto *RangeSectionSym = DD->getRangeSectionSym(); + const MCSymbol *RangeSectionSym = + TLOF.getDwarfRangesSection()->getBeginSymbol(); - RangeSpanList List( - Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()), - std::move(Range)); + RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range)); // Under fission, ranges are specified by constant offsets relative to the // CU's DW_AT_GNU_ranges_base. @@ -424,8 +419,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( std::unique_ptr<DIE> DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { assert(Scope->getScopeNode()); - DIScope DS(Scope->getScopeNode()); - DISubprogram InlinedSP = getDISubprogram(DS); + auto *DS = Scope->getScopeNode(); + auto *InlinedSP = getDISubprogram(DS); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP]; @@ -437,10 +432,10 @@ DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); // Add the call site information to the DIE. - DILocation DL(Scope->getInlinedAt()); + const DILocation *IA = Scope->getInlinedAt(); addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, - getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); - addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); + getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. @@ -486,7 +481,7 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, // Add variable address. - unsigned Offset = DV.getDotDebugLocOffset(); + unsigned Offset = DV.getDebugLocListIndex(); if (Offset != ~0U) { addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); return VariableDie; @@ -516,15 +511,23 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, } // .. else use frame index. 
- int FI = DV.getFrameIndex(); - if (FI != ~0) { + if (DV.getFrameIndex().back() == ~0) + return VariableDie; + + auto Expr = DV.getExpression().begin(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + for (auto FI : DV.getFrameIndex()) { unsigned FrameReg = 0; - const TargetFrameLowering *TFI = - Asm->TM.getSubtargetImpl()->getFrameLowering(); + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, *VariableDie, Location); + assert(Expr != DV.getExpression().end() && + "Wrong number of expressions"); + DwarfExpr.AddMachineRegIndirect(FrameReg, Offset); + DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end()); + ++Expr; } + addBlock(*VariableDie, dwarf::DW_AT_location, Loc); return VariableDie; } @@ -560,16 +563,14 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { assert(Scope && Scope->getScopeNode()); assert(!Scope->getInlinedAt()); assert(!Scope->isAbstractScope()); - DISubprogram Sub(Scope->getScopeNode()); - - assert(Sub.isSubprogram()); + auto *Sub = cast<DISubprogram>(Scope->getScopeNode()); DD->getProcessedSPNodes().insert(Sub); DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); // If this is a variadic function, add an unspecified parameter. - DITypeArray FnArgs = Sub.getType().getTypeArray(); + DITypeRefArray FnArgs = Sub->getType()->getTypeArray(); // Collect lexical scope children first. // ObjectPointer might be a local (non-argument) local variable if it's a @@ -580,8 +581,7 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { // If we have a single element of null, it is a function that returns void. // If we have more than one elements and the last one is null, it is a // variadic function. - if (FnArgs.getNumElements() > 1 && - !FnArgs.getElement(FnArgs.getNumElements() - 1) && + if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] && !includeMinimalInlineScopes()) ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters)); } @@ -605,26 +605,25 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { if (AbsDef) return; - DISubprogram SP(Scope->getScopeNode()); + auto *SP = cast<DISubprogram>(Scope->getScopeNode()); DIE *ContextDIE; if (includeMinimalInlineScopes()) ContextDIE = &getUnitDie(); // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with - // the important distinction that the DIDescriptor is not associated with the - // DIE (since the DIDescriptor will be associated with the concrete DIE, if + // the important distinction that the debug node is not associated with the + // DIE (since the debug node will be associated with the concrete DIE, if // any). It could be refactored to some common utility function. - else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + else if (auto *SPDecl = SP->getDeclaration()) { ContextDIE = &getUnitDie(); getOrCreateSubprogramDIE(SPDecl); } else - ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + ContextDIE = getOrCreateContextDIE(resolve(SP->getScope())); - // Passing null as the associated DIDescriptor because the abstract definition + // Passing null as the associated node because the abstract definition // shouldn't be found by lookup. 
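constructVariableDIEImpl above now walks a list of frame indices in step with a list of expressions, one pair per piece of a variable split across stack slots, asserting that the two sequences stay in step. A standalone sketch of that paired walk, with hypothetical types:

#include <cassert>
#include <iostream>
#include <vector>

struct Expr { const char *Desc; };

void lowerFramePieces(const std::vector<int> &FrameIndices,
                      const std::vector<Expr> &Exprs) {
  auto E = Exprs.begin();
  for (int FI : FrameIndices) {
    assert(E != Exprs.end() && "Wrong number of expressions");
    // Real code emits a register-relative op for the slot, then the piece
    // expression; here we just trace the pairing.
    std::cout << "frame index " << FI << ": " << E->Desc << "\n";
    ++E;
  }
}

int main() {
  lowerFramePieces({2, 3}, {{"piece 0..31"}, {"piece 32..63"}});
}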
- AbsDef = - &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, DIDescriptor()); + AbsDef = &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); applySubprogramAttributesToDefinition(SP, *AbsDef); if (!includeMinimalInlineScopes()) @@ -634,36 +633,33 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { } std::unique_ptr<DIE> -DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) { - assert(Module.Verify() && - "Use one of the MDNode * overloads to handle invalid metadata"); - std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module.getTag()); +DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity *Module) { + std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module->getTag()); insertDIE(Module, IMDie.get()); DIE *EntityDie; - DIDescriptor Entity = resolve(Module.getEntity()); - if (Entity.isNameSpace()) - EntityDie = getOrCreateNameSpace(DINameSpace(Entity)); - else if (Entity.isSubprogram()) - EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity)); - else if (Entity.isType()) - EntityDie = getOrCreateTypeDIE(DIType(Entity)); - else if (Entity.isGlobalVariable()) - EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity)); + auto *Entity = resolve(Module->getEntity()); + if (auto *NS = dyn_cast<DINamespace>(Entity)) + EntityDie = getOrCreateNameSpace(NS); + else if (auto *SP = dyn_cast<DISubprogram>(Entity)) + EntityDie = getOrCreateSubprogramDIE(SP); + else if (auto *T = dyn_cast<DIType>(Entity)) + EntityDie = getOrCreateTypeDIE(T); + else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity)) + EntityDie = getOrCreateGlobalVariableDIE(GV); else EntityDie = getDIE(Entity); assert(EntityDie); - addSourceLine(*IMDie, Module.getLineNumber(), - Module.getContext().getFilename(), - Module.getContext().getDirectory()); + addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(), + Module->getScope()->getDirectory()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = Module.getName(); + StringRef Name = Module->getName(); if (!Name.empty()) addString(*IMDie, dwarf::DW_AT_name, Name); return IMDie; } -void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) { +void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { DIE *D = getDIE(SP); if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) { if (D) @@ -680,39 +676,39 @@ void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) { applySubprogramAttributesToDefinition(SP, *D); } } -void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) { - assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram"); - assert(SP.isDefinition() && +void DwarfCompileUnit::collectDeadVariables(const DISubprogram *SP) { + assert(SP && "CU's subprogram list contains a non-subprogram"); + assert(SP->isDefinition() && "CU's subprogram list contains a subprogram declaration"); - DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) + auto Variables = SP->getVariables(); + if (Variables.size() == 0) return; DIE *SPDIE = DU->getAbstractSPDies().lookup(SP); if (!SPDIE) SPDIE = getDIE(SP); assert(SPDIE); - for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { - DIVariable DV(Variables.getElement(vi)); - assert(DV.isVariable()); - DbgVariable NewVar(DV, DIExpression(nullptr), DD); + for (const DILocalVariable *DV : Variables) { + DbgVariable NewVar(DV, /* IA */ nullptr, /* Expr */ nullptr, DD); auto VariableDie = 
constructVariableDIE(NewVar); applyVariableAttributes(NewVar, *VariableDie); SPDIE->addChild(std::move(VariableDie)); } } -void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const { +void DwarfCompileUnit::emitHeader(bool UseOffsets) { // Don't bother labeling the .dwo unit, as its offset isn't used. - if (!Skeleton) - Asm->OutStreamer.EmitLabel(LabelBegin); + if (!Skeleton) { + LabelBegin = Asm->createTempSymbol("cu_begin"); + Asm->OutStreamer->EmitLabel(LabelBegin); + } - DwarfUnit::emitHeader(ASectionSym); + DwarfUnit::emitHeader(UseOffsets); } /// addGlobalName - Add a new global name to the compile unit. void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die, - DIScope Context) { + const DIScope *Context) { if (includeMinimalInlineScopes()) return; std::string FullName = getParentContextString(Context) + Name.str(); @@ -720,11 +716,11 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die, } /// Add a new global type to the unit. -void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die, - DIScope Context) { +void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) { if (includeMinimalInlineScopes()) return; - std::string FullName = getParentContextString(Context) + Ty.getName().str(); + std::string FullName = getParentContextString(Context) + Ty->getName().str(); GlobalTypes[FullName] = &Die; } @@ -737,18 +733,16 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, else if (DV.isBlockByrefVariable()) addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); else - addAddress(Die, dwarf::DW_AT_location, Location, - DV.getVariable().isIndirect()); + addAddress(Die, dwarf::DW_AT_location, Location); } /// Add an address attribute to a die based on the location provided. void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, - bool Indirect) { + const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc(); bool validReg; - if (Location.isReg() && !Indirect) + if (Location.isReg()) validReg = addRegisterOpPiece(*Loc, Location.getReg()); else validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); @@ -756,9 +750,6 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, if (!validReg) return; - if (!Location.isReg() && Indirect) - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - // Now attach the location information to the DIE. 
addBlock(Die, Attribute, Loc); } @@ -772,22 +763,20 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc(); DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - DIExpression Expr = DV.getExpression(); + assert(DV.getExpression().size() == 1); + const DIExpression *Expr = DV.getExpression().back(); + bool ValidReg; if (Location.getOffset()) { - if (DwarfExpr.AddMachineRegIndirect(Location.getReg(), - Location.getOffset())) { - DwarfExpr.AddExpression(Expr); - assert(!DV.getVariable().isIndirect() - && "double indirection not handled"); - } - } else { - if (DwarfExpr.AddMachineRegExpression(Expr, Location.getReg())) - if (DV.getVariable().isIndirect()) - DwarfExpr.EmitOp(dwarf::DW_OP_deref); - } + ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(), + Location.getOffset()); + if (ValidReg) + DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end()); + } else + ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg()); // Now attach the location information to the DIE. - addBlock(Die, Attribute, Loc); + if (ValidReg) + addBlock(Die, Attribute, Loc); } /// Add a Dwarf loclistptr attribute data and value. @@ -817,12 +806,12 @@ void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, Die.addValue((dwarf::Attribute)0, Form, Value); } -void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP, - DIE &SPDie) { - DISubprogram SPDecl = SP.getFunctionDeclaration(); - DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext()); +void DwarfCompileUnit::applySubprogramAttributesToDefinition( + const DISubprogram *SP, DIE &SPDie) { + auto *SPDecl = SP->getDeclaration(); + auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope()); applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes()); - addGlobalName(SP.getName(), SPDie, Context); + addGlobalName(SP->getName(), SPDie, Context); } bool DwarfCompileUnit::isDwoUnit() const { @@ -830,7 +819,7 @@ bool DwarfCompileUnit::isDwoUnit() const { } bool DwarfCompileUnit::includeMinimalInlineScopes() const { - return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly || + return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly || (DD->useSplitDwarf() && !Skeleton); } } // end llvm namespace diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 91164bc..50e4a54e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -36,9 +36,6 @@ class DwarfCompileUnit : public DwarfUnit { /// Skeleton unit associated with this unit. DwarfCompileUnit *Skeleton; - /// A label at the start of the non-dwo section related to this unit. - MCSymbol *SectionSym; - /// The start of the unit within its section. MCSymbol *LabelBegin; @@ -69,20 +66,20 @@ class DwarfCompileUnit : public DwarfUnit { bool includeMinimalInlineScopes() const; public: - DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, + DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); DwarfCompileUnit *getSkeleton() const { return Skeleton; } - void initStmtList(MCSymbol *DwarfLineSectionSym); + void initStmtList(); /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. void applyStmtList(DIE &D); /// getOrCreateGlobalVariableDIE - get or create global variable DIE. 
- DIE *getOrCreateGlobalVariableDIE(DIGlobalVariable GV); + DIE *getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV); /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. @@ -116,7 +113,7 @@ public: /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. - DIE &updateSubprogramScopeDIE(DISubprogram SP); + DIE &updateSubprogramScopeDIE(const DISubprogram *SP); void constructScopeDIE(LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren); @@ -159,31 +156,18 @@ public: /// \brief Construct import_module DIE. std::unique_ptr<DIE> - constructImportedEntityDIE(const DIImportedEntity &Module); + constructImportedEntityDIE(const DIImportedEntity *Module); - void finishSubprogramDefinition(DISubprogram SP); + void finishSubprogramDefinition(const DISubprogram *SP); - void collectDeadVariables(DISubprogram SP); + void collectDeadVariables(const DISubprogram *SP); /// Set the skeleton unit associated with this unit. void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } - MCSymbol *getSectionSym() const { + const MCSymbol *getSectionSym() const { assert(Section); - return SectionSym; - } - - /// Pass in the SectionSym even though we could recreate it in every compile - /// unit (type units will have actually distinct symbols once they're in - /// comdat sections). - void initSection(const MCSection *Section, MCSymbol *SectionSym) { - DwarfUnit::initSection(Section); - this->SectionSym = SectionSym; - - // Don't bother labeling the .dwo unit, as its offset isn't used. - if (!Skeleton) - LabelBegin = - Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + return Section->getBeginSymbol(); } unsigned getLength() { @@ -191,7 +175,7 @@ public: getHeaderSize() + UnitDie.getSize(); } - void emitHeader(const MCSymbol *ASectionSym) const override; + void emitHeader(bool UseOffsets) override; MCSymbol *getLabelBegin() const { assert(Section); @@ -199,10 +183,11 @@ public: } /// Add a new global name to the compile unit. - void addGlobalName(StringRef Name, DIE &Die, DIScope Context) override; + void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override; /// Add a new global type to the compile unit. - void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) override; + void addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) override; const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; } const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; } @@ -213,7 +198,7 @@ public: MachineLocation Location); /// Add an address attribute to a die based on the location provided. void addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, bool Indirect = false); + const MachineLocation &Location); /// Start with the address based on the location provided, and generate the /// DWARF information necessary to find the actual variable (navigating the @@ -230,7 +215,8 @@ public: /// Add a Dwarf expression attribute data and value. void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); - void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie); + void applySubprogramAttributesToDefinition(const DISubprogram *SP, + DIE &SPDie); /// getRangeLists - Get the vector of range lists. 
const SmallVectorImpl<RangeSpanList> &getRangeLists() const { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index a587b46..105ff6c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #include "DwarfDebug.h" #include "ByteStreamer.h" #include "DIEHash.h" +#include "DebugLocEntry.h" #include "DwarfCompileUnit.h" #include "DwarfExpression.h" #include "DwarfUnit.h" @@ -45,6 +46,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -105,24 +107,45 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; +void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { + BS.EmitInt8( + Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) + : dwarf::OperationEncodingString(Op)); +} + +void DebugLocDwarfExpression::EmitSigned(int64_t Value) { + BS.EmitSLEB128(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) { + BS.EmitULEB128(Value, Twine(Value)); +} + +bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { + // This information is not available while emitting .debug_loc entries. + return false; +} + //===----------------------------------------------------------------------===// /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. -template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const { +template <typename T> T *DbgVariable::resolve(TypedDINodeRef<T> Ref) const { return DD->resolve(Ref); } bool DbgVariable::isBlockByrefVariable() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(DD->getTypeIdentifierMap()); + assert(Var && "Invalid complex DbgVariable!"); + return Var->getType() + .resolve(DD->getTypeIdentifierMap()) + ->isBlockByrefStruct(); } -DIType DbgVariable::getType() const { - DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap()); +const DIType *DbgVariable::getType() const { + DIType *Ty = Var->getType().resolve(DD->getTypeIdentifierMap()); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. - if (Var.isBlockByrefVariable(DD->getTypeIdentifierMap())) { + if (Ty->isBlockByrefStruct()) { /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName @@ -147,17 +170,17 @@ DIType DbgVariable::getType() const { have a DW_AT_location that tells the debugger how to unwind through the pointers and __Block_byref_x_VarName struct to find the actual value of the variable. The function addBlockByrefType does this. 
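     As a concrete sketch of the struct described above (field names follow
     the Blocks ABI; the optional helper fields vary, so this layout is
     illustrative rather than something this patch defines):

         struct __Block_byref_x_VarName {
           void *__isa;
           struct __Block_byref_x_VarName *__forwarding;
           int __flags;
           int __size;
           SomeType VarName;   // the field whose type getType() recovers
         };

     The debugger chases __forwarding (which may point at the struct itself
     or at a heap copy) before it can read VarName.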
*/ - DIType subType = Ty; - uint16_t tag = Ty.getTag(); + DIType *subType = Ty; + uint16_t tag = Ty->getTag(); if (tag == dwarf::DW_TAG_pointer_type) - subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); + subType = resolve(cast<DIDerivedType>(Ty)->getBaseType()); - DIArray Elements = DICompositeType(subType).getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDerivedType DT(Elements.getElement(i)); - if (getName() == DT.getName()) - return (resolve(DT.getTypeDerivedFrom())); + auto Elements = cast<DICompositeTypeBase>(subType)->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *DT = cast<DIDerivedTypeBase>(Elements[i]); + if (getName() == DT->getName()) + return resolve(DT->getBaseType()); } } return Ty; @@ -169,11 +192,12 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0), - InfoHolder(A, *this, "info_string", DIEValueAllocator), + : Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()), + PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator), UsedNonDefaultText(false), - SkeletonHolder(A, *this, "skel_string", DIEValueAllocator), + SkeletonHolder(A, "skel_string", DIEValueAllocator), IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), + IsPS4(Triple(A->getTargetTriple()).isPS4()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, @@ -182,17 +206,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) dwarf::DW_FORM_data4)), AccelTypes(TypeAtoms) { - DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr; - DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr; - DwarfLineSectionSym = nullptr; - DwarfAddrSectionSym = nullptr; - DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr; - FunctionBeginSym = FunctionEndSym = nullptr; CurFn = nullptr; CurMI = nullptr; // Turn on accelerator tables for Darwin by default, pubnames by - // default for non-Darwin, and handle split dwarf. + // default for non-Darwin/PS4, and handle split dwarf. if (DwarfAccelTables == Default) HasDwarfAccelTables = IsDarwin; else @@ -204,7 +222,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) HasSplitDwarf = SplitDwarf == Enable; if (DwarfPubSections == Default) - HasDwarfPubSections = !IsDarwin; + HasDwarfPubSections = !IsDarwin && !IsPS4; else HasDwarfPubSections = DwarfPubSections == Enable; @@ -212,7 +230,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); - Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion); + // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3). + // Everybody else uses GNU's. + UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3; + + Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -223,19 +245,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. DwarfDebug::~DwarfDebug() { } -// Switch to the specified MCSection and emit an assembler -// temporary label to it if SymbolStem is specified. 
-static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section, - const char *SymbolStem = nullptr) { - Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) - return nullptr; - - MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); - Asm->OutStreamer.EmitLabel(TmpSym); - return TmpSym; -} - static bool isObjCClass(StringRef Name) { return Name.startswith("+") || Name.startswith("-"); } @@ -264,37 +273,30 @@ static StringRef getObjCMethodName(StringRef In) { return In.slice(In.find(' ') + 1, In.find(']')); } -// Helper for sorting sections into a stable output order. -static bool SectionSort(const MCSection *A, const MCSection *B) { - std::string LA = (A ? A->getLabelBeginName() : ""); - std::string LB = (B ? B->getLabelBeginName() : ""); - return LA < LB; -} - // Add the various names to the Dwarf accelerator table names. // TODO: Determine whether or not we should add names for programs // that do not have a DW_AT_name or DW_AT_linkage_name field - this // is only slightly different than the lookup of non-standard ObjC names. -void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) { - if (!SP.isDefinition()) +void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) { + if (!SP->isDefinition()) return; - addAccelName(SP.getName(), Die); + addAccelName(SP->getName(), Die); // If the linkage name is different than the name, go ahead and output // that as well into the name table. - if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) - addAccelName(SP.getLinkageName(), Die); + if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName()) + addAccelName(SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator // too. - if (isObjCClass(SP.getName())) { + if (isObjCClass(SP->getName())) { StringRef Class, Category; - getObjCClassCategory(SP.getName(), Class, Category); + getObjCClassCategory(SP->getName(), Class, Category); addAccelObjC(Class, Die); if (Category != "") addAccelObjC(Category, Die); // Also add the base method name to the name table. - addAccelName(getObjCMethodName(SP.getName()), Die); + addAccelName(getObjCMethodName(SP->getName()), Die); } } @@ -303,11 +305,10 @@ void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) { bool DwarfDebug::isSubprogramContext(const MDNode *Context) { if (!Context) return false; - DIDescriptor D(Context); - if (D.isSubprogram()) + if (isa<DISubprogram>(Context)) return true; - if (D.isType()) - return isSubprogramContext(resolve(DIType(Context).getContext())); + if (auto *T = dyn_cast<DIType>(Context)) + return isSubprogramContext(resolve(T->getScope())); return false; } @@ -362,9 +363,10 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. 
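A worked example may help with the accelerator-table naming above (the selector is hypothetical): for a subprogram named "+[NSString(MyAdditions) reversed]", isObjCClass() matches the leading "+", and addSubprogramNames() ends up doing roughly

    addAccelObjC(Class, Die);                             // "NSString"
    addAccelObjC(Category, Die);                          // the category form, when present
    addAccelName(getObjCMethodName(SP->getName()), Die);  // "reversed"

so a debugger lookup by class or by bare selector reaches the same DIE as one by the full name.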
-DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { - StringRef FN = DIUnit.getFilename(); - CompilationDir = DIUnit.getDirectory(); +DwarfCompileUnit & +DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { + StringRef FN = DIUnit->getFilename(); + CompilationDir = DIUnit->getDirectory(); auto OwnedUnit = make_unique<DwarfCompileUnit>( InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); @@ -378,17 +380,17 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { // To avoid the compilation directory being ambiguous, let the line table // explicitly describe the directory of all files, never relying on the // compilation directory. - if (!Asm->OutStreamer.hasRawTextSupport() || SingleCU) - Asm->OutStreamer.getContext().setMCLineTableCompilationDir( + if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU) + Asm->OutStreamer->getContext().setMCLineTableCompilationDir( NewCU.getUniqueID(), CompilationDir); - NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); + NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer()); NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); + DIUnit->getSourceLanguage()); NewCU.addString(Die, dwarf::DW_AT_name, FN); if (!useSplitDwarf()) { - NewCU.initStmtList(DwarfLineSectionSym); + NewCU.initStmtList(); // If we're using split dwarf the compilation dir is going to be in the // skeleton CU and so we don't need to duplicate it here. @@ -398,23 +400,21 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { addGnuPubAttributes(NewCU, Die); } - if (DIUnit.isOptimized()) + if (DIUnit->isOptimized()) NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized); - StringRef Flags = DIUnit.getFlags(); + StringRef Flags = DIUnit->getFlags(); if (!Flags.empty()) NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags); - if (unsigned RVer = DIUnit.getRunTimeVersion()) + if (unsigned RVer = DIUnit->getRuntimeVersion()) NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); if (useSplitDwarf()) - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(), - DwarfInfoDWOSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); else - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), - DwarfInfoSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); CUMap.insert(std::make_pair(DIUnit, &NewCU)); CUDieMap.insert(std::make_pair(&Die, &NewCU)); @@ -422,11 +422,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { } void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, - const MDNode *N) { - DIImportedEntity Module(N); - assert(Module.Verify()); - if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext())) - D->addChild(TheCU.constructImportedEntityDIE(Module)); + const DIImportedEntity *N) { + if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope())) + D->addChild(TheCU.constructImportedEntityDIE(N)); } // Emit all Dwarf sections that should come prior to the content. Create @@ -445,54 +443,40 @@ void DwarfDebug::beginModule() { return; TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes); - // Emit initial sections so we can reference labels later. 
- emitSectionLabels(); - SingleCU = CU_Nodes->getNumOperands() == 1; for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit CUNode(N); + auto *CUNode = cast<DICompileUnit>(N); DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); - DIArray ImportedEntities = CUNode.getImportedEntities(); - for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - ScopesWithImportedEntities.push_back(std::make_pair( - DIImportedEntity(ImportedEntities.getElement(i)).getContext(), - ImportedEntities.getElement(i))); - std::sort(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), less_first()); - DIArray GVs = CUNode.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); - DIArray SPs = CUNode.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - SPMap.insert(std::make_pair(SPs.getElement(i), &CU)); - DIArray EnumTypes = CUNode.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) { - DIType Ty(EnumTypes.getElement(i)); + for (auto *IE : CUNode->getImportedEntities()) + ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE)); + // Stable sort to preserve the order of appearance of imported entities. + // This is to avoid out-of-order processing of interdependent declarations + // within the same scope, e.g. { namespace A = base; namespace B = A; } + std::stable_sort(ScopesWithImportedEntities.begin(), + ScopesWithImportedEntities.end(), less_first()); + for (auto *GV : CUNode->getGlobalVariables()) + CU.getOrCreateGlobalVariableDIE(GV); + for (auto *SP : CUNode->getSubprograms()) + SPMap.insert(std::make_pair(SP, &CU)); + for (auto *Ty : CUNode->getEnumTypes()) { // The enum types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - DIType UniqueTy(resolve(Ty.getRef())); - CU.getOrCreateTypeDIE(UniqueTy); + CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef()))); } - DIArray RetainedTypes = CUNode.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) { - DIType Ty(RetainedTypes.getElement(i)); + for (auto *Ty : CUNode->getRetainedTypes()) { // The retained types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - DIType UniqueTy(resolve(Ty.getRef())); - CU.getOrCreateTypeDIE(UniqueTy); + CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef()))); } // Emit imported_modules last so that the relevant context is already // available. - for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i)); + for (auto *IE : CUNode->getImportedEntities()) + constructAndAddImportedEntityDIE(CU, IE); } // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); - - // Prime section data. - SectionMap[Asm->getObjFileLowering().getTextSection()]; } void DwarfDebug::finishVariableDefinitions() { @@ -504,7 +488,8 @@ void DwarfDebug::finishVariableDefinitions() { // DIE::getUnit isn't simple - it walks parent pointers, etc. 
DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit()); assert(Unit); - DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable()); + DbgVariable *AbsVar = getExistingAbstractVariable( + InlinedVariable(Var->getVariable(), Var->getInlinedAt())); if (AbsVar && AbsVar->getDIE()) { Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE()); @@ -516,7 +501,7 @@ void DwarfDebug::finishVariableDefinitions() { void DwarfDebug::finishSubprogramDefinitions() { for (const auto &P : SPMap) forBothCUs(*P.second, [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(DISubprogram(P.first)); + CU.finishSubprogramDefinition(cast<DISubprogram>(P.first)); }); } @@ -527,14 +512,12 @@ void DwarfDebug::collectDeadVariables() { if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit TheCU(N); + auto *TheCU = cast<DICompileUnit>(N); // Construct subprogram DIE and add variables DIEs. DwarfCompileUnit *SPCU = static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); assert(SPCU && "Unable to find Compile Unit!"); - DIArray Subprograms = TheCU.getSubprograms(); - for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { - DISubprogram SP(Subprograms.getElement(i)); + for (auto *SP : TheCU->getSubprograms()) { if (ProcessedSPNodes.count(SP) != 0) continue; SPCU->collectDeadVariables(SP); @@ -544,6 +527,8 @@ void DwarfDebug::collectDeadVariables() { } void DwarfDebug::finalizeModuleInfo() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + finishSubprogramDefinitions(); finishVariableDefinitions(); @@ -573,13 +558,16 @@ void DwarfDebug::finalizeModuleInfo() { // We don't keep track of which addresses are used in which CU so this // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty()) + if (!AddrPool.isEmpty()) { + const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base, - DwarfAddrSectionSym, DwarfAddrSectionSym); - if (!SkCU->getRangeLists().empty()) + Sym, Sym); + } + if (!SkCU->getRangeLists().empty()) { + const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base, - DwarfDebugRangeSectionSym, - DwarfDebugRangeSectionSym); + Sym, Sym); + } } // If we have code split among multiple sections or non-contiguous @@ -597,7 +585,7 @@ void DwarfDebug::finalizeModuleInfo() { // 2.17.3). U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); else - TheCU.setBaseAddress(TheCU.getRanges().front().getStart()); + U.setBaseAddress(TheCU.getRanges().front().getStart()); U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } } @@ -608,53 +596,6 @@ void DwarfDebug::finalizeModuleInfo() { SkeletonHolder.computeSizeAndOffsets(); } -void DwarfDebug::endSections() { - // Filter labels by section. - for (const SymbolCU &SCU : ArangeLabels) { - if (SCU.Sym->isInSection()) { - // Make a note of this symbol and it's section. - const MCSection *Section = &SCU.Sym->getSection(); - if (!Section->getKind().isMetadata()) - SectionMap[Section].push_back(SCU); - } else { - // Some symbols (e.g. common/bss on mach-o) can have no section but still - // appear in the output. This sucks as we rely on sections to build - // arange spans. We can do it without, but it's icky. - SectionMap[nullptr].push_back(SCU); - } - } - - // Build a list of sections used. 
- std::vector<const MCSection *> Sections; - for (const auto &it : SectionMap) { - const MCSection *Section = it.first; - Sections.push_back(Section); - } - - // Sort the sections into order. - // This is only done to ensure consistent output order across different runs. - std::sort(Sections.begin(), Sections.end(), SectionSort); - - // Add terminating symbols for each section. - for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) { - const MCSection *Section = Sections[ID]; - MCSymbol *Sym = nullptr; - - if (Section) { - // We can't call MCSection::getLabelEndName, as it's only safe to do so - // if we know the section name up-front. For user-created sections, the - // resulting label may not be valid to use as a label. (section names can - // use a greater set of characters on some systems) - Sym = Asm->GetTempSymbol("debug_end", ID); - Asm->OutStreamer.SwitchSection(Section); - Asm->OutStreamer.EmitLabel(Sym); - } - - // Insert a final terminator. - SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); - } -} - // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { assert(CurFn == nullptr); @@ -663,24 +604,26 @@ void DwarfDebug::endModule() { // If we aren't actually generating debug info (check beginModule - // conditionalized on !DisableDebugInfoPrinting and the presence of the // llvm.dbg.cu metadata node) - if (!DwarfInfoSectionSym) + if (!MMI->hasDebugInfo()) return; - // End any existing sections. - // TODO: Does this need to happen? - endSections(); - // Finalize the debug info for the module. finalizeModuleInfo(); emitDebugStr(); - // Emit all the DIEs into a debug info section. - emitDebugInfo(); + if (useSplitDwarf()) + emitDebugLocDWO(); + else + // Emit info into a debug loc section. + emitDebugLoc(); // Corresponding abbreviations into a abbrev section. emitAbbreviations(); + // Emit all the DIEs into a debug info section. + emitDebugInfo(); + // Emit info into a debug aranges section. if (GenerateARangeSection) emitDebugARanges(); @@ -693,12 +636,9 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); - emitDebugLocDWO(); // Emit DWO addresses. AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); - } else - // Emit info into a debug loc section. - emitDebugLoc(); + } // Emit info into the dwarf accelerator table sections. if (useDwarfAccelTables()) { @@ -720,80 +660,80 @@ void DwarfDebug::endModule() { } // Find abstract variable, if any, associated with Var. -DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV, - DIVariable &Cleansed) { - LLVMContext &Ctx = DV->getContext(); +DbgVariable * +DwarfDebug::getExistingAbstractVariable(InlinedVariable IV, + const DILocalVariable *&Cleansed) { // More then one inlined variable corresponds to one abstract variable. - // FIXME: This duplication of variables when inlining should probably be - // removed. It's done to allow each DIVariable to describe its location - // because the DebugLoc on the dbg.value/declare isn't accurate. We should - // make it accurate then remove this duplication/cleansing stuff. 
- Cleansed = cleanseInlinedVariable(DV, Ctx); + Cleansed = IV.first; auto I = AbstractVariables.find(Cleansed); if (I != AbstractVariables.end()) return I->second.get(); return nullptr; } -DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) { - DIVariable Cleansed; - return getExistingAbstractVariable(DV, Cleansed); +DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) { + const DILocalVariable *Cleansed; + return getExistingAbstractVariable(IV, Cleansed); } -void DwarfDebug::createAbstractVariable(const DIVariable &Var, +void DwarfDebug::createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope) { - auto AbsDbgVariable = make_unique<DbgVariable>(Var, DIExpression(), this); + auto AbsDbgVariable = + make_unique<DbgVariable>(Var, /* IA */ nullptr, /* Expr */ nullptr, this); InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); AbstractVariables[Var] = std::move(AbsDbgVariable); } -void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV, +void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV, const MDNode *ScopeNode) { - DIVariable Cleansed = DV; - if (getExistingAbstractVariable(DV, Cleansed)) + const DILocalVariable *Cleansed = nullptr; + if (getExistingAbstractVariable(IV, Cleansed)) return; - createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode)); + createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( + cast<DILocalScope>(ScopeNode))); } -void -DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV, - const MDNode *ScopeNode) { - DIVariable Cleansed = DV; - if (getExistingAbstractVariable(DV, Cleansed)) +void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped( + InlinedVariable IV, const MDNode *ScopeNode) { + const DILocalVariable *Cleansed = nullptr; + if (getExistingAbstractVariable(IV, Cleansed)) return; - if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode)) + if (LexicalScope *Scope = + LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode))) createAbstractVariable(Cleansed, Scope); } // Collect variable information from side table maintained by MMI. void DwarfDebug::collectVariableInfoFromMMITable( - SmallPtrSetImpl<const MDNode *> &Processed) { + DenseSet<InlinedVariable> &Processed) { for (const auto &VI : MMI->getVariableDbgInfo()) { if (!VI.Var) continue; - Processed.insert(VI.Var); + assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && + "Expected inlined-at fields to agree"); + + InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt()); + Processed.insert(Var); LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); // If variable scope is not found then skip this variable. if (!Scope) continue; - DIVariable DV(VI.Var); - DIExpression Expr(VI.Expr); - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - ConcreteVariables.push_back(make_unique<DbgVariable>(DV, Expr, this)); - DbgVariable *RegVar = ConcreteVariables.back().get(); - RegVar->setFrameIndex(VI.Slot); - InfoHolder.addScopeVariable(Scope, RegVar); + const DIExpression *Expr = cast_or_null<DIExpression>(VI.Expr); + ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode()); + auto RegVar = + make_unique<DbgVariable>(Var.first, Var.second, Expr, this, VI.Slot); + if (InfoHolder.addScopeVariable(Scope, RegVar.get())) + ConcreteVariables.push_back(std::move(RegVar)); } } // Get .debug_loc entry for the instruction range starting at MI. 
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
-  const MDNode *Expr = MI->getDebugExpression();
-  const MDNode *Var = MI->getDebugVariable();
+  const DIExpression *Expr = MI->getDebugExpression();

  assert(MI->getNumOperands() == 4);
  if (MI->getOperand(0).isReg()) {
@@ -804,26 +744,26 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI)
      MLoc.set(MI->getOperand(0).getReg());
    else
      MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
-    return DebugLocEntry::Value(Var, Expr, MLoc);
+    return DebugLocEntry::Value(Expr, MLoc);
  }
  if (MI->getOperand(0).isImm())
-    return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getImm());
+    return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
  if (MI->getOperand(0).isFPImm())
-    return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getFPImm());
+    return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
  if (MI->getOperand(0).isCImm())
-    return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getCImm());
+    return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());

  llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}

/// Determine whether two variable pieces overlap.
-static bool piecesOverlap(DIExpression P1, DIExpression P2) {
-  if (!P1.isVariablePiece() || !P2.isVariablePiece())
+static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
+  if (!P1->isBitPiece() || !P2->isBitPiece())
    return true;
-  unsigned l1 = P1.getPieceOffset();
-  unsigned l2 = P2.getPieceOffset();
-  unsigned r1 = l1 + P1.getPieceSize();
-  unsigned r2 = l2 + P2.getPieceSize();
+  unsigned l1 = P1->getBitPieceOffset();
+  unsigned l2 = P2->getBitPieceOffset();
+  unsigned r1 = l1 + P1->getBitPieceSize();
+  unsigned r2 = l2 + P2->getBitPieceSize();
  // True where [l1,r1[ and [l2,r2[ overlap.
  return (l1 < r2) && (l2 < r1);
}
@@ -842,7 +782,8 @@ static bool piecesOverlap(DIExpression P1, DIExpression P2) {
// 1 | |    [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
// 2 | |    ...
// 3   |    [clobber reg0]
-// 4        [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of x.
+// 4        [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of
+//                                     x.
//
// Output:
//
@@ -868,7 +809,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
    }

    // If this piece overlaps with any open ranges, truncate them.
-    DIExpression DIExpr = Begin->getDebugExpression();
+    const DIExpression *DIExpr = Begin->getDebugExpression();
    auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(),
                               [&](DebugLocEntry::Value R) {
      return piecesOverlap(DIExpr, R.getExpression());
    });
@@ -882,7 +823,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
    if (End != nullptr)
      EndLabel = getLabelAfterInsn(End);
    else if (std::next(I) == Ranges.end())
-      EndLabel = FunctionEndSym;
+      EndLabel = Asm->getFunctionEnd();
    else
      EndLabel = getLabelBeforeInsn(std::next(I)->first);
    assert(EndLabel && "Forgot label after instruction ending a range!");
@@ -894,7 +835,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
    bool couldMerge = false;

    // If this is a piece, it may belong to the current DebugLocEntry.
-    if (DIExpr.isVariablePiece()) {
+    if (DIExpr->isBitPiece()) {
      // Add this value to the list of open ranges.
      OpenRanges.push_back(Value);

@@ -916,54 +857,50 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
  // Attempt to coalesce the ranges of two otherwise identical
  // DebugLocEntries.
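A quick worked check of the overlap predicate above, using half-open bit ranges: pieces [0,32) and [16,48) give l1=0, r1=32, l2=16, r2=48, so (l1 < r2) && (l2 < r1) holds and the open range gets truncated; adjacent pieces [0,32) and [32,64) fail l2 < r1 (32 < 32 is false), so they correctly do not overlap. Because the non-piece case returns true, an unsplit location conservatively clobbers every open piece of the variable.

    [0,32) vs [16,48):  0 < 48 && 16 < 32  ->  overlap
    [0,32) vs [32,64):  0 < 64 && 32 < 32  ->  disjoint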
auto CurEntry = DebugLoc.rbegin(); - auto PrevEntry = std::next(CurEntry); - if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) - DebugLoc.pop_back(); - DEBUG({ dbgs() << CurEntry->getValues().size() << " Values:\n"; - for (auto Value : CurEntry->getValues()) { - Value.getVariable()->dump(); + for (auto &Value : CurEntry->getValues()) Value.getExpression()->dump(); - } dbgs() << "-----\n"; }); + + auto PrevEntry = std::next(CurEntry); + if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) + DebugLoc.pop_back(); } } // Find variables for each lexical scope. -void -DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, - SmallPtrSetImpl<const MDNode *> &Processed) { +void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, + const DISubprogram *SP, + DenseSet<InlinedVariable> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMMITable(Processed); for (const auto &I : DbgValues) { - DIVariable DV(I.first); - if (Processed.count(DV)) + InlinedVariable IV = I.first; + if (Processed.count(IV)) continue; - // Instruction ranges, specifying where DV is accessible. + // Instruction ranges, specifying where IV is accessible. const auto &Ranges = I.second; if (Ranges.empty()) continue; LexicalScope *Scope = nullptr; - if (MDNode *IA = DV.getInlinedAt()) { - DebugLoc DL = DebugLoc::getFromDILocation(IA); - Scope = LScopes.findInlinedScope(DebugLoc::get( - DL.getLine(), DL.getCol(), DV.getContext(), IA)); - } else - Scope = LScopes.findLexicalScope(DV.getContext()); + if (const DILocation *IA = IV.second) + Scope = LScopes.findInlinedScope(IV.first->getScope(), IA); + else + Scope = LScopes.findLexicalScope(IV.first->getScope()); // If variable scope is not found then skip this variable. if (!Scope) continue; - Processed.insert(DV); + Processed.insert(IV); const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + ensureAbstractVariableIsCreatedIfScoped(IV, Scope->getScopeNode()); ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this)); DbgVariable *RegVar = ConcreteVariables.back().get(); InfoHolder.addScopeVariable(Scope, RegVar); @@ -973,29 +910,33 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, continue; // Handle multiple DBG_VALUE instructions describing one variable. - RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); - - DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1); - DebugLocList &LocList = DotDebugLocEntries.back(); - LocList.CU = &TheCU; - LocList.Label = - Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1); + RegVar->setDebugLocListIndex( + DebugLocs.startList(&TheCU, Asm->createTempSymbol("debug_loc"))); // Build the location list for this variable. - buildLocationList(LocList.List, Ranges); + SmallVector<DebugLocEntry, 8> Entries; + buildLocationList(Entries, Ranges); + + // If the variable has an DIBasicType, extract it. Basic types cannot have + // unique identifiers, so don't bother resolving the type with the + // identifier map. + const DIBasicType *BT = dyn_cast<DIBasicType>( + static_cast<const Metadata *>(IV.first->getType())); + + // Finalize the entry by lowering it into a DWARF bytestream. + for (auto &Entry : Entries) + Entry.finalize(*Asm, DebugLocs, BT); } // Collect info for variables that were optimized out. 
- DIArray Variables = SP.getVariables(); - for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { - DIVariable DV(Variables.getElement(i)); - assert(DV.isVariable()); - if (!Processed.insert(DV).second) + for (const DILocalVariable *DV : SP->getVariables()) { + if (!Processed.insert(InlinedVariable(DV, nullptr)).second) continue; - if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) { - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - DIExpression NoExpr; - ConcreteVariables.push_back(make_unique<DbgVariable>(DV, NoExpr, this)); + if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) { + ensureAbstractVariableIsCreatedIfScoped(InlinedVariable(DV, nullptr), + Scope->getScopeNode()); + ConcreteVariables.push_back(make_unique<DbgVariable>( + DV, /* IA */ nullptr, /* Expr */ nullptr, this)); InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get()); } } @@ -1020,23 +961,25 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { // Check if source location changes, but ignore DBG_VALUE locations. if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); - if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { - unsigned Flags = 0; - PrevInstLoc = DL; - if (DL == PrologEndLoc) { - Flags |= DWARF2_FLAG_PROLOGUE_END; - PrologEndLoc = DebugLoc(); - Flags |= DWARF2_FLAG_IS_STMT; - } - if (DL.getLine() != - Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine()) - Flags |= DWARF2_FLAG_IS_STMT; + if (DL != PrevInstLoc) { + if (DL) { + unsigned Flags = 0; + PrevInstLoc = DL; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END; + PrologEndLoc = DebugLoc(); + Flags |= DWARF2_FLAG_IS_STMT; + } + if (DL.getLine() != + Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine()) + Flags |= DWARF2_FLAG_IS_STMT; - if (!DL.isUnknown()) { - const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); + const MDNode *Scope = DL.getScope(); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); - } else + } else if (UnknownLocations) { + PrevInstLoc = DL; recordSourceLine(0, 0, nullptr, 0); + } } } @@ -1053,8 +996,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { return; if (!PrevLabel) { - PrevLabel = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(PrevLabel); + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); } I->second = PrevLabel; } @@ -1081,8 +1024,8 @@ void DwarfDebug::endInstruction() { // We need a label after this instruction. if (!PrevLabel) { - PrevLabel = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(PrevLabel); + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); } I->second = PrevLabel; } @@ -1119,7 +1062,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { for (const auto &MBB : *MF) for (const auto &MI : MBB) if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && - !MI.getDebugLoc().isUnknown()) { + MI.getDebugLoc()) { // Did the target forget to set the FrameSetup flag for CFI insns? 
assert(!MI.isCFIInstruction() && "First non-frame-setup instruction is a CFI instruction."); @@ -1166,19 +1109,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // is absolute (such as an <> lookup header))) DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); - if (Asm->OutStreamer.hasRawTextSupport()) + if (Asm->OutStreamer->hasRawTextSupport()) // Use a single line table if we are generating assembly. - Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); else - Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); - - // Emit a label for the function so that we have a beginning address. - FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(FunctionBeginSym); + Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); // Calculate history for local variables. - calculateDbgValueHistory(MF, Asm->TM.getSubtargetImpl()->getRegisterInfo(), + calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), DbgValues); // Request labels for the full history. @@ -1187,21 +1125,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (Ranges.empty()) continue; - // The first mention of a function argument gets the FunctionBeginSym + // The first mention of a function argument gets the CurrentFnBegin // label, so arguments are visible when breaking at function entry. - DIVariable DIVar(Ranges.front().first->getDebugVariable()); - if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable && - getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) { - LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; - if (Ranges.front().first->getDebugExpression().isVariablePiece()) { + const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable(); + if (DIVar->getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { + LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); + if (Ranges.front().first->getDebugExpression()->isBitPiece()) { // Mark all non-overlapping initial pieces. for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { - DIExpression Piece = I->first->getDebugExpression(); + const DIExpression *Piece = I->first->getDebugExpression(); if (std::all_of(Ranges.begin(), I, [&](DbgValueHistoryMap::InstrRange Pred) { return !piecesOverlap(Piece, Pred.first->getDebugExpression()); })) - LabelsBeforeInsn[I->first] = FunctionBeginSym; + LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); else break; } @@ -1216,19 +1154,15 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } PrevInstLoc = DebugLoc(); - PrevLabel = FunctionBeginSym; + PrevLabel = Asm->getFunctionBegin(); // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); - if (!PrologEndLoc.isUnknown()) { - DebugLoc FnStartDL = - PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); - recordSourceLine( - FnStartDL.getLine(), FnStartDL.getCol(), - FnStartDL.getScope(MF->getFunction()->getContext()), - // We'd like to list the prologue as "not statements" but GDB behaves - // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - DWARF2_FLAG_IS_STMT); + if (DILocation *L = PrologEndLoc) { + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. 
Revisit this with caution/GDB (7.5+) testing. + auto *SP = L->getInlinedAtScope()->getSubprogram(); + recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); } } @@ -1247,27 +1181,22 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { return; } - // Define end label for subprogram. - FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()); - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(FunctionEndSym); - // Set DwarfDwarfCompileUnitID in MCContext to default value. - Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - DISubprogram SP(FnScope->getScopeNode()); + auto *SP = cast<DISubprogram>(FnScope->getScopeNode()); DwarfCompileUnit &TheCU = *SPMap.lookup(SP); - SmallPtrSet<const MDNode *, 16> ProcessedVars; + DenseSet<InlinedVariable> ProcessedVars; collectVariableInfo(TheCU, SP, ProcessedVars); // Add the range of this function to the list of ranges for the CU. - TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym)); + TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. - if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly && + if (TheCU.getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly && LScopes.getAbstractScopesList().empty() && !IsDarwin) { assert(InfoHolder.getScopeVariables().empty()); assert(DbgValues.empty()); @@ -1286,16 +1215,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { #endif // Construct abstract scopes. for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { - DISubprogram SP(AScope->getScopeNode()); - assert(SP.isSubprogram()); + auto *SP = cast<DISubprogram>(AScope->getScopeNode()); // Collect info for variables that were optimized out. 
- DIArray Variables = SP.getVariables(); - for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { - DIVariable DV(Variables.getElement(i)); - assert(DV && DV.isVariable()); - if (!ProcessedVars.insert(DV).second) + for (const DILocalVariable *DV : SP->getVariables()) { + if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) continue; - ensureAbstractVariableIsCreated(DV, DV.getContext()); + ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr), + DV->getScope()); assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes && "ensureAbstractVariableIsCreated inserted abstract scopes"); } @@ -1327,122 +1253,28 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, StringRef Dir; unsigned Src = 1; unsigned Discriminator = 0; - if (DIScope Scope = DIScope(S)) { - assert(Scope.isScope()); - Fn = Scope.getFilename(); - Dir = Scope.getDirectory(); - if (Scope.isLexicalBlockFile()) - Discriminator = DILexicalBlockFile(S).getDiscriminator(); - - unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID(); + if (auto *Scope = cast_or_null<DIScope>(S)) { + Fn = Scope->getFilename(); + Dir = Scope->getDirectory(); + if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) + Discriminator = LBF->getDiscriminator(); + + unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) .getOrCreateSourceID(Fn, Dir); } - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, - Discriminator, Fn); + Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0, + Discriminator, Fn); } //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// -// Emit initial Dwarf sections with a label at the start of each one. -void DwarfDebug::emitSectionLabels() { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Dwarf sections base addresses. 
- DwarfInfoSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); - if (useSplitDwarf()) { - DwarfInfoDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); - DwarfTypesDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo"); - } - DwarfAbbrevSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); - if (useSplitDwarf()) - DwarfAbbrevDWOSectionSym = emitSectionSym( - Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo"); - if (GenerateARangeSection) - emitSectionSym(Asm, TLOF.getDwarfARangesSection()); - - DwarfLineSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); - if (GenerateGnuPubSections) { - DwarfGnuPubNamesSectionSym = - emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); - DwarfGnuPubTypesSectionSym = - emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection()); - } else if (HasDwarfPubSections) { - emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); - emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); - } - - DwarfStrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); - if (useSplitDwarf()) { - DwarfStrDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); - DwarfAddrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); - DwarfDebugLocSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc"); - } else - DwarfDebugLocSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc"); - DwarfDebugRangeSectionSym = - emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); -} - -// Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE &Die) { - // Get the abbreviation for this DIE. - const DIEAbbrev &Abbrev = Die.getAbbrev(); - - // Emit the code (index) for the abbreviation. - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + - "] 0x" + Twine::utohexstr(Die.getOffset()) + - ":0x" + Twine::utohexstr(Die.getSize()) + " " + - dwarf::TagString(Abbrev.getTag())); - Asm->EmitULEB128(Abbrev.getNumber()); - - const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); - - // Emit the DIE attribute values. - for (unsigned i = 0, N = Values.size(); i < N; ++i) { - dwarf::Attribute Attr = AbbrevData[i].getAttribute(); - dwarf::Form Form = AbbrevData[i].getForm(); - assert(Form && "Too many attributes for DIE (check abbreviation)"); - - if (Asm->isVerbose()) { - Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); - if (Attr == dwarf::DW_AT_accessibility) - Asm->OutStreamer.AddComment(dwarf::AccessibilityString( - cast<DIEInteger>(Values[i])->getValue())); - } - - // Emit an attribute using the defined form. - Values[i]->EmitValue(Asm, Form); - } - - // Emit the DIE children if any. - if (Abbrev.hasChildren()) { - for (auto &Child : Die.getChildren()) - emitDIE(*Child); - - Asm->OutStreamer.AddComment("End Of Children Mark"); - Asm->EmitInt8(0); - } -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - - Holder.emitUnits(DwarfAbbrevSectionSym); + Holder.emitUnits(/* UseOffsets */ false); } // Emit the abbreviation section. 
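Although emitDIE's body has moved out of this file, the encoding it implements is worth keeping in mind when reading emitDebugInfo() above: each DIE is a ULEB128 abbreviation code followed by its attribute values in exactly the forms its abbreviation declares, then, if the abbreviation has children, the child DIEs and a terminating zero byte. A minimal sketch (tags, forms, and values illustrative):

    .debug_abbrev:  abbrev 1: DW_TAG_compile_unit, DW_CHILDREN_yes,
                      DW_AT_producer DW_FORM_strp, DW_AT_name DW_FORM_strp, 0, 0
    .debug_info:    ULEB128(1), strp(producer), strp(name),
                      <child DIEs>, 0x00   // end-of-children mark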
@@ -1452,65 +1284,39 @@ void DwarfDebug::emitAbbreviations() { Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); } -// Emit the last address of the section and the end of the line matrix. -void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { - // Define last address of section. - Asm->OutStreamer.AddComment("Extended Op"); - Asm->EmitInt8(0); - - Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1); - Asm->OutStreamer.AddComment("DW_LNE_set_address"); - Asm->EmitInt8(dwarf::DW_LNE_set_address); - - Asm->OutStreamer.AddComment("Section end label"); - - Asm->OutStreamer.EmitSymbolValue( - Asm->GetTempSymbol("section_end", SectionEnd), - Asm->getDataLayout().getPointerSize()); - - // Mark end of matrix. - Asm->OutStreamer.AddComment("DW_LNE_end_sequence"); - Asm->EmitInt8(0); - Asm->EmitInt8(1); - Asm->EmitInt8(1); -} - -void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section, - StringRef TableName, StringRef SymName) { +void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section, + StringRef TableName) { Accel.FinalizeTable(Asm, TableName); - Asm->OutStreamer.SwitchSection(Section); - auto *SectionBegin = Asm->GetTempSymbol(SymName); - Asm->OutStreamer.EmitLabel(SectionBegin); + Asm->OutStreamer->SwitchSection(Section); // Emit the full data. - Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym); + Accel.emit(Asm, Section->getBeginSymbol(), this); } // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(), - "Names", "names_begin"); + "Names"); } // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(), - "ObjC", "objc_begin"); + "ObjC"); } // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { emitAccel(AccelNamespace, Asm->getObjFileLowering().getDwarfAccelNamespaceSection(), - "namespac", "namespac_begin"); + "namespac"); } // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(), - "types", "types_begin"); + "types"); } // Public name handling. @@ -1559,7 +1365,6 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, return dwarf::GIEK_TYPE; case dwarf::DW_TAG_subprogram: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage); - case dwarf::DW_TAG_constant: case dwarf::DW_TAG_variable: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage); case dwarf::DW_TAG_enumerator: @@ -1573,16 +1378,16 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames(bool GnuStyle) { - const MCSection *PSec = - GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() - : Asm->getObjFileLowering().getDwarfPubNamesSection(); + MCSection *PSec = GnuStyle + ? 
Asm->getObjFileLowering().getDwarfGnuPubNamesSection() + : Asm->getObjFileLowering().getDwarfPubNamesSection(); emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfCompileUnit::getGlobalNames); } void DwarfDebug::emitDebugPubSection( - bool GnuStyle, const MCSection *PSec, StringRef Name, + bool GnuStyle, MCSection *PSec, StringRef Name, const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) { for (const auto &NU : CUMap) { DwarfCompileUnit *TheU = NU.second; @@ -1594,26 +1399,25 @@ void DwarfDebug::emitDebugPubSection( if (auto *Skeleton = TheU->getSkeleton()) TheU = Skeleton; - unsigned ID = TheU->getUniqueID(); // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection(PSec); + Asm->OutStreamer->SwitchSection(PSec); // Emit the header. - Asm->OutStreamer.AddComment("Length of Public " + Name + " Info"); - MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID); - MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID); + Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); + MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); + MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); - Asm->OutStreamer.EmitLabel(BeginLabel); + Asm->OutStreamer->EmitLabel(BeginLabel); - Asm->OutStreamer.AddComment("DWARF Version"); + Asm->OutStreamer->AddComment("DWARF Version"); Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym()); + Asm->OutStreamer->AddComment("Offset of Compilation Unit Info"); + Asm->emitSectionOffset(TheU->getLabelBegin()); - Asm->OutStreamer.AddComment("Compilation Unit Length"); + Asm->OutStreamer->AddComment("Compilation Unit Length"); Asm->EmitInt32(TheU->getLength()); // Emit the pubnames for this compilation unit. @@ -1621,31 +1425,31 @@ void DwarfDebug::emitDebugPubSection( const char *Name = GI.getKeyData(); const DIE *Entity = GI.second; - Asm->OutStreamer.AddComment("DIE offset"); + Asm->OutStreamer->AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); if (GnuStyle) { dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); - Asm->OutStreamer.AddComment( + Asm->OutStreamer->AddComment( Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); Asm->EmitInt8(Desc.toBits()); } - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); + Asm->OutStreamer->AddComment("External Name"); + Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); } - Asm->OutStreamer.AddComment("End Mark"); + Asm->OutStreamer->AddComment("End Mark"); Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(EndLabel); + Asm->OutStreamer->EmitLabel(EndLabel); } } void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { - const MCSection *PSec = - GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() - : Asm->getObjFileLowering().getDwarfPubTypesSection(); + MCSection *PSec = GnuStyle + ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() + : Asm->getObjFileLowering().getDwarfPubTypesSection(); emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfCompileUnit::getGlobalTypes); @@ -1657,86 +1461,44 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -/// Emits an optimal (=sorted) sequence of DW_OP_pieces. 
-void DwarfDebug::emitLocPieces(ByteStreamer &Streamer, - const DITypeIdentifierMap &Map, - ArrayRef<DebugLocEntry::Value> Values) { - assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { - return P.isVariablePiece(); - }) && "all values are expected to be pieces"); - assert(std::is_sorted(Values.begin(), Values.end()) && - "pieces are expected to be sorted"); - - unsigned Offset = 0; - for (auto Piece : Values) { - const unsigned SizeOfByte = 8; - DIExpression Expr = Piece.getExpression(); - unsigned PieceOffset = Expr.getPieceOffset(); - unsigned PieceSize = Expr.getPieceSize(); - assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); - if (Offset < PieceOffset) { - // The DWARF spec seriously mandates pieces with no locations for gaps. - Asm->EmitDwarfOpPiece(Streamer, (PieceOffset-Offset)*SizeOfByte); - Offset += PieceOffset-Offset; - } - Offset += PieceSize; - -#ifndef NDEBUG - DIVariable Var = Piece.getVariable(); - assert(!Var.isIndirect() && "indirect address for piece"); - unsigned VarSize = Var.getSizeInBits(Map); - assert(PieceSize+PieceOffset <= VarSize/SizeOfByte - && "piece is larger than or outside of variable"); - assert(PieceSize*SizeOfByte != VarSize - && "piece covers entire variable"); -#endif - - emitDebugLocValue(Streamer, Piece, PieceOffset*SizeOfByte); - } -} - - void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, - const DebugLocEntry &Entry) { - const DebugLocEntry::Value Value = Entry.getValues()[0]; - if (Value.isVariablePiece()) - // Emit all pieces that belong to the same variable and range. - return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues()); - - assert(Entry.getValues().size() == 1 && "only pieces may have >1 value"); - emitDebugLocValue(Streamer, Value); -} - -void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, - const DebugLocEntry::Value &Value, - unsigned PieceOffsetInBits) { - DIVariable DV = Value.getVariable(); - DebugLocDwarfExpression DwarfExpr(*Asm, Streamer); - + const DebugLocStream::Entry &Entry) { + auto &&Comments = DebugLocs.getComments(Entry); + auto Comment = Comments.begin(); + auto End = Comments.end(); + for (uint8_t Byte : DebugLocs.getBytes(Entry)) + Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : ""); +} + +static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, + ByteStreamer &Streamer, + const DebugLocEntry::Value &Value, + unsigned PieceOffsetInBits) { + DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); // Regular entry. if (Value.isInt()) { - DIBasicType BTy(resolve(DV.getType())); - if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || - BTy.getEncoding() == dwarf::DW_ATE_signed_char)) + if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed || + BT->getEncoding() == dwarf::DW_ATE_signed_char)) DwarfExpr.AddSignedConstant(Value.getInt()); else DwarfExpr.AddUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); - DIExpression Expr = Value.getExpression(); - if (!Expr || (Expr.getNumElements() == 0)) + const DIExpression *Expr = Value.getExpression(); + if (!Expr || !Expr->getNumElements()) // Regular entry. - Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); + AP.EmitDwarfRegOp(Streamer, Loc); else { // Complex address entry. 
if (Loc.getOffset()) { DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset()); - DwarfExpr.AddExpression(Expr, PieceOffsetInBits); + DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(), + PieceOffsetInBits); } else DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(), PieceOffsetInBits); - if (DV.isIndirect()) - DwarfExpr.EmitOp(dwarf::DW_OP_deref); } } // else ... ignore constant fp. There is not any good way to @@ -1744,61 +1506,95 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, // FIXME: ^ } -void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) { - Asm->OutStreamer.AddComment("Loc expr size"); - MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); - MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); - Asm->EmitLabelDifference(end, begin, 2); - Asm->OutStreamer.EmitLabel(begin); +void DebugLocEntry::finalize(const AsmPrinter &AP, DebugLocStream &Locs, + const DIBasicType *BT) { + Locs.startEntry(Begin, End); + BufferByteStreamer Streamer = Locs.getStreamer(); + const DebugLocEntry::Value &Value = Values[0]; + if (Value.isBitPiece()) { + // Emit all pieces that belong to the same variable and range. + assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { + return P.isBitPiece(); + }) && "all values are expected to be pieces"); + assert(std::is_sorted(Values.begin(), Values.end()) && + "pieces are expected to be sorted"); + + unsigned Offset = 0; + for (auto Piece : Values) { + const DIExpression *Expr = Piece.getExpression(); + unsigned PieceOffset = Expr->getBitPieceOffset(); + unsigned PieceSize = Expr->getBitPieceSize(); + assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); + if (Offset < PieceOffset) { + // The DWARF spec seriously mandates pieces with no locations for gaps. + DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); + Expr.AddOpPiece(PieceOffset-Offset, 0); + Offset += PieceOffset-Offset; + } + Offset += PieceSize; + + emitDebugLocValue(AP, BT, Streamer, Piece, PieceOffset); + } + } else { + assert(Values.size() == 1 && "only pieces may have >1 value"); + emitDebugLocValue(AP, BT, Streamer, Value, 0); + } +} + +void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { + // Emit the size. + Asm->OutStreamer->AddComment("Loc expr size"); + Asm->EmitInt16(DebugLocs.getBytes(Entry).size()); + // Emit the entry. APByteStreamer Streamer(*Asm); emitDebugLocEntry(Streamer, Entry); - // Close the range. - Asm->OutStreamer.EmitLabel(end); } // Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { // Start the dwarf loc section. - Asm->OutStreamer.SwitchSection( + Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); - for (const auto &DebugLoc : DotDebugLocEntries) { - Asm->OutStreamer.EmitLabel(DebugLoc.Label); - const DwarfCompileUnit *CU = DebugLoc.CU; - for (const auto &Entry : DebugLoc.List) { + for (const auto &List : DebugLocs.getLists()) { + Asm->OutStreamer->EmitLabel(List.Label); + const DwarfCompileUnit *CU = List.CU; + for (const auto &Entry : DebugLocs.getEntries(List)) { // Set up the range. This range is relative to the entry point of the // compile unit. This is a hard coded 0 for low_pc when we're emitting // ranges, or the DW_AT_low_pc on the compile unit otherwise. 
if (auto *Base = CU->getBaseAddress()) { - Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size); - Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size); + Asm->EmitLabelDifference(Entry.BeginSym, Base, Size); + Asm->EmitLabelDifference(Entry.EndSym, Base, Size); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); - Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); + Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size); + Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size); } emitDebugLocEntryLocation(Entry); } - Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); } } void DwarfDebug::emitDebugLocDWO() { - Asm->OutStreamer.SwitchSection( + Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocDWOSection()); - for (const auto &DebugLoc : DotDebugLocEntries) { - Asm->OutStreamer.EmitLabel(DebugLoc.Label); - for (const auto &Entry : DebugLoc.List) { + for (const auto &List : DebugLocs.getLists()) { + Asm->OutStreamer->EmitLabel(List.Label); + for (const auto &Entry : DebugLocs.getEntries(List)) { // Just always use start_length for now - at least that's one address // rather than two. We could get fancier and try to, say, reuse an // address we know we've emitted elsewhere (the start of the function? // The start of the CU or CU subrange that encloses this range?) Asm->EmitInt8(dwarf::DW_LLE_start_length_entry); - unsigned idx = AddrPool.getIndex(Entry.getBeginSym()); + unsigned idx = AddrPool.getIndex(Entry.BeginSym); Asm->EmitULEB128(idx); - Asm->EmitLabelDifference(Entry.getEndSym(), Entry.getBeginSym(), 4); + Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); emitDebugLocEntryLocation(Entry); } @@ -1813,36 +1609,62 @@ struct ArangeSpan { // Emit a debug aranges section, containing a CU lookup for any // address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { - // Start the dwarf aranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfARangesSection()); + // Provides a unique id per text section. + MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap; - typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> SpansType; + // Filter labels by section. + for (const SymbolCU &SCU : ArangeLabels) { + if (SCU.Sym->isInSection()) { + // Make a note of this symbol and its section. + MCSection *Section = &SCU.Sym->getSection(); + if (!Section->getKind().isMetadata()) + SectionMap[Section].push_back(SCU); + } else { + // Some symbols (e.g. common/bss on mach-o) can have no section but still + // appear in the output. This sucks as we rely on sections to build + // arange spans. We can do it without, but it's icky. + SectionMap[nullptr].push_back(SCU); + } + } + + // Add terminating symbols for each section. + for (const auto &I : SectionMap) { + MCSection *Section = I.first; + MCSymbol *Sym = nullptr; - SpansType Spans; + if (Section) + Sym = Asm->OutStreamer->endSection(Section); - // Build a list of sections used. - std::vector<const MCSection *> Sections; - for (const auto &it : SectionMap) { - const MCSection *Section = it.first; - Sections.push_back(Section); + // Insert a final terminator. + SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); } - // Sort the sections into order. - // This is only done to ensure consistent output order across different runs.
- std::sort(Sections.begin(), Sections.end(), SectionSort); + DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans; - // Build a set of address spans, sorted by CU. - for (const MCSection *Section : Sections) { - SmallVector<SymbolCU, 8> &List = SectionMap[Section]; + for (auto &I : SectionMap) { + const MCSection *Section = I.first; + SmallVector<SymbolCU, 8> &List = I.second; if (List.size() < 2) continue; + // If we have no section (e.g. common), just write out + // individual spans for each symbol. + if (!Section) { + for (const SymbolCU &Cur : List) { + ArangeSpan Span; + Span.Start = Cur.Sym; + Span.End = nullptr; + if (Cur.CU) + Spans[Cur.CU].push_back(Span); + } + continue; + } + // Sort the symbols by offset within the section. std::sort(List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) { - unsigned IA = A.Sym ? Asm->OutStreamer.GetSymbolOrder(A.Sym) : 0; - unsigned IB = B.Sym ? Asm->OutStreamer.GetSymbolOrder(B.Sym) : 0; + unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; // Symbols with no order assigned should be placed at the end. // (e.g. section end labels) @@ -1853,35 +1675,27 @@ void DwarfDebug::emitDebugARanges() { return IA < IB; }); - // If we have no section (e.g. common), just write out - // individual spans for each symbol. - if (!Section) { - for (const SymbolCU &Cur : List) { + // Build spans between each label. + const MCSymbol *StartSym = List[0].Sym; + for (size_t n = 1, e = List.size(); n < e; n++) { + const SymbolCU &Prev = List[n - 1]; + const SymbolCU &Cur = List[n]; + + // Try and build the longest span we can within the same CU. + if (Cur.CU != Prev.CU) { ArangeSpan Span; - Span.Start = Cur.Sym; - Span.End = nullptr; - if (Cur.CU) - Spans[Cur.CU].push_back(Span); - } - } else { - // Build spans between each label. - const MCSymbol *StartSym = List[0].Sym; - for (size_t n = 1, e = List.size(); n < e; n++) { - const SymbolCU &Prev = List[n - 1]; - const SymbolCU &Cur = List[n]; - - // Try and build the longest span we can within the same CU. - if (Cur.CU != Prev.CU) { - ArangeSpan Span; - Span.Start = StartSym; - Span.End = Cur.Sym; - Spans[Prev.CU].push_back(Span); - StartSym = Cur.Sym; - } + Span.Start = StartSym; + Span.End = Cur.Sym; + Spans[Prev.CU].push_back(Span); + StartSym = Cur.Sym; } } } + // Start the dwarf aranges section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); + unsigned PtrSize = Asm->getDataLayout().getPointerSize(); // Build a list of CUs used. @@ -1921,18 +1735,18 @@ void DwarfDebug::emitDebugARanges() { ContentSize += (List.size() + 1) * TupleSize; // For each compile unit, write the list of spans it covers. 
- Asm->OutStreamer.AddComment("Length of ARange Set"); + Asm->OutStreamer->AddComment("Length of ARange Set"); Asm->EmitInt32(ContentSize); - Asm->OutStreamer.AddComment("DWARF Arange version number"); + Asm->OutStreamer->AddComment("DWARF Arange version number"); Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); - Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); - Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym()); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->OutStreamer->AddComment("Offset Into Debug Info Section"); + Asm->emitSectionOffset(CU->getLabelBegin()); + Asm->OutStreamer->AddComment("Address Size (in bytes)"); Asm->EmitInt8(PtrSize); - Asm->OutStreamer.AddComment("Segment Size (in bytes)"); + Asm->OutStreamer->AddComment("Segment Size (in bytes)"); Asm->EmitInt8(0); - Asm->OutStreamer.EmitFill(Padding, 0xff); + Asm->OutStreamer->EmitFill(Padding, 0xff); for (const ArangeSpan &Span : List) { Asm->EmitLabelReference(Span.Start, PtrSize); @@ -1947,20 +1761,20 @@ void DwarfDebug::emitDebugARanges() { if (Size == 0) Size = 1; - Asm->OutStreamer.EmitIntValue(Size, PtrSize); + Asm->OutStreamer->EmitIntValue(Size, PtrSize); } } - Asm->OutStreamer.AddComment("ARange terminator"); - Asm->OutStreamer.EmitIntValue(0, PtrSize); - Asm->OutStreamer.EmitIntValue(0, PtrSize); + Asm->OutStreamer->AddComment("ARange terminator"); + Asm->OutStreamer->EmitIntValue(0, PtrSize); + Asm->OutStreamer->EmitIntValue(0, PtrSize); } } // Emit visible names into a debug ranges section. void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. - Asm->OutStreamer.SwitchSection( + Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); // Size for our labels. @@ -1976,7 +1790,7 @@ void DwarfDebug::emitDebugRanges() { // Iterate over the misc ranges for the compile units in the module. for (const RangeSpanList &List : TheCU->getRangeLists()) { // Emit our symbol so we can find the beginning of the range. - Asm->OutStreamer.EmitLabel(List.getSym()); + Asm->OutStreamer->EmitLabel(List.getSym()); for (const RangeSpan &Range : List.getRanges()) { const MCSymbol *Begin = Range.getStart(); @@ -1987,14 +1801,14 @@ void DwarfDebug::emitDebugRanges() { Asm->EmitLabelDifference(Begin, Base, Size); Asm->EmitLabelDifference(End, Base, Size); } else { - Asm->OutStreamer.EmitSymbolValue(Begin, Size); - Asm->OutStreamer.EmitSymbolValue(End, Size); + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->EmitSymbolValue(End, Size); } } // And terminate the list with two 0 values. 
- Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); } } } @@ -2004,7 +1818,7 @@ void DwarfDebug::emitDebugRanges() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfUnit> NewU) { NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, - U.getCUNode().getSplitDebugFilename()); + U.getCUNode()->getSplitDebugFilename()); if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); @@ -2022,10 +1836,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = make_unique<DwarfCompileUnit>( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), - DwarfInfoSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); - NewCU.initStmtList(DwarfLineSectionSym); + NewCU.initStmtList(); initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); @@ -2036,9 +1849,8 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { // compile units that would normally be in debug_info. void DwarfDebug::emitDebugInfoDWO() { assert(useSplitDwarf() && "No split dwarf debug info?"); - // Don't pass an abbrev symbol, using a constant zero instead so as not to - // emit relocations into the dwo file. - InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr); + // Don't emit relocations into the dwo file. + InfoHolder.emitUnits(/* UseOffsets */ true); } // Emit the .debug_abbrev.dwo section for separated dwarf. This contains the @@ -2050,9 +1862,9 @@ void DwarfDebug::emitDebugAbbrevDWO() { void DwarfDebug::emitDebugLineDWO() { assert(useSplitDwarf() && "No split dwarf?"); - Asm->OutStreamer.SwitchSection( + Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLineDWOSection()); - SplitTypeUnitFileTable.Emit(Asm->OutStreamer); + SplitTypeUnitFileTable.Emit(*Asm->OutStreamer); } // Emit the .debug_str.dwo section for separated dwarf. This contains the @@ -2060,8 +1872,7 @@ void DwarfDebug::emitDebugLineDWO() { // sections. void DwarfDebug::emitDebugStrDWO() { assert(useSplitDwarf() && "No split dwarf?"); - const MCSection *OffSec = - Asm->getObjFileLowering().getDwarfStrOffDWOSection(); + MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection(); InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), OffSec); } @@ -2070,7 +1881,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { if (!useSplitDwarf()) return nullptr; if (SingleCU) - SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode().getDirectory()); + SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory()); return &SplitTypeUnitFileTable; } @@ -2082,12 +1893,12 @@ static uint64_t makeTypeSignature(StringRef Identifier) { // appropriately. MD5::MD5Result Result; Hash.final(Result); - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &RefDie, - DICompositeType CTy) { + const DICompositeType *CTy) { // Fast path if we're building some type units and one has already used the // address pool we know we're going to throw away all this work anyway, so // don't bother building dependent types. 
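For reference, makeTypeSignature above implements the DWARF4 type-unit signature scheme: the type's unique identifier string is hashed with MD5 and the trailing eight bytes of the 16-byte digest become the signature, read little-endian (support::endian::read64le replacing the old ulittle64_t reinterpret_cast). A self-contained restatement under that reading; the function name is illustrative, and it assumes the byte-array MD5Result layout this revision of LLVM uses:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MD5.h"
#include <cstdint>
using namespace llvm;

// Illustrative sketch: the type signature is the last 8 bytes of
// MD5(Identifier), interpreted as a little-endian 64-bit value.
static uint64_t typeSignatureSketch(StringRef Identifier) {
  MD5 Hash;
  Hash.update(Identifier);
  MD5::MD5Result Result; // 16-byte digest
  Hash.final(Result);
  return support::endian::read64le(Result + 8); // bytes 8..15
}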
@@ -2146,7 +1957,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // This is inefficient because all the dependent types will be rebuilt // from scratch, including building them in type units, discovering that // they depend on addresses, throwing them out and rebuilding them. - CU.constructTypeDIE(RefDie, CTy); + CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy)); return; } @@ -2165,27 +1976,23 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { if (!useDwarfAccelTables()) return; - AccelNames.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), - &Die); + AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) { if (!useDwarfAccelTables()) return; - AccelObjC.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), - &Die); + AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) { if (!useDwarfAccelTables()) return; - AccelNamespace.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), - &Die); + AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { if (!useDwarfAccelTables()) return; - AccelTypes.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), - &Die); + AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index a1a9426..700f736 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -16,11 +16,11 @@ #include "AsmPrinterHandler.h" #include "DbgValueHistoryCalculator.h" -#include "DebugLocEntry.h" -#include "DebugLocList.h" +#include "DebugLocStream.h" #include "DwarfAccelTable.h" #include "DwarfFile.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,6 +41,7 @@ class AsmPrinter; class ByteStreamer; class ConstantInt; class ConstantFP; +class DebugLocEntry; class DwarfCompileUnit; class DwarfDebug; class DwarfTypeUnit; @@ -67,81 +68,111 @@ public: //===----------------------------------------------------------------------===// /// \brief This class is used to track local variable information. +/// +/// - Variables whose location changes over time have a DebugLocListIndex and +/// the other fields are not used. +/// +/// - Variables that are described by multiple MMI table entries have multiple +/// expressions and frame indices. class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIExpression Expr; // Complex address location expression. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. - const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. - int FrameIndex; + const DILocalVariable *Var; /// Variable Descriptor. + const DILocation *IA; /// Inlined at location. + SmallVector<const DIExpression *, 1> + Expr; /// Complex address location expression. + DIE *TheDIE; /// Variable DIE. + unsigned DebugLocListIndex; /// Offset in DebugLocs. + const MachineInstr *MInsn; /// DBG_VALUE instruction of the variable. + SmallVector<int, 1> FrameIndex; /// Frame index of the variable. 
DwarfDebug *DD; public: - /// Construct a DbgVariable from a DIVariable. - DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD) - : Var(V), Expr(E), TheDIE(nullptr), DotDebugLocOffset(~0U), - MInsn(nullptr), FrameIndex(~0), DD(DD) { - assert(Var.Verify() && Expr.Verify()); + /// Construct a DbgVariable from a variable. + DbgVariable(const DILocalVariable *V, const DILocation *IA, + const DIExpression *E, DwarfDebug *DD, int FI = ~0) + : Var(V), IA(IA), Expr(1, E), TheDIE(nullptr), DebugLocListIndex(~0U), + MInsn(nullptr), DD(DD) { + FrameIndex.push_back(FI); + assert(!E || E->isValid()); } /// Construct a DbgVariable from a DEBUG_VALUE. /// AbstractVar may be NULL. DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD) - : Var(DbgValue->getDebugVariable()), Expr(DbgValue->getDebugExpression()), - TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue), - FrameIndex(~0), DD(DD) {} + : Var(DbgValue->getDebugVariable()), + IA(DbgValue->getDebugLoc()->getInlinedAt()), + Expr(1, DbgValue->getDebugExpression()), TheDIE(nullptr), + DebugLocListIndex(~0U), MInsn(DbgValue), DD(DD) { + FrameIndex.push_back(~0); + } // Accessors. - DIVariable getVariable() const { return Var; } - DIExpression getExpression() const { return Expr; } + const DILocalVariable *getVariable() const { return Var; } + const DILocation *getInlinedAt() const { return IA; } + const ArrayRef<const DIExpression *> getExpression() const { return Expr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } - void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } - unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } + void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } + unsigned getDebugLocListIndex() const { return DebugLocListIndex; } + StringRef getName() const { return Var->getName(); } const MachineInstr *getMInsn() const { return MInsn; } - int getFrameIndex() const { return FrameIndex; } - void setFrameIndex(int FI) { FrameIndex = FI; } + const ArrayRef<int> getFrameIndex() const { return FrameIndex; } + + void addMMIEntry(const DbgVariable &V) { + assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); + assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); + assert(V.Var == Var && "conflicting variable"); + assert(V.IA == IA && "conflicting inlined-at location"); + + if (V.getFrameIndex().back() != ~0) { + auto E = V.getExpression(); + auto FI = V.getFrameIndex(); + Expr.append(E.begin(), E.end()); + FrameIndex.append(FI.begin(), FI.end()); + } + assert(Expr.size() > 1 ? std::all_of(Expr.begin(), Expr.end(), + [](const DIExpression *E) { + return E->isBitPiece(); + }) + : (true && "conflicting locations for variable")); + } + // Translate tag to proper Dwarf tag. dwarf::Tag getTag() const { - if (Var.getTag() == dwarf::DW_TAG_arg_variable) + if (Var->getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; return dwarf::DW_TAG_variable; } /// \brief Return true if DbgVariable is artificial. 
bool isArtificial() const { - if (Var.isArtificial()) + if (Var->isArtificial()) return true; - if (getType().isArtificial()) + if (getType()->isArtificial()) return true; return false; } bool isObjectPointer() const { - if (Var.isObjectPointer()) + if (Var->isObjectPointer()) return true; - if (getType().isObjectPointer()) + if (getType()->isObjectPointer()) return true; return false; } bool variableHasComplexAddress() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Expr.getNumElements() > 0; + assert(Var && "Invalid complex DbgVariable!"); + assert(Expr.size() == 1 && + "variableHasComplexAddress() invoked on multi-FI variable"); + return Expr.back()->getNumElements() > 0; } bool isBlockByrefVariable() const; - unsigned getNumAddrElements() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Expr.getNumElements(); - } - uint64_t getAddrElement(unsigned i) const { return Expr.getElement(i); } - DIType getType() const; + const DIType *getType() const; private: /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. - template <typename T> T resolve(DIRef<T> Ref) const; + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const; }; @@ -178,10 +209,6 @@ class DwarfDebug : public AsmPrinterHandler { // Size of each symbol emitted (for those symbols that have a specific size). DenseMap<const MCSymbol *, uint64_t> SymSize; - // Provides a unique id per text section. - typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType; - SectionMapType SectionMap; - LexicalScopes LScopes; // Collection of abstract variables. @@ -190,7 +217,7 @@ class DwarfDebug : public AsmPrinterHandler { // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists // can refer to them in spite of insertions into this list. - SmallVector<DebugLocList, 4> DotDebugLocEntries; + DebugLocStream DebugLocs; // This is a collection of subprogram MDNodes that are processed to // create DIEs. @@ -224,25 +251,10 @@ class DwarfDebug : public AsmPrinterHandler { // If nonnull, stores the CU in which the previous subprogram was contained. const DwarfCompileUnit *PrevCU; - // Section Symbols: these are assembler temporary labels that are emitted at - // the beginning of each supported dwarf section. These are used to form - // section offsets and are created by EmitSectionLabels. - MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; - MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; - MCSymbol *FunctionBeginSym, *FunctionEndSym; - MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym; - MCSymbol *DwarfTypesDWOSectionSym; - MCSymbol *DwarfStrDWOSectionSym; - MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; - // As an optimization, there is no need to emit an entry in the directory // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; - // Counter for assigning globally unique IDs for ranges. - unsigned GlobalRangeCount; - // Holder for the file specific debug information. DwarfFile InfoHolder; @@ -258,7 +270,9 @@ class DwarfDebug : public AsmPrinterHandler { // them. 
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; - SmallVector<std::pair<std::unique_ptr<DwarfTypeUnit>, DICompositeType>, 1> TypeUnitsUnderConstruction; + SmallVector< + std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1> + TypeUnitsUnderConstruction; // Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; @@ -270,6 +284,9 @@ class DwarfDebug : public AsmPrinterHandler { // text. bool UsedNonDefaultText; + // Whether to use the GNU TLS opcode (instead of the standard opcode). + bool UseGNUTLSOpcode; + // Version of dwarf we're emitting. unsigned DwarfVersion; @@ -298,6 +315,7 @@ class DwarfDebug : public AsmPrinterHandler { // True iff there are multiple CUs in this module. bool SingleCU; bool IsDarwin; + bool IsPS4; AddressPool AddrPool; @@ -306,7 +324,7 @@ class DwarfDebug : public AsmPrinterHandler { DwarfAccelTable AccelNamespace; DwarfAccelTable AccelTypes; - DenseMap<const Function *, DISubprogram> FunctionDIs; + DenseMap<const Function *, DISubprogram *> FunctionDIs; MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); @@ -314,22 +332,21 @@ class DwarfDebug : public AsmPrinterHandler { return InfoHolder.getUnits(); } + typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + /// \brief Find abstract variable associated with Var. - DbgVariable *getExistingAbstractVariable(const DIVariable &DV, - DIVariable &Cleansed); - DbgVariable *getExistingAbstractVariable(const DIVariable &DV); - void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope); - void ensureAbstractVariableIsCreated(const DIVariable &Var, + DbgVariable *getExistingAbstractVariable(InlinedVariable IV, + const DILocalVariable *&Cleansed); + DbgVariable *getExistingAbstractVariable(InlinedVariable IV); + void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); + void ensureAbstractVariableIsCreated(InlinedVariable Var, const MDNode *Scope); - void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var, + void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var, const MDNode *Scope); /// \brief Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); - /// \brief Emit initial Dwarf sections with a label at the start of each one. - void emitSectionLabels(); - /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); @@ -347,23 +364,15 @@ class DwarfDebug : public AsmPrinterHandler { /// processed. void finalizeModuleInfo(); - /// \brief Emit labels to close any remaining sections that have been left - /// open. - void endSections(); - /// \brief Emit the debug info section. void emitDebugInfo(); /// \brief Emit the abbreviation section. void emitAbbreviations(); - /// \brief Emit the last address of the section and the end of - /// the line matrix. - void emitEndOfLineMatrix(unsigned SectionEnd); - /// \brief Emit a specified accelerator table. - void emitAccel(DwarfAccelTable &Accel, const MCSection *Section, - StringRef TableName, StringRef SymName); + void emitAccel(DwarfAccelTable &Accel, MCSection *Section, + StringRef TableName); /// \brief Emit visible names into a hashed accelerator table section. 
void emitAccelNames(); @@ -391,7 +400,7 @@ class DwarfDebug : public AsmPrinterHandler { void emitDebugPubTypes(bool GnuStyle = false); void emitDebugPubSection( - bool GnuStyle, const MCSection *PSec, StringRef Name, + bool GnuStyle, MCSection *PSec, StringRef Name, const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const); /// \brief Emit visible names into a debug str section. @@ -444,11 +453,11 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit); + DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit); /// \brief Construct imported_module or imported_declaration DIE. void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, - const MDNode *N); + const DIImportedEntity *N); /// \brief Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the @@ -461,8 +470,8 @@ class DwarfDebug : public AsmPrinterHandler { void identifyScopeMarkers(); /// \brief Populate LexicalScope entries with variables' info. - void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, - SmallPtrSetImpl<const MDNode *> &ProcessedVars); + void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, + DenseSet<InlinedVariable> &ProcessedVars); /// \brief Build the location list for all DBG_VALUEs in the /// function that describe the same variable. @@ -471,7 +480,7 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Collect variable information from the side table maintained /// by MMI. - void collectVariableInfoFromMMITable(SmallPtrSetImpl<const MDNode *> &P); + void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); /// \brief Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { @@ -513,7 +522,7 @@ public: /// \brief Add a DIE to the set of types that we're going to pull into /// type units. void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, - DIE &Die, DICompositeType CTy); + DIE &Die, const DICompositeType *CTy); /// \brief Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } @@ -524,8 +533,9 @@ public: SymSize[Sym] = Size; } - /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE &Die); + /// \brief Returns whether to use DW_OP_GNU_push_tls_address, instead of the + /// standard DW_OP_form_tls_address opcode + bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } // Experimental DWARF5 features. @@ -540,42 +550,23 @@ public: /// Returns the Dwarf Version. unsigned getDwarfVersion() const { return DwarfVersion; } - /// Returns the section symbol for the .debug_loc section. - MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; } - - /// Returns the section symbol for the .debug_str section. - MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; } - - /// Returns the section symbol for the .debug_ranges section. - MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; } - /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } /// Returns the entries for the .debug_loc section. 
- const SmallVectorImpl<DebugLocList> & - getDebugLocEntries() const { - return DotDebugLocEntries; - } + const DebugLocStream &getDebugLocs() const { return DebugLocs; } /// \brief Emit an entry for the debug loc section. This can be used to /// handle an entry that's going to be emitted into the debug loc section. - void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry); - /// \brief emit a single value for the debug loc section. - void emitDebugLocValue(ByteStreamer &Streamer, - const DebugLocEntry::Value &Value, - unsigned PieceOffsetInBits = 0); - /// Emits an optimal (=sorted) sequence of DW_OP_pieces. - void emitLocPieces(ByteStreamer &Streamer, - const DITypeIdentifierMap &Map, - ArrayRef<DebugLocEntry::Value> Values); + void emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocStream::Entry &Entry); /// Emit the location for a debug loc entry, including the size header. - void emitDebugLocEntryLocation(const DebugLocEntry &Entry); + void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry); /// Find the MDNode for the given reference. - template <typename T> T resolve(DIRef<T> Ref) const { + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { return Ref.resolve(TypeIdentifierMap); } @@ -592,7 +583,7 @@ public: /// or another context nested inside a subprogram. bool isSubprogramContext(const MDNode *Context); - void addSubprogramNames(DISubprogram SP, DIE &Die); + void addSubprogramNames(const DISubprogram *SP, DIE &Die); AddressPool &getAddressPool() { return AddrPool; } @@ -605,8 +596,6 @@ public: void addAccelType(StringRef Name, const DIE &Die, char Flags); const MachineFunction *getCurrentFunction() const { return CurFn; } - const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; } - const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; } iterator_range<ImportedEntityMap::const_iterator> findImportedEntitiesForScope(const MDNode *Scope) const { @@ -626,12 +615,6 @@ public: /// \brief Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - // FIXME: Consider rolling ranges up into DwarfDebug since we use a single - // range_base anyway, so there's no need to keep them as separate per-CU range - // lists. (though one day we might end up with a range.dwo section, in which - // case it'd go to DwarfFile) - unsigned getNextRangeNumber() { return GlobalRangeCount++; } - // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index e8867c0a..a4fd36f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -21,17 +21,24 @@ namespace llvm { class MachineFunction; class ARMTargetStreamer; -class DwarfCFIException : public EHStreamer { - /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality - /// should be emitted. +class DwarfCFIExceptionBase : public EHStreamer { +protected: + DwarfCFIExceptionBase(AsmPrinter *A); + + /// Per-function flag to indicate if frame CFI info should be emitted. + bool shouldEmitCFI; + + void markFunctionEnd() override; +}; + +class DwarfCFIException : public DwarfCFIExceptionBase { + /// Per-function flag to indicate if .cfi_personality should be emitted. 
bool shouldEmitPersonality; - /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda - /// should be emitted. + /// Per-function flag to indicate if .cfi_lsda should be emitted. bool shouldEmitLSDA; - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. + /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; AsmPrinter::CFIMoveType moveTypeModule; @@ -41,44 +48,38 @@ public: // Main entry points. // DwarfCFIException(AsmPrinter *A); - virtual ~DwarfCFIException(); + ~DwarfCFIException() override; - /// endModule - Emit all exception information that should come after the - /// content. + /// Emit all exception information that should come after the content. void endModule() override; - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. void beginFunction(const MachineFunction *MF) override; - /// endFunction - Gather and emit post-function exception information. + /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; }; -class ARMException : public EHStreamer { +class ARMException : public DwarfCFIExceptionBase { void emitTypeInfos(unsigned TTypeEncoding) override; ARMTargetStreamer &getTargetStreamer(); - /// shouldEmitCFI - Per-function flag to indicate if frame CFI info - /// should be emitted. - bool shouldEmitCFI; - public: //===--------------------------------------------------------------------===// // Main entry points. // ARMException(AsmPrinter *A); - virtual ~ARMException(); + ~ARMException() override; - /// endModule - Emit all exception information that should come after the - /// content. + /// Emit all exception information that should come after the content. void endModule() override; - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. void beginFunction(const MachineFunction *MF) override; - /// endFunction - Gather and emit post-function exception information. + /// Gather and emit post-function exception information. 
void endFunction(const MachineFunction *) override; }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 8e85eff..a2799b8 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -22,14 +22,6 @@ using namespace llvm; -const TargetRegisterInfo *DwarfExpression::getTRI() const { - return AP.TM.getSubtargetImpl()->getRegisterInfo(); -} - -unsigned DwarfExpression::getDwarfVersion() const { - return AP.getDwarfDebug()->getDwarfVersion(); -} - void DwarfExpression::AddReg(int DwarfReg, const char *Comment) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); if (DwarfReg < 32) { @@ -74,25 +66,28 @@ void DwarfExpression::AddShr(unsigned ShiftBy) { } bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { - int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false); - if (DwarfReg < 0) - return false; - if (isFrameRegister(MachineReg)) { // If variable offset is based in frame register then use fbreg. EmitOp(dwarf::DW_OP_fbreg); EmitSigned(Offset); - } else { - AddRegIndirect(DwarfReg, Offset); + return true; } + + int DwarfReg = TRI.getDwarfRegNum(MachineReg, false); + if (DwarfReg < 0) + return false; + + AddRegIndirect(DwarfReg, Offset); return true; } bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits, unsigned PieceOffsetInBits) { - const TargetRegisterInfo *TRI = getTRI(); - int Reg = TRI->getDwarfRegNum(MachineReg, false); + if (!TRI.isPhysicalRegister(MachineReg)) + return false; + + int Reg = TRI.getDwarfRegNum(MachineReg, false); // If this is a valid register number, emit it. if (Reg >= 0) { @@ -104,12 +99,12 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, // Walk up the super-register chain until we find a valid number. // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. - for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { - Reg = TRI->getDwarfRegNum(*SR, false); + for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + Reg = TRI.getDwarfRegNum(*SR, false); if (Reg >= 0) { - unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned RegOffset = TRI->getSubRegIdxOffset(Idx); + unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned RegOffset = TRI.getSubRegIdxOffset(Idx); AddReg(Reg, "super-register"); if (PieceOffsetInBits == RegOffset) { AddOpPiece(Size, RegOffset); @@ -133,15 +128,15 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, // efficient DW_OP_piece. unsigned CurPos = PieceOffsetInBits; // The size of the register in bits, assuming 8 bits per byte. - unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8; + unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; // Keep track of the bits in the register we already emitted, so we // can avoid emitting redundant aliasing subregs. 
SmallBitVector Coverage(RegSize, false); - for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { - unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned Offset = TRI->getSubRegIdxOffset(Idx); - Reg = TRI->getDwarfRegNum(*SR, false); + for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned Offset = TRI.getSubRegIdxOffset(Idx); + Reg = TRI.getDwarfRegNum(*SR, false); // Intersection between the bits we already emitted and the bits // covered by this subregister. @@ -177,7 +172,7 @@ void DwarfExpression::AddSignedConstant(int Value) { // value, so the producers and consumers started to rely on heuristics // to disambiguate the value vs. location status of the expression. // See PR21176 for more details. - if (getDwarfVersion() >= 4) + if (DwarfVersion >= 4) EmitOp(dwarf::DW_OP_stack_value); } @@ -185,7 +180,7 @@ void DwarfExpression::AddUnsignedConstant(unsigned Value) { EmitOp(dwarf::DW_OP_constu); EmitUnsigned(Value); // cf. comment in DwarfExpression::AddSignedConstant(). - if (getDwarfVersion() >= 4) + if (DwarfVersion >= 4) EmitOp(dwarf::DW_OP_stack_value); } @@ -197,64 +192,74 @@ static unsigned getOffsetOrZero(unsigned OffsetInBits, return OffsetInBits; } -bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, +bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr, unsigned MachineReg, unsigned PieceOffsetInBits) { - unsigned N = Expr.getNumElements(); - unsigned I = 0; - bool ValidReg = false; + auto I = Expr->expr_op_begin(); + auto E = Expr->expr_op_end(); + if (I == E) + return AddMachineRegPiece(MachineReg); + // Pattern-match combinations for which more efficient representations exist // first. - if (N >= 3 && Expr.getElement(0) == dwarf::DW_OP_piece) { - unsigned SizeOfByte = 8; - unsigned OffsetInBits = Expr.getElement(1) * SizeOfByte; - unsigned SizeInBits = Expr.getElement(2) * SizeOfByte; - ValidReg = - AddMachineRegPiece(MachineReg, SizeInBits, - getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); - I = 3; - } else if (N >= 3 && Expr.getElement(0) == dwarf::DW_OP_plus && - Expr.getElement(2) == dwarf::DW_OP_deref) { + bool ValidReg = false; + switch (I->getOp()) { + case dwarf::DW_OP_bit_piece: { + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); + // Piece always comes at the end of the expression. + return AddMachineRegPiece(MachineReg, SizeInBits, + getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); + } + case dwarf::DW_OP_plus: { // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset]. - unsigned Offset = Expr.getElement(1); - ValidReg = AddMachineRegIndirect(MachineReg, Offset); - I = 3; - } else if (N >= 1 && Expr.getElement(0) == dwarf::DW_OP_deref) { - // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. - ValidReg = AddMachineRegIndirect(MachineReg); - I = 1; - } else - ValidReg = AddMachineRegPiece(MachineReg); + auto N = I.getNext(); + if (N != E && N->getOp() == dwarf::DW_OP_deref) { + unsigned Offset = I->getArg(0); + ValidReg = AddMachineRegIndirect(MachineReg, Offset); + std::advance(I, 2); + break; + } else + ValidReg = AddMachineRegPiece(MachineReg); + } + case dwarf::DW_OP_deref: { + // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. 
+ ValidReg = AddMachineRegIndirect(MachineReg); + ++I; + break; + } + default: + llvm_unreachable("unsupported operand"); + } if (!ValidReg) return false; // Emit remaining elements of the expression. - AddExpression(Expr, I); + AddExpression(I, E, PieceOffsetInBits); return true; } -void DwarfExpression::AddExpression(DIExpression Expr, unsigned I, +void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I, + DIExpression::expr_op_iterator E, unsigned PieceOffsetInBits) { - unsigned N = Expr.getNumElements(); - for (; I < N; ++I) { - switch (Expr.getElement(I)) { - case dwarf::DW_OP_piece: { - unsigned SizeOfByte = 8; - unsigned OffsetInBits = Expr.getElement(++I) * SizeOfByte; - unsigned SizeInBits = Expr.getElement(++I) * SizeOfByte; + for (; I != E; ++I) { + switch (I->getOp()) { + case dwarf::DW_OP_bit_piece: { + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); break; } case dwarf::DW_OP_plus: EmitOp(dwarf::DW_OP_plus_uconst); - EmitUnsigned(Expr.getElement(++I)); + EmitUnsigned(I->getArg(0)); break; case dwarf::DW_OP_deref: EmitOp(dwarf::DW_OP_deref); break; default: - llvm_unreachable("unhandled opcode found in DIExpression"); + llvm_unreachable("unhandled opcode found in expression"); } } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 92e4d5d..78ec937 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -30,21 +30,22 @@ class DIELoc; /// entry. class DwarfExpression { protected: - const AsmPrinter &AP; // Various convenience accessors that extract things out of AsmPrinter. - const TargetRegisterInfo *getTRI() const; - unsigned getDwarfVersion() const; + const TargetRegisterInfo &TRI; + unsigned DwarfVersion; public: - DwarfExpression(const AsmPrinter &AP) : AP(AP) {} + DwarfExpression(const TargetRegisterInfo &TRI, + unsigned DwarfVersion) + : TRI(TRI), DwarfVersion(DwarfVersion) {} virtual ~DwarfExpression() {} /// Output a dwarf operand and an optional assembler comment. virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0; /// Emit a raw signed value. - virtual void EmitSigned(int Value) = 0; + virtual void EmitSigned(int64_t Value) = 0; /// Emit a raw unsigned value. - virtual void EmitUnsigned(unsigned Value) = 0; + virtual void EmitUnsigned(uint64_t Value) = 0; /// Return whether the given machine register is the frame register in the /// current function. virtual bool isFrameRegister(unsigned MachineReg) = 0; @@ -87,17 +88,19 @@ public: /// Emit an unsigned constant. void AddUnsignedConstant(unsigned Value); - /// Emit an entire DIExpression on top of a machine register location. + /// \brief Emit an entire expression on top of a machine register location. + /// /// \param PieceOffsetInBits If this is one piece out of a fragmented /// location, this is the offset of the piece inside the entire variable. /// \return false if no DWARF register exists for MachineReg. - bool AddMachineRegExpression(DIExpression Expr, unsigned MachineReg, + bool AddMachineRegExpression(const DIExpression *Expr, unsigned MachineReg, unsigned PieceOffsetInBits = 0); - /// Emit a the operations in a DIExpression, starting from element I. + /// Emit the operations remaining in the DIExpressionIterator I.
/// \param PieceOffsetInBits If this is one piece out of a fragmented /// location, this is the offset of the piece inside the entire variable. - void AddExpression(DIExpression Expr, unsigned PieceOffsetInBits = 0, - unsigned I = 0); + void AddExpression(DIExpression::expr_op_iterator I, + DIExpression::expr_op_iterator E, + unsigned PieceOffsetInBits = 0); }; /// DwarfExpression implementation for .debug_loc entries. @@ -105,27 +108,27 @@ class DebugLocDwarfExpression : public DwarfExpression { ByteStreamer &BS; public: - DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS) - : DwarfExpression(AP), BS(BS) {} + DebugLocDwarfExpression(const TargetRegisterInfo &TRI, + unsigned DwarfVersion, ByteStreamer &BS) + : DwarfExpression(TRI, DwarfVersion), BS(BS) {} void EmitOp(uint8_t Op, const char *Comment = nullptr) override; - void EmitSigned(int Value) override; - void EmitUnsigned(unsigned Value) override; + void EmitSigned(int64_t Value) override; + void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(unsigned MachineReg) override; }; /// DwarfExpression implementation for singular DW_AT_location. class DIEDwarfExpression : public DwarfExpression { +const AsmPrinter &AP; DwarfUnit &DU; DIELoc &DIE; public: - DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) - : DwarfExpression(AP), DU(DU), DIE(DIE) {} - + DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE); void EmitOp(uint8_t Op, const char *Comment = nullptr) override; - void EmitSigned(int Value) override; - void EmitUnsigned(unsigned Value) override; + void EmitSigned(int64_t Value) override; + void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(unsigned MachineReg) override; }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 549abf8..10b58d4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -17,9 +17,8 @@ #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { -DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, - BumpPtrAllocator &DA) - : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {} +DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) + : Asm(AP), StrPool(DA, *Asm, Pref) {} DwarfFile::~DwarfFile() {} @@ -48,15 +47,15 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) { // Emit the various dwarf units to the unit section USection with // the abbreviations going into ASection. -void DwarfFile::emitUnits(const MCSymbol *ASectionSym) { +void DwarfFile::emitUnits(bool UseOffsets) { for (const auto &TheU : CUs) { DIE &Die = TheU->getUnitDie(); - const MCSection *USection = TheU->getSection(); - Asm->OutStreamer.SwitchSection(USection); + MCSection *USection = TheU->getSection(); + Asm->OutStreamer->SwitchSection(USection); - TheU->emitHeader(ASectionSym); + TheU->emitHeader(UseOffsets); - DD.emitDIE(Die); + Asm->emitDwarfDIE(Die); } } @@ -120,37 +119,26 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { Die.setSize(Offset - Die.getOffset()); return Offset; } -void DwarfFile::emitAbbrevs(const MCSection *Section) { + +void DwarfFile::emitAbbrevs(MCSection *Section) { // Check to see if it is worth the effort. if (!Abbreviations.empty()) { // Start the debug abbrev section. - Asm->OutStreamer.SwitchSection(Section); - - // For each abbrevation. - for (const DIEAbbrev *Abbrev : Abbreviations) { - // Emit the abbrevations code (base 1 index.) 
- Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); - - // Emit the abbreviations data. - Abbrev->Emit(Asm); - } - - // Mark end of abbreviations. - Asm->EmitULEB128(0, "EOM(3)"); + Asm->OutStreamer->SwitchSection(Section); + Asm->emitDwarfAbbrevs(Abbreviations); } } // Emit strings into a string section. -void DwarfFile::emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection) { +void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) { StrPool.emit(*Asm, StrSection, OffsetSection); } -void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { +bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; - DIVariable DV = Var->getVariable(); + const DILocalVariable *DV = Var->getVariable(); // Variables with positive arg numbers are parameters. - if (unsigned ArgNum = DV.getArgNumber()) { + if (unsigned ArgNum = DV->getArg()) { // Keep all parameters in order at the start of the variable list to ensure // function types are correct (no out-of-order parameters) // @@ -160,7 +148,7 @@ void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { // rather than linear search. auto I = Vars.begin(); while (I != Vars.end()) { - unsigned CurNum = (*I)->getVariable().getArgNumber(); + unsigned CurNum = (*I)->getVariable()->getArg(); // A local (non-parameter) variable has been found, insert immediately // before it. if (CurNum == 0) @@ -168,18 +156,17 @@ void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { // A later indexed parameter has been found, insert immediately before it. if (CurNum > ArgNum) break; - // FIXME: There are still some cases where two inlined functions are - // conflated together (two calls to the same function at the same - // location (eg: via a macro, or without column info, etc)) and then - // their arguments are conflated as well. - assert((LS->getParent() || CurNum != ArgNum) && - "Duplicate argument for top level (non-inlined) function"); + if (CurNum == ArgNum) { + (*I)->addMMIEntry(*Var); + return false; + } ++I; } Vars.insert(I, Var); - return; + return true; } Vars.push_back(Var); + return true; } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index f14d673..532ed96 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -28,17 +28,15 @@ class DwarfUnit; class DIEAbbrev; class MCSymbol; class DIE; -class DISubprogram; class LexicalScope; class StringRef; class DwarfDebug; class MCSection; +class MDNode; class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. AsmPrinter *Asm; - DwarfDebug ⅅ - // Used to uniquely define abbreviations. FoldingSet<DIEAbbrev> AbbreviationsSet; @@ -59,11 +57,10 @@ class DwarfFile { /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can /// be shared across CUs, that is why we keep the map here instead /// of in DwarfCompileUnit. - DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; + DenseMap<const MDNode *, DIE *> DITypeNodeToDieMap; public: - DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, - BumpPtrAllocator &DA); + DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); ~DwarfFile(); @@ -83,19 +80,19 @@ public: /// \brief Emit all of the units to the section listed with the given /// abbreviation section. 
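The addScopeVariable rewrite in the DwarfFile.cpp hunk above replaces an assertion with a merge: parameters stay sorted by argument number at the front of the per-scope list, and a second variable carrying an already-seen argument number is folded into the existing entry via addMMIEntry, with the new bool result reporting whether an insertion actually happened. A minimal sketch of that insertion policy, using a hypothetical Var type and merge() helper rather than the real DbgVariable API:

    #include <vector>

    struct Var {
      unsigned ArgNum;           // 0 means a local, >0 a parameter index
      void merge(const Var &) {} // hypothetical stand-in for addMMIEntry
    };

    // Returns false when V carries an argument number already in the list
    // and was merged into the existing entry instead of being inserted.
    bool addScopeVar(std::vector<Var *> &Vars, Var *V) {
      if (unsigned ArgNum = V->ArgNum) {
        auto I = Vars.begin();
        for (; I != Vars.end(); ++I) {
          unsigned Cur = (*I)->ArgNum;
          if (Cur == 0 || Cur > ArgNum)
            break;               // keep parameters sorted, ahead of locals
          if (Cur == ArgNum) {
            (*I)->merge(*V);     // duplicate argument number: merge
            return false;
          }
        }
        Vars.insert(I, V);
        return true;
      }
      Vars.push_back(V);         // locals simply go at the end
      return true;
    }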
- void emitUnits(const MCSymbol *ASectionSym); + void emitUnits(bool UseOffsets); /// \brief Emit a set of abbreviations to the specific section. - void emitAbbrevs(const MCSection *); + void emitAbbrevs(MCSection *); /// \brief Emit all of the strings to the section given. - void emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection = nullptr); + void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr); /// \brief Returns the string pool. DwarfStringPool &getStringPool() { return StrPool; } - void addScopeVariable(LexicalScope *LS, DbgVariable *Var); + /// \returns false if the variable was merged with a previous one. + bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() { return ScopeVariables; @@ -106,10 +103,10 @@ public: } void insertDIE(const MDNode *TypeMD, DIE *Die) { - MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); } DIE *getDIE(const MDNode *TypeMD) { - return MDTypeNodeToDieMap.lookup(TypeMD); + return DITypeNodeToDieMap.lookup(TypeMD); } }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index d76b66c..2066f74 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -8,63 +8,67 @@ //===----------------------------------------------------------------------===// #include "DwarfStringPool.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; -static std::pair<MCSymbol *, unsigned> & -getEntry(AsmPrinter &Asm, - StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> &Pool, - StringRef Prefix, StringRef Str) { - std::pair<MCSymbol *, unsigned> &Entry = Pool[Str]; - if (!Entry.first) { - Entry.second = Pool.size() - 1; - Entry.first = Asm.GetTempSymbol(Prefix, Entry.second); - } - return Entry; -} +DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, + StringRef Prefix) + : Pool(A), Prefix(Prefix), + ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {} -MCSymbol *DwarfStringPool::getSymbol(AsmPrinter &Asm, StringRef Str) { - return getEntry(Asm, Pool, Prefix, Str).first; -} +DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, + StringRef Str) { + auto I = Pool.insert(std::make_pair(Str, EntryTy())); + if (I.second) { + auto &Entry = I.first->second; + Entry.Index = Pool.size() - 1; + Entry.Offset = NumBytes; + Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr; -unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) { - return getEntry(Asm, Pool, Prefix, Str).second; + NumBytes += Str.size() + 1; + assert(NumBytes > Entry.Offset && "Unexpected overflow"); + } + return EntryRef(*I.first); } -void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection, - const MCSection *OffsetSection) { +void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, + MCSection *OffsetSection) { if (Pool.empty()) return; // Start the dwarf str section. - Asm.OutStreamer.SwitchSection(StrSection); + Asm.OutStreamer->SwitchSection(StrSection); // Get all of the string pool entries and put them in an array by their ID so // we can sort them. 
- SmallVector<const StringMapEntry<std::pair<MCSymbol *, unsigned>> *, 64> - Entries(Pool.size()); + SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size()); for (const auto &E : Pool) - Entries[E.getValue().second] = &E; + Entries[E.getValue().Index] = &E; for (const auto &Entry : Entries) { + assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) && + "Mismatch between setting and entry"); + // Emit a label for reference from debug information entries. - Asm.OutStreamer.EmitLabel(Entry->getValue().first); + if (ShouldCreateSymbols) + Asm.OutStreamer->EmitLabel(Entry->getValue().Symbol); // Emit the string itself with a terminating null byte. - Asm.OutStreamer.EmitBytes( + Asm.OutStreamer->AddComment("string offset=" + + Twine(Entry->getValue().Offset)); + Asm.OutStreamer->EmitBytes( StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1)); } // If we've got an offset section go ahead and emit that now as well. if (OffsetSection) { - Asm.OutStreamer.SwitchSection(OffsetSection); - unsigned offset = 0; + Asm.OutStreamer->SwitchSection(OffsetSection); unsigned size = 4; // FIXME: DWARF64 is 8. - for (const auto &Entry : Entries) { - Asm.OutStreamer.EmitIntValue(offset, size); - offset += Entry->getKeyLength() + 1; - } + for (const auto &Entry : Entries) + Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h index 63e3412..93a1684 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -11,12 +11,13 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H #include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/Support/Allocator.h" #include <utility> namespace llvm { +class AsmPrinter; class MCSymbol; class MCSection; class StringRef; @@ -25,25 +26,24 @@ class StringRef; // A String->Symbol mapping of strings used by indirect // references. class DwarfStringPool { - StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> Pool; + typedef DwarfStringPoolEntry EntryTy; + StringMap<EntryTy, BumpPtrAllocator &> Pool; StringRef Prefix; + unsigned NumBytes = 0; + bool ShouldCreateSymbols; public: - DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix) - : Pool(A), Prefix(Prefix) {} + typedef DwarfStringPoolEntryRef EntryRef; - void emit(AsmPrinter &Asm, const MCSection *StrSection, - const MCSection *OffsetSection = nullptr); + DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix); - /// \brief Returns an entry into the string pool with the given - /// string text. - MCSymbol *getSymbol(AsmPrinter &Asm, StringRef Str); - - /// \brief Returns the index into the string pool with the given - /// string text. - unsigned getIndex(AsmPrinter &Asm, StringRef Str); + void emit(AsmPrinter &Asm, MCSection *StrSection, + MCSection *OffsetSection = nullptr); bool empty() const { return Pool.empty(); } + + /// Get a reference to an entry in the string pool. 
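The reworked string pool above records, per entry, an insertion index, a byte offset computed once at insertion time, and an optional symbol that is only created when the target relocates across sections; the getEntry declaration just below hands back a reference to such an entry, and the offset table is then emitted by reading Entry.Offset directly instead of re-walking string lengths. A rough sketch of the offset bookkeeping under those assumptions (hypothetical ToyStringPool, no MCSymbol or allocator machinery):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <utility>

    struct PoolEntry {
      unsigned Index = 0;  // insertion order, used to sort for emission
      uint64_t Offset = 0; // byte offset in the string section
    };

    class ToyStringPool {
      std::map<std::string, PoolEntry> Pool;
      uint64_t NumBytes = 0;

    public:
      const PoolEntry &getEntry(const std::string &Str) {
        auto I = Pool.insert(std::make_pair(Str, PoolEntry()));
        if (I.second) {              // first time we see this string
          auto &E = I.first->second;
          E.Index = Pool.size() - 1;
          E.Offset = NumBytes;       // fixed now, read back at emit time
          NumBytes += Str.size() + 1; // account for the NUL terminator
        }
        return I.first->second;
      }
    };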
+ EntryRef getEntry(AsmPrinter &Asm, StringRef Str); }; } #endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 455258e..04836c6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -17,6 +17,7 @@ #include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -43,23 +44,28 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, cl::desc("Generate DWARF4 type units."), cl::init(false)); +DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, + DIELoc &DIE) + : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion()), + AP(AP), DU(DU), DIE(DIE) {} + void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); } -void DIEDwarfExpression::EmitSigned(int Value) { +void DIEDwarfExpression::EmitSigned(int64_t Value) { DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); } -void DIEDwarfExpression::EmitUnsigned(unsigned Value) { +void DIEDwarfExpression::EmitUnsigned(uint64_t Value) { DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); } bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) { - return MachineReg == getTRI()->getFrameRegister(*AP.MF); + return MachineReg == TRI.getFrameRegister(*AP.MF); } - -/// Unit - Unit constructor. -DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node, - AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) +DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, + const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) : UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { assert(UnitTag == dwarf::DW_TAG_compile_unit || @@ -76,7 +82,6 @@ DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0); } -/// ~Unit - Destructor for compile unit. DwarfUnit::~DwarfUnit() { for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) DIEBlocks[j]->~DIEBlock(); @@ -84,15 +89,11 @@ DwarfUnit::~DwarfUnit() { DIELocs[j]->~DIELoc(); } -/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug -/// information entry. DIEEntry *DwarfUnit::createDIEEntry(DIE &Entry) { DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); return Value; } -/// getDefaultLowerBound - Return the default lower bound for an array. If the -/// DWARF version doesn't handle the language, return -1. int64_t DwarfUnit::getDefaultLowerBound() const { switch (getLanguage()) { default: @@ -130,13 +131,37 @@ int64_t DwarfUnit::getDefaultLowerBound() const { if (dwarf::DWARF_VERSION >= 4) return 1; break; + + // The languages below have valid values only if the DWARF version >= 5. 
+ case dwarf::DW_LANG_OpenCL: + case dwarf::DW_LANG_Go: + case dwarf::DW_LANG_Haskell: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_OCaml: + case dwarf::DW_LANG_Rust: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_Swift: + case dwarf::DW_LANG_Dylan: + case dwarf::DW_LANG_C_plus_plus_14: + if (dwarf::DWARF_VERSION >= 5) + return 0; + break; + + case dwarf::DW_LANG_Modula3: + case dwarf::DW_LANG_Julia: + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + if (dwarf::DWARF_VERSION >= 5) + return 1; + break; } return -1; } /// Check whether the DIE for this MDNode can be shared across CUs. -static bool isShareableAcrossCUs(DIDescriptor D) { +static bool isShareableAcrossCUs(const DINode *D) { // When the MDNode can be part of the type system, the DIE can be shared // across CUs. // Combining type units and cross-CU DIE sharing is lower value (since @@ -144,25 +169,18 @@ static bool isShareableAcrossCUs(DIDescriptor D) { // level already) but may be implementable for some value in projects // building multiple independent libraries with LTO and then linking those // together. - return (D.isType() || - (D.isSubprogram() && !DISubprogram(D).isDefinition())) && + return (isa<DIType>(D) || + (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && !GenerateDwarfTypeUnits; } -/// getDIE - Returns the debug information entry map slot for the -/// specified debug variable. We delegate the request to DwarfDebug -/// when the DIE for this MDNode can be shared across CUs. The mappings -/// will be kept in DwarfDebug for shareable DIEs. -DIE *DwarfUnit::getDIE(DIDescriptor D) const { +DIE *DwarfUnit::getDIE(const DINode *D) const { if (isShareableAcrossCUs(D)) return DU->getDIE(D); return MDNodeToDieMap.lookup(D); } -/// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug -/// when the DIE for this MDNode can be shared across CUs. The mappings -/// will be kept in DwarfDebug for shareable DIEs. -void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) { +void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) { if (isShareableAcrossCUs(Desc)) { DU->insertDIE(Desc, D); return; @@ -170,7 +188,6 @@ void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) { MDNodeToDieMap.insert(std::make_pair(Desc, D)); } -/// addFlag - Add a flag that is true. void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { if (DD->getDwarfVersion() >= 4) Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); @@ -178,8 +195,6 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); } -/// addUInt - Add an unsigned integer attribute data and value. -/// void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer) { if (!Form) @@ -193,8 +208,6 @@ void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) { addUInt(Block, (dwarf::Attribute)0, Form, Integer); } -/// addSInt - Add an signed integer attribute data and value. -/// void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer) { if (!Form) @@ -208,43 +221,14 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, addSInt(Die, (dwarf::Attribute)0, Form, Integer); } -/// addString - Add a string attribute data and value. We always emit a -/// reference to the string pool instead of immediate strings so that DIEs have -/// more predictable sizes. 
-/// In the case of split dwarf we emit an index
-/// into another table which gets us the static offset into the string
-/// table.
 void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
 StringRef String) {
- if (!isDwoUnit())
- return addLocalString(Die, Attribute, String);
-
- addIndexedString(Die, Attribute, String);
-}
-
-void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute,
- StringRef String) {
- unsigned idx = DU->getStringPool().getIndex(*Asm, String);
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
- DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
- Die.addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str);
-}
-
-/// addLocalString - Add a string attribute data and value. This is guaranteed
-/// to be in the local string pool instead of indirected.
-void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute,
- StringRef String) {
- MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String);
- DIEValue *Value;
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- Value = new (DIEValueAllocator) DIELabel(Symb);
- else
- Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym());
- DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
- Die.addValue(Attribute, dwarf::DW_FORM_strp, Str);
+ Die.addValue(Attribute,
+ isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp,
+ new (DIEValueAllocator)
+ DIEString(DU->getStringPool().getEntry(*Asm, String)));
}
-/// addLabel - Add a Dwarf label attribute data and value.
-///
void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
 const MCSymbol *Label) {
 DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
@@ -255,8 +239,6 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
 addLabel(Die, (dwarf::Attribute)0, Form, Label);
}
-/// addSectionOffset - Add an offset into a section attribute data and value.
-///
void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
 uint64_t Integer) {
 if (DD->getDwarfVersion() >= 4)
@@ -270,9 +252,6 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirNam
 : getCU().getOrCreateSourceID(FileName, DirName);
}
-/// addOpAddress - Add a dwarf op address data and value using the
-/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
-///
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
 if (!DD->useSplitDwarf()) {
 addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
@@ -290,8 +269,6 @@ void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
 Die.addValue(Attribute, dwarf::DW_FORM_data4, Value);
}
-/// addDIEEntry - Add a DIE attribute data and value.
-///
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
 addDIEEntry(Die, Attribute, createDIEEntry(Entry));
}
@@ -321,9 +298,7 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
 Entry);
}
-/// Create a DIE with the given Tag, add the DIE to its parent, and
-/// call insertDIE if MD is not null.
-DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
+DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
 assert(Tag != dwarf::DW_TAG_auto_variable &&
 Tag != dwarf::DW_TAG_arg_variable);
 Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag));
@@ -333,8 +308,6 @@ DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
 return Die;
}
-/// addBlock - Add block data.
-/// void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) { Loc->ComputeSize(Asm); DIELocs.push_back(Loc); // Memoize so we can call the destructor later on. @@ -348,8 +321,6 @@ void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, Die.addValue(Attribute, Block->BestForm(), Block); } -/// addSourceLine - Add location information to specified debug information -/// entry. void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, StringRef Directory) { if (Line == 0) @@ -361,58 +332,41 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DIVariable V) { - assert(V.isVariable()); +void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) { + assert(V); - addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(), - V.getContext().getDirectory()); + addSourceLine(Die, V->getLine(), V->getScope()->getFilename(), + V->getScope()->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DIGlobalVariable G) { - assert(G.isGlobalVariable()); +void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) { + assert(G); - addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory()); + addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DISubprogram SP) { - assert(SP.isSubprogram()); +void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) { + assert(SP); - addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory()); + addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DIType Ty) { - assert(Ty.isType()); +void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) { + assert(Ty); - addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DIObjCProperty Ty) { - assert(Ty.isObjCProperty()); +void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { + assert(Ty); - DIFile File = Ty.getFile(); - addSourceLine(Die, Ty.getLineNumber(), File.getFilename(), - File.getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) { - assert(NS.Verify()); - - addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory()); +void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) { + addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); } -/// addRegisterOp - Add register operand. bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, unsigned SizeInBits, unsigned OffsetInBits) { DIEDwarfExpression Expr(*Asm, *this, TheDie); @@ -420,7 +374,6 @@ bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, return true; } -/// addRegisterOffset - Add register offset. 
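EmitSigned and EmitUnsigned were widened to int64_t/uint64_t in the hunks above because DWARF encodes expression operands as variable-length LEB128 values, where a plain int would truncate large offsets; addRegisterOffset, whose definition follows, is a typical client that encodes a signed register offset. For reference, the two encodings as a standalone sketch (not the LLVM helpers):

    #include <cstdint>
    #include <vector>

    // Append a ULEB128-encoded value, the form used for operands such as
    // DW_OP_plus_uconst.
    static void appendULEB128(std::vector<uint8_t> &Out, uint64_t V) {
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V)
          Byte |= 0x80;          // high bit set: more bytes follow
        Out.push_back(Byte);
      } while (V);
    }

    // Append an SLEB128-encoded value, the form used for DW_OP_breg*
    // and DW_OP_fbreg offsets. Assumes arithmetic right shift.
    static void appendSLEB128(std::vector<uint8_t> &Out, int64_t V) {
      bool More;
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;                 // keeps the sign on two's-complement targets
        More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
        if (More)
          Byte |= 0x80;
        Out.push_back(Byte);
      } while (More);
    }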
bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset) { DIEDwarfExpression Expr(*Asm, *this, TheDie); @@ -481,39 +434,31 @@ bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, That is what this function does. */ -/// addBlockByrefAddress - Start with the address based on the location -/// provided, and generate the DWARF information necessary to find the -/// actual Block variable (navigating the Block struct) based on the -/// starting location. Add the DWARF information to the die. For -/// more information, read large comment just above here. -/// void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { - DIType Ty = DV.getType(); - DIType TmpTy = Ty; - uint16_t Tag = Ty.getTag(); + const DIType *Ty = DV.getType(); + const DIType *TmpTy = Ty; + uint16_t Tag = Ty->getTag(); bool isPointer = false; StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy(Ty); - TmpTy = resolve(DTy.getTypeDerivedFrom()); + auto *DTy = cast<DIDerivedType>(Ty); + TmpTy = resolve(DTy->getBaseType()); isPointer = true; } - DICompositeType blockStruct(TmpTy); - // Find the __forwarding field and the variable field in the __Block_byref // struct. - DIArray Fields = blockStruct.getElements(); - DIDerivedType varField; - DIDerivedType forwardingField; + DINodeArray Fields = cast<DICompositeTypeBase>(TmpTy)->getElements(); + const DIDerivedType *varField = nullptr; + const DIDerivedType *forwardingField = nullptr; - for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDerivedType DT(Fields.getElement(i)); - StringRef fieldName = DT.getName(); + for (unsigned i = 0, N = Fields.size(); i < N; ++i) { + auto *DT = cast<DIDerivedType>(Fields[i]); + StringRef fieldName = DT->getName(); if (fieldName == "__forwarding") forwardingField = DT; else if (fieldName == varName) @@ -521,8 +466,8 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, } // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3; - unsigned varFieldOffset = varField.getOffsetInBits() >> 2; + unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3; + unsigned varFieldOffset = varField->getOffsetInBits() >> 2; // Decode the original location, and use that as the start of the byref // variable's location. @@ -567,10 +512,9 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, } /// Return true if type encoding is unsigned. -static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { - DIDerivedType DTy(Ty); - if (DTy.isDerivedType()) { - dwarf::Tag T = (dwarf::Tag)Ty.getTag(); +static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { + if (auto *DTy = dyn_cast<DIDerivedTypeBase>(Ty)) { + dwarf::Tag T = (dwarf::Tag)Ty->getTag(); // Encode pointer constants as unsigned bytes. This is used at least for // null pointer constant emission. 
// (Pieces of) aggregate types that get hacked apart by SROA may also be @@ -584,64 +528,64 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { T == dwarf::DW_TAG_ptr_to_member_type || T == dwarf::DW_TAG_reference_type || T == dwarf::DW_TAG_rvalue_reference_type || - T == dwarf::DW_TAG_structure_type) + T == dwarf::DW_TAG_structure_type || + T == dwarf::DW_TAG_union_type) return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_enumeration_type); - if (DITypeRef Deriv = DTy.getTypeDerivedFrom()) + if (DITypeRef Deriv = DTy->getBaseType()) return isUnsignedDIType(DD, DD->resolve(Deriv)); // FIXME: Enums without a fixed underlying type have unknown signedness // here, leading to incorrectly emitted constants. - assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type); + assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type); return false; } - DIBasicType BTy(Ty); - assert(BTy.isBasicType()); - unsigned Encoding = BTy.getEncoding(); + auto *BTy = cast<DIBasicType>(Ty); + unsigned Encoding = BTy->getEncoding(); assert((Encoding == dwarf::DW_ATE_unsigned || Encoding == dwarf::DW_ATE_unsigned_char || Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || - (Ty.getTag() == dwarf::DW_TAG_unspecified_type && - Ty.getName() == "decltype(nullptr)")) && + Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || + Encoding == dwarf::DW_ATE_boolean || + (Ty->getTag() == dwarf::DW_TAG_unspecified_type && + Ty->getName() == "decltype(nullptr)")) && "Unsupported encoding"); - return (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || - Ty.getTag() == dwarf::DW_TAG_unspecified_type); + return Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Ty->getTag() == dwarf::DW_TAG_unspecified_type; } /// If this type is derived from a base type then return base type size. -static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { - unsigned Tag = Ty.getTag(); +static uint64_t getBaseTypeSize(DwarfDebug *DD, const DIDerivedType *Ty) { + unsigned Tag = Ty->getTag(); if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type) - return Ty.getSizeInBits(); + return Ty->getSizeInBits(); - DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); + auto *BaseType = DD->resolve(Ty->getBaseType()); - assert(BaseType.isValid()); + assert(BaseType && "Unexpected invalid base type"); // If this is a derived type, go ahead and get the base type, unless it's a // reference then it's just the size of the field. Pointer types have no need // of this since they're a different type of qualification on the type. 
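The getBaseTypeSize walk that continues below only looks through tags that rename or qualify a type; any other node already knows its own size, and a reference type stops the recursion because a member of reference type stores the reference's size, not the referee's. The shape of that recursion on a deliberately simplified node (the DW_TAG constants are real, the ToyType struct is not):

    #include "llvm/Support/Dwarf.h"
    #include <cstdint>

    using namespace llvm;

    // Deliberately simplified stand-in for the metadata node.
    struct ToyType {
      unsigned Tag;        // one of the dwarf::DW_TAG_* values
      uint64_t SizeInBits;
      const ToyType *Base; // underlying type, or nullptr
    };

    uint64_t baseTypeSize(const ToyType &Ty) {
      switch (Ty.Tag) {
      case dwarf::DW_TAG_member:
      case dwarf::DW_TAG_typedef:
      case dwarf::DW_TAG_const_type:
      case dwarf::DW_TAG_volatile_type:
      case dwarf::DW_TAG_restrict_type:
        break;                  // qualifiers and renames: look through
      default:
        return Ty.SizeInBits;   // everything else is authoritative
      }
      const ToyType *Base = Ty.Base;
      // The real code asserts Base is non-null; handled defensively here.
      if (!Base || Base->Tag == dwarf::DW_TAG_reference_type ||
          Base->Tag == dwarf::DW_TAG_rvalue_reference_type)
        return Ty.SizeInBits;   // references keep the field's own size
      return baseTypeSize(*Base);
    }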
- if (BaseType.getTag() == dwarf::DW_TAG_reference_type || - BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) - return Ty.getSizeInBits(); + if (BaseType->getTag() == dwarf::DW_TAG_reference_type || + BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty->getSizeInBits(); - if (BaseType.isDerivedType()) - return getBaseTypeSize(DD, DIDerivedType(BaseType)); + if (auto *DT = dyn_cast<DIDerivedType>(BaseType)) + return getBaseTypeSize(DD, DT); - return BaseType.getSizeInBits(); + return BaseType->getSizeInBits(); } -/// addConstantFPValue - Add constant value entry in variable DIE. void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -664,20 +608,18 @@ void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { addBlock(Die, dwarf::DW_AT_const_value, Block); } -/// addConstantFPValue - Add constant value entry in variable DIE. void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) { // Pass this down to addConstantValue as an unsigned bag of bits. addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); } -/// addConstantValue - Add constant value entry in variable DIE. -void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty) { +void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, + const DIType *Ty) { addConstantValue(Die, CI->getValue(), Ty); } -/// addConstantValue - Add constant value entry in variable DIE. void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO, - DIType Ty) { + const DIType *Ty) { assert(MO.isImm() && "Invalid machine operand!"); addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm()); @@ -690,11 +632,10 @@ void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) { Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val); } -void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, DIType Ty) { +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) { addConstantValue(Die, Val, isUnsignedDIType(DD, Ty)); } -// addConstantValue - Add constant value entry in variable DIE. void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { @@ -724,67 +665,67 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { addBlock(Die, dwarf::DW_AT_const_value, Block); } -/// addTemplateParams - Add template parameters into buffer. -void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { +void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { + if (!LinkageName.empty()) + addString(Die, + DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); +} + +void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { // Add template parameters. 
- for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { - DIDescriptor Element = TParams.getElement(i); - if (Element.isTemplateTypeParameter()) - constructTemplateTypeParameterDIE(Buffer, - DITemplateTypeParameter(Element)); - else if (Element.isTemplateValueParameter()) - constructTemplateValueParameterDIE(Buffer, - DITemplateValueParameter(Element)); + for (const auto *Element : TParams) { + if (auto *TTP = dyn_cast<DITemplateTypeParameter>(Element)) + constructTemplateTypeParameterDIE(Buffer, TTP); + else if (auto *TVP = dyn_cast<DITemplateValueParameter>(Element)) + constructTemplateValueParameterDIE(Buffer, TVP); } } -/// getOrCreateContextDIE - Get context owner's DIE. -DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) { - if (!Context || Context.isFile()) +DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { + if (!Context || isa<DIFile>(Context)) return &getUnitDie(); - if (Context.isType()) - return getOrCreateTypeDIE(DIType(Context)); - if (Context.isNameSpace()) - return getOrCreateNameSpace(DINameSpace(Context)); - if (Context.isSubprogram()) - return getOrCreateSubprogramDIE(DISubprogram(Context)); + if (auto *T = dyn_cast<DIType>(Context)) + return getOrCreateTypeDIE(T); + if (auto *NS = dyn_cast<DINamespace>(Context)) + return getOrCreateNameSpace(NS); + if (auto *SP = dyn_cast<DISubprogram>(Context)) + return getOrCreateSubprogramDIE(SP); return getDIE(Context); } -DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) { - DIScope Context = resolve(Ty.getContext()); +DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) { + auto *Context = resolve(Ty->getScope()); DIE *ContextDIE = getOrCreateContextDIE(Context); if (DIE *TyDIE = getDIE(Ty)) return TyDIE; // Create new type. - DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); - constructTypeDIE(TyDIE, Ty); + constructTypeDIE(TyDIE, cast<DICompositeType>(Ty)); updateAcceleratorTables(Context, Ty, TyDIE); return &TyDIE; } -/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the -/// given DIType. DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (!TyNode) return nullptr; - DIType Ty(TyNode); - assert(Ty.isType()); - assert(Ty == resolve(Ty.getRef()) && + auto *Ty = cast<DIType>(TyNode); + assert(Ty == resolve(Ty->getRef()) && "type was not uniqued, possible ODR violation."); // DW_TAG_restrict_type is not supported in DWARF2 - if (Ty.getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) - return getOrCreateTypeDIE(resolve(DIDerivedType(Ty).getTypeDerivedFrom())); + if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) + return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType())); // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIScope Context = resolve(Ty.getContext()); + auto *Context = resolve(Ty->getScope()); DIE *ContextDIE = getOrCreateContextDIE(Context); assert(ContextDIE); @@ -792,50 +733,49 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { return TyDIE; // Create new type. 
- DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); updateAcceleratorTables(Context, Ty, TyDIE); - if (Ty.isBasicType()) - constructTypeDIE(TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) { - DICompositeType CTy(Ty); - if (GenerateDwarfTypeUnits && !Ty.isForwardDecl()) - if (MDString *TypeId = CTy.getIdentifier()) { + if (auto *BT = dyn_cast<DIBasicType>(Ty)) + constructTypeDIE(TyDIE, BT); + else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) + constructTypeDIE(TyDIE, STy); + else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + if (GenerateDwarfTypeUnits && !Ty->isForwardDecl()) + if (MDString *TypeId = CTy->getRawIdentifier()) { DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); // Skip updating the accelerator tables since this is not the full type. return &TyDIE; } constructTypeDIE(TyDIE, CTy); } else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(TyDIE, DIDerivedType(Ty)); + constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty)); } return &TyDIE; } -void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty, - const DIE &TyDIE) { - if (!Ty.getName().empty() && !Ty.isForwardDecl()) { +void DwarfUnit::updateAcceleratorTables(const DIScope *Context, + const DIType *Ty, const DIE &TyDIE) { + if (!Ty->getName().empty() && !Ty->isForwardDecl()) { bool IsImplementation = 0; - if (Ty.isCompositeType()) { - DICompositeType CT(Ty); + if (auto *CT = dyn_cast<DICompositeTypeBase>(Ty)) { // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. - IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete(); + IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); } unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; - DD->addAccelType(Ty.getName(), TyDIE, Flags); + DD->addAccelType(Ty->getName(), TyDIE, Flags); - if (!Context || Context.isCompileUnit() || Context.isFile() || - Context.isNameSpace()) + if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || + isa<DINamespace>(Context)) addGlobalType(Ty, TyDIE, Context); } } -/// addType - Add a new type attribute to the specified entity. -void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) { +void DwarfUnit::addType(DIE &Entity, const DIType *Ty, + dwarf::Attribute Attribute) { assert(Ty && "Trying to add a type that doesn't exist?"); // Check for pre-existence. @@ -855,12 +795,7 @@ void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) { addDIEEntry(Entity, Attribute, Entry); } -/// getParentContextString - Walks the metadata parent chain in a language -/// specific manner (using the compile unit language) and returns -/// it as a string. This is done at the metadata level because DIEs may -/// not currently have been added to the parent context and walking the -/// DIEs looking for names is more expensive than walking the metadata. 
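One detail that repeats through these getOrCreate* functions: the context DIE is always constructed before the memoization map is consulted, because building the context can recursively create and register the very DIE being requested. Compressed to its essentials, with hypothetical Node and DIE scaffolding:

    #include <map>

    struct Node { const Node *Scope = nullptr; };
    struct DIE { DIE *Parent = nullptr; };

    std::map<const Node *, DIE *> Memo;

    DIE *getOrCreateDIE(const Node *N) {
      // Build the context first; in the real emitter, constructing a
      // parent scope (e.g. a class) can create and memoize DIEs for its
      // members, including N itself.
      DIE *Context = N->Scope ? getOrCreateDIE(N->Scope) : nullptr;
      // Only after the context exists is the lookup safe.
      auto It = Memo.find(N);
      if (It != Memo.end())
        return It->second;
      DIE *D = new DIE();
      D->Parent = Context;
      Memo[N] = D;
      return D;
    }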
-std::string DwarfUnit::getParentContextString(DIScope Context) const { +std::string DwarfUnit::getParentContextString(const DIScope *Context) const { if (!Context) return ""; @@ -869,11 +804,11 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { return ""; std::string CS; - SmallVector<DIScope, 1> Parents; - while (!Context.isCompileUnit()) { + SmallVector<const DIScope *, 1> Parents; + while (!isa<DICompileUnit>(Context)) { Parents.push_back(Context); - if (Context.getContext()) - Context = resolve(Context.getContext()); + if (Context->getScope()) + Context = resolve(Context->getScope()); else // Structure, etc types will have a NULL context if they're at the top // level. @@ -882,12 +817,10 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { // Reverse iterate over our list to go from the outermost construct to the // innermost. - for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(), - E = Parents.rend(); - I != E; ++I) { - DIScope Ctx = *I; - StringRef Name = Ctx.getName(); - if (Name.empty() && Ctx.isNameSpace()) + for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) { + const DIScope *Ctx = *I; + StringRef Name = Ctx->getName(); + if (Name.empty() && isa<DINamespace>(Ctx)) Name = "(anonymous namespace)"; if (!Name.empty()) { CS += Name; @@ -897,34 +830,32 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { return CS; } -/// constructTypeDIE - Construct basic type die from DIBasicType. -void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) { // Get core information. - StringRef Name = BTy.getName(); + StringRef Name = BTy->getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) addString(Buffer, dwarf::DW_AT_name, Name); // An unspecified type only has a name attribute. - if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) + if (BTy->getTag() == dwarf::DW_TAG_unspecified_type) return; addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); + BTy->getEncoding()); - uint64_t Size = BTy.getSizeInBits() >> 3; + uint64_t Size = BTy->getSizeInBits() >> 3; addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); } -/// constructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { // Get core information. - StringRef Name = DTy.getName(); - uint64_t Size = DTy.getSizeInBits() >> 3; + StringRef Name = DTy->getName(); + uint64_t Size = DTy->getSizeInBits() >> 3; uint16_t Tag = Buffer.getTag(); // Map to main type, void will not have a type. - DIType FromTy = resolve(DTy.getTypeDerivedFrom()); + const DIType *FromTy = resolve(DTy->getBaseType()); if (FromTy) addType(Buffer, FromTy); @@ -938,35 +869,62 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(Buffer, dwarf::DW_AT_containing_type, - *getOrCreateTypeDIE(resolve(DTy.getClassType()))); + addDIEEntry( + Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(resolve(cast<DIDerivedType>(DTy)->getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. - if (!DTy.isForwardDecl()) + if (!DTy->isForwardDecl()) addSourceLine(Buffer, DTy); } -/// constructSubprogramArguments - Construct function argument DIEs. 
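getParentContextString, whose old signature is removed just below, gathers scope names innermost-first while climbing toward the compile unit and then iterates the list in reverse so the rendered string reads outermost-first, substituting a placeholder for anonymous namespaces. The same two-phase walk on a toy scope chain (hypothetical ToyScope, not the DIScope API):

    #include <string>
    #include <vector>

    struct ToyScope {
      std::string Name;       // may be empty
      const ToyScope *Parent; // nullptr at the top level
      bool IsAnonymousNamespace;
    };

    std::string scopePrefix(const ToyScope *Ctx) {
      // Phase 1: climb outward, collecting scopes innermost-first.
      // (The real code stops at the compile unit rather than at null.)
      std::vector<const ToyScope *> Parents;
      for (; Ctx; Ctx = Ctx->Parent)
        Parents.push_back(Ctx);
      // Phase 2: walk backwards so the outermost scope prints first.
      std::string CS;
      for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) {
        std::string Name = (*I)->Name;
        if (Name.empty() && (*I)->IsAnonymousNamespace)
          Name = "(anonymous namespace)";
        if (!Name.empty()) {
          CS += Name;
          CS += "::";
        }
      }
      return CS;
    }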
-void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) { - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIType Ty = resolve(Args.getElement(i)); +void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { + for (unsigned i = 1, N = Args.size(); i < N; ++i) { + const DIType *Ty = resolve(Args[i]); if (!Ty) { assert(i == N-1 && "Unspecified parameter must be the last argument"); createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); } else { DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); addType(Arg, Ty); - if (Ty.isArtificial()) + if (Ty->isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); } } } -/// constructTypeDIE - Construct type DIE from DICompositeType. -void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { + // Add return type. A void return won't have a type. + auto Elements = cast<DISubroutineType>(CTy)->getTypeArray(); + if (Elements.size()) + if (auto RTy = resolve(Elements[0])) + addType(Buffer, RTy); + + bool isPrototyped = true; + if (Elements.size() == 2 && !Elements[1]) + isPrototyped = false; + + constructSubprogramArguments(Buffer, Elements); + + // Add prototype flag if we're dealing with a C language and the function has + // been prototyped. + uint16_t Language = getLanguage(); + if (isPrototyped && + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) + addFlag(Buffer, dwarf::DW_AT_prototyped); + + if (CTy->isLValueReference()) + addFlag(Buffer, dwarf::DW_AT_reference); + + if (CTy->isRValueReference()) + addFlag(Buffer, dwarf::DW_AT_rvalue_reference); +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { // Add name if not anonymous or intermediate type. - StringRef Name = CTy.getName(); + StringRef Name = CTy->getName(); - uint64_t Size = CTy.getSizeInBits() >> 3; + uint64_t Size = CTy->getSizeInBits() >> 3; uint16_t Tag = Buffer.getTag(); switch (Tag) { @@ -976,82 +934,39 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { case dwarf::DW_TAG_enumeration_type: constructEnumTypeDIE(Buffer, CTy); break; - case dwarf::DW_TAG_subroutine_type: { - // Add return type. A void return won't have a type. - DITypeArray Elements = DISubroutineType(CTy).getTypeArray(); - DIType RTy(resolve(Elements.getElement(0))); - if (RTy) - addType(Buffer, RTy); - - bool isPrototyped = true; - if (Elements.getNumElements() == 2 && - !Elements.getElement(1)) - isPrototyped = false; - - constructSubprogramArguments(Buffer, Elements); - - // Add prototype flag if we're dealing with a C language and the - // function has been prototyped. - uint16_t Language = getLanguage(); - if (isPrototyped && - (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || - Language == dwarf::DW_LANG_ObjC)) - addFlag(Buffer, dwarf::DW_AT_prototyped); - - if (CTy.isLValueReference()) - addFlag(Buffer, dwarf::DW_AT_reference); - - if (CTy.isRValueReference()) - addFlag(Buffer, dwarf::DW_AT_rvalue_reference); - } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { // Add elements to structure type. 
- DIArray Elements = CTy.getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.isSubprogram()) - getOrCreateSubprogramDIE(DISubprogram(Element)); - else if (Element.isDerivedType()) { - DIDerivedType DDTy(Element); - if (DDTy.getTag() == dwarf::DW_TAG_friend) { + DINodeArray Elements = CTy->getElements(); + for (const auto *Element : Elements) { + if (!Element) + continue; + if (auto *SP = dyn_cast<DISubprogram>(Element)) + getOrCreateSubprogramDIE(SP); + else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) { + if (DDTy->getTag() == dwarf::DW_TAG_friend) { DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); - addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), - dwarf::DW_AT_friend); - } else if (DDTy.isStaticMember()) { + addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend); + } else if (DDTy->isStaticMember()) { getOrCreateStaticMemberDIE(DDTy); } else { constructMemberDIE(Buffer, DDTy); } - } else if (Element.isObjCProperty()) { - DIObjCProperty Property(Element); - DIE &ElemDie = createAndAddDIE(Property.getTag(), Buffer); - StringRef PropertyName = Property.getObjCPropertyName(); + } else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) { + DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer); + StringRef PropertyName = Property->getName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); - if (Property.getType()) - addType(ElemDie, Property.getType()); + if (Property->getType()) + addType(ElemDie, Property->getType()); addSourceLine(ElemDie, Property); - StringRef GetterName = Property.getObjCPropertyGetterName(); + StringRef GetterName = Property->getGetterName(); if (!GetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); - StringRef SetterName = Property.getObjCPropertySetterName(); + StringRef SetterName = Property->getSetterName(); if (!SetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); - unsigned PropertyAttributes = 0; - if (Property.isReadOnlyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; - if (Property.isReadWriteObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; - if (Property.isAssignObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; - if (Property.isRetainObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; - if (Property.isCopyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; - if (Property.isNonAtomicObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; - if (PropertyAttributes) + if (unsigned PropertyAttributes = Property->getAttributes()) addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, PropertyAttributes); @@ -1060,28 +975,27 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Entry = createDIEEntry(ElemDie); insertDIEEntry(Element, Entry); } - } else - continue; + } } - if (CTy.isAppleBlockExtension()) + if (CTy->isAppleBlockExtension()) addFlag(Buffer, dwarf::DW_AT_APPLE_block); // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type // inside C++ composite types to point to the base class with the vtable. 
- DICompositeType ContainingType(resolve(CTy.getContainingType())); - if (ContainingType) + if (auto *ContainingType = + dyn_cast_or_null<DICompositeType>(resolve(CTy->getVTableHolder()))) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, *getOrCreateTypeDIE(ContainingType)); - if (CTy.isObjcClassComplete()) + if (CTy->isObjcClassComplete()) addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. if (Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - addTemplateParams(Buffer, CTy.getTemplateParams()); + addTemplateParams(Buffer, CTy->getTemplateParams()); break; } @@ -1100,55 +1014,50 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // TODO: Do we care about size for enum forward declarations? if (Size) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); - else if (!CTy.isForwardDecl()) + else if (!CTy->isForwardDecl()) // Add zero size if it is not a forward declaration. addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0); // If we're a forward decl, say so. - if (CTy.isForwardDecl()) + if (CTy->isForwardDecl()) addFlag(Buffer, dwarf::DW_AT_declaration); // Add source line info if available. - if (!CTy.isForwardDecl()) + if (!CTy->isForwardDecl()) addSourceLine(Buffer, CTy); // No harm in adding the runtime language to the declaration. - unsigned RLang = CTy.getRunTimeLang(); + unsigned RLang = CTy->getRuntimeLang(); if (RLang) addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); } } -/// constructTemplateTypeParameterDIE - Construct new DIE for the given -/// DITemplateTypeParameter. -void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer, - DITemplateTypeParameter TP) { +void DwarfUnit::constructTemplateTypeParameterDIE( + DIE &Buffer, const DITemplateTypeParameter *TP) { DIE &ParamDIE = createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); // Add the type if it exists, it could be void and therefore no type. - if (TP.getType()) - addType(ParamDIE, resolve(TP.getType())); - if (!TP.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); + if (TP->getType()) + addType(ParamDIE, resolve(TP->getType())); + if (!TP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); } -/// constructTemplateValueParameterDIE - Construct new DIE for the given -/// DITemplateValueParameter. -void -DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, - DITemplateValueParameter VP) { - DIE &ParamDIE = createAndAddDIE(VP.getTag(), Buffer); +void DwarfUnit::constructTemplateValueParameterDIE( + DIE &Buffer, const DITemplateValueParameter *VP) { + DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer); // Add the type if there is one, template template and template parameter // packs will not have a type. 
- if (VP.getTag() == dwarf::DW_TAG_template_value_parameter) - addType(ParamDIE, resolve(VP.getType())); - if (!VP.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); - if (Metadata *Val = VP.getValue()) { + if (VP->getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, resolve(VP->getType())); + if (!VP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); + if (Metadata *Val = VP->getValue()) { if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) - addConstantValue(ParamDIE, CI, resolve(VP.getType())); + addConstantValue(ParamDIE, CI, resolve(VP->getType())); else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) @@ -1158,51 +1067,47 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, // parameter, rather than a pointer to it. addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); addBlock(ParamDIE, dwarf::DW_AT_location, Loc); - } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) { assert(isa<MDString>(Val)); addString(ParamDIE, dwarf::DW_AT_GNU_template_name, cast<MDString>(Val)->getString()); - } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { - assert(isa<MDNode>(Val)); - DIArray A(cast<MDNode>(Val)); - addTemplateParams(ParamDIE, A); + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + addTemplateParams(ParamDIE, cast<MDTuple>(Val)); } } } -/// getOrCreateNameSpace - Create a DIE for DINameSpace. -DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) { +DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); + DIE *ContextDIE = getOrCreateContextDIE(NS->getScope()); if (DIE *NDie = getDIE(NS)) return NDie; DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); - StringRef Name = NS.getName(); + StringRef Name = NS->getName(); if (!Name.empty()) - addString(NDie, dwarf::DW_AT_name, NS.getName()); + addString(NDie, dwarf::DW_AT_name, NS->getName()); else Name = "(anonymous namespace)"; DD->addAccelNamespace(Name, NDie); - addGlobalName(Name, NDie, NS.getContext()); + addGlobalName(Name, NDie, NS->getScope()); addSourceLine(NDie, NS); return &NDie; } -/// getOrCreateSubprogramDIE - Create new DIE using SP. -DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { +DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE (as is the case for member function // declarations). DIE *ContextDIE = - Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext())); + Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope())); if (DIE *SPDie = getDIE(SP)) return SPDie; - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + if (auto *SPDecl = SP->getDeclaration()) { if (!Minimal) { // Add subprogram definitions to the CU die directly. ContextDIE = &getUnitDie(); @@ -1216,36 +1121,35 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { // Stop here and fill this in later, depending on whether or not this // subprogram turns out to have inlined instances or not. 
- if (SP.isDefinition()) + if (SP->isDefinition()) return &SPDie; applySubprogramAttributes(SP, SPDie); return &SPDie; } -bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, +bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie) { DIE *DeclDie = nullptr; StringRef DeclLinkageName; - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + if (auto *SPDecl = SP->getDeclaration()) { DeclDie = getDIE(SPDecl); assert(DeclDie && "This DIE should've already been constructed when the " "definition DIE was created in " "getOrCreateSubprogramDIE"); - DeclLinkageName = SPDecl.getLinkageName(); + DeclLinkageName = SPDecl->getLinkageName(); } // Add function template parameters. - addTemplateParams(SPDie, SP.getTemplateParams()); + addTemplateParams(SPDie, SP->getTemplateParams()); // Add the linkage name if we have one and it isn't in the Decl. - StringRef LinkageName = SP.getLinkageName(); + StringRef LinkageName = SP->getLinkageName(); assert(((LinkageName.empty() || DeclLinkageName.empty()) || LinkageName == DeclLinkageName) && "decl has a linkage name and it is different"); - if (!LinkageName.empty() && DeclLinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + if (DeclLinkageName.empty()) + addLinkageName(SPDie, LinkageName); if (!DeclDie) return false; @@ -1256,15 +1160,15 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, return true; } -void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, +void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal) { if (!Minimal) if (applySubprogramDefinitionAttributes(SP, SPDie)) return; // Constructors and operators for anonymous aggregates do not have names. - if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, SP.getName()); + if (!SP->getName().empty()) + addString(SPDie, dwarf::DW_AT_name, SP->getName()); // Skip the rest of the attributes under -gmlt to save space. if (Minimal) @@ -1275,33 +1179,34 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, // Add the prototype if we have a prototype and we have a C like // language. uint16_t Language = getLanguage(); - if (SP.isPrototyped() && + if (SP->isPrototyped() && (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - DISubroutineType SPTy = SP.getType(); - assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && + const DISubroutineType *SPTy = SP->getType(); + assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type && "the type of a subprogram should be a subroutine"); - DITypeArray Args = SPTy.getTypeArray(); + auto Args = SPTy->getTypeArray(); // Add a return type. If this is a type like a C/C++ void type we don't add a // return type. 
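The linkage-name handling above funnels through the new addLinkageName helper, which picks the attribute by DWARF version: version 4 standardized DW_AT_linkage_name, while older consumers only understand the vendor attribute it replaced. The selection, isolated as a sketch:

    #include "llvm/Support/Dwarf.h"

    using namespace llvm;

    // Version-gated attribute choice, mirroring addLinkageName above.
    dwarf::Attribute linkageNameAttr(unsigned DwarfVersion) {
      return DwarfVersion >= 4 ? dwarf::DW_AT_linkage_name
                               : dwarf::DW_AT_MIPS_linkage_name;
    }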
- if (resolve(Args.getElement(0))) - addType(SPDie, DIType(resolve(Args.getElement(0)))); + if (Args.size()) + if (auto Ty = resolve(Args[0])) + addType(SPDie, Ty); - unsigned VK = SP.getVirtuality(); + unsigned VK = SP->getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIELoc *Block = getDIELoc(); addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(*Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); ContainingTypeMap.insert( - std::make_pair(&SPDie, resolve(SP.getContainingType()))); + std::make_pair(&SPDie, resolve(SP->getContainingType()))); } - if (!SP.isDefinition()) { + if (!SP->isDefinition()) { addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. Do not add arguments for subprogram definition. They will @@ -1309,41 +1214,40 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, constructSubprogramArguments(SPDie, Args); } - if (SP.isArtificial()) + if (SP->isArtificial()) addFlag(SPDie, dwarf::DW_AT_artificial); - if (!SP.isLocalToUnit()) + if (!SP->isLocalToUnit()) addFlag(SPDie, dwarf::DW_AT_external); - if (SP.isOptimized()) + if (SP->isOptimized()) addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); - if (unsigned isa = Asm->getISAEncoding()) { + if (unsigned isa = Asm->getISAEncoding()) addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); - } - if (SP.isLValueReference()) + if (SP->isLValueReference()) addFlag(SPDie, dwarf::DW_AT_reference); - if (SP.isRValueReference()) + if (SP->isRValueReference()) addFlag(SPDie, dwarf::DW_AT_rvalue_reference); - if (SP.isProtected()) + if (SP->isProtected()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) + else if (SP->isPrivate()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else if (SP.isPublic()) + else if (SP->isPublic()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (SP.isExplicit()) + if (SP->isExplicit()) addFlag(SPDie, dwarf::DW_AT_explicit); } -/// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { +void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, + DIE *IndexTy) { DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy); @@ -1351,9 +1255,9 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { // C/C++. The Count value is the number of elements. Values are 64 bit. If // Count == -1 then the array is unbounded and we do not emit // DW_AT_lower_bound and DW_AT_count attributes. - int64_t LowerBound = SR.getLo(); + int64_t LowerBound = SR->getLowerBound(); int64_t DefaultLowerBound = getDefaultLowerBound(); - int64_t Count = SR.getCount(); + int64_t Count = SR->getCount(); if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); @@ -1376,13 +1280,12 @@ DIE *DwarfUnit::getIndexTyDie() { return IndexTyDie; } -/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. 
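Reading the subrange logic above with concrete numbers: for a C array 'int a[10]' the front end records LowerBound = 0 and Count = 10; C's default lower bound is also 0, so only DW_AT_count is emitted, while Count == -1 (an unbounded array) suppresses both attributes per the comment in the hunk. A small sketch of the decision:

    #include <cstdint>

    // Sketch of the attribute selection above for 'int a[10]' in C.
    static void subrangeAttrs(bool &EmitLowerBound, bool &EmitCount) {
      int64_t LowerBound = 0;        // SR->getLowerBound()
      int64_t Count = 10;            // SR->getCount(); -1 marks unbounded
      int64_t DefaultLowerBound = 0; // getDefaultLowerBound() for C
      EmitLowerBound =
          DefaultLowerBound == -1 || LowerBound != DefaultLowerBound; // false
      EmitCount = Count != -1;                                        // true
    }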
-void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { - if (CTy.isVector()) +void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + if (CTy->isVector()) addFlag(Buffer, dwarf::DW_AT_GNU_vector); // Emit the element type. - addType(Buffer, resolve(CTy.getTypeDerivedFrom())); + addType(Buffer, resolve(CTy->getBaseType())); // Get an anonymous type for index type. // FIXME: This type should be passed down from the front end @@ -1390,45 +1293,42 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { DIE *IdxTy = getIndexTyDie(); // Add subranges to array type. - DIArray Elements = CTy.getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); + DINodeArray Elements = CTy->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + // FIXME: Should this really be such a loose cast? + if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy); } } -/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. -void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { - DIArray Elements = CTy.getElements(); +void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + DINodeArray Elements = CTy->getElements(); // Add enumerators to enumeration type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIEnumerator Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *Enum = dyn_cast_or_null<DIEnumerator>(Elements[i]); + if (Enum) { DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); - StringRef Name = Enum.getName(); + StringRef Name = Enum->getName(); addString(Enumerator, dwarf::DW_AT_name, Name); - int64_t Value = Enum.getEnumValue(); + int64_t Value = Enum->getValue(); addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); } } - DIType DTy = resolve(CTy.getTypeDerivedFrom()); + const DIType *DTy = resolve(CTy->getBaseType()); if (DTy) { addType(Buffer, DTy); addFlag(Buffer, dwarf::DW_AT_enum_class); } } -/// constructContainingTypeDIEs - Construct DIEs for types that contain -/// vtables. void DwarfUnit::constructContainingTypeDIEs() { - for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), - CE = ContainingTypeMap.end(); + for (auto CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); CI != CE; ++CI) { DIE &SPDie = *CI->first; - DIDescriptor D(CI->second); + const DINode *D = CI->second; if (!D) continue; DIE *NDie = getDIE(D); @@ -1438,18 +1338,17 @@ void DwarfUnit::constructContainingTypeDIEs() { } } -/// constructMemberDIE - Construct member DIE from DIDerivedType. 
-void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { - DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer); - StringRef Name = DT.getName(); +void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { + DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer); + StringRef Name = DT->getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, resolve(DT.getTypeDerivedFrom())); + addType(MemberDie, resolve(DT->getBaseType())); addSourceLine(MemberDie, DT); - if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) { + if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) { // For C++, virtual base classes are not at fixed offset. Use following // expression to extract appropriate offset from vtable. @@ -1459,14 +1358,14 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits()); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); } else { - uint64_t Size = DT.getSizeInBits(); + uint64_t Size = DT->getSizeInBits(); uint64_t FieldSize = getBaseTypeSize(DD, DT); uint64_t OffsetInBytes; @@ -1475,8 +1374,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t Offset = DT->getOffsetInBits(); + uint64_t AlignMask = ~(DT->getAlignInBits() - 1); uint64_t HiMark = (Offset + FieldSize) & AlignMask; uint64_t FieldOffset = (HiMark - FieldSize); Offset -= FieldOffset; @@ -1491,7 +1390,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { OffsetInBytes = FieldOffset >> 3; } else // This is not a bitfield. - OffsetInBytes = DT.getOffsetInBits() >> 3; + OffsetInBytes = DT->getOffsetInBits() >> 3; if (DD->getDwarfVersion() <= 2) { DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc(); @@ -1503,49 +1402,48 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { OffsetInBytes); } - if (DT.isProtected()) + if (DT->isProtected()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) + else if (DT->isPrivate()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. - else if (DT.isPublic()) + else if (DT->isPublic()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (DT.isVirtual()) + if (DT->isVirtual()) addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, dwarf::DW_VIRTUALITY_virtual); // Objective-C properties. 
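A worked instance of the bitfield placement arithmetic above (layout assumed): a 5-bit member recorded at bit offset 35 within 32-bit storage units falls in the second unit, so the member lands at byte offset 4 with bit offset 3 inside its unit:

    #include <cstdint>

    // Worked numbers for the math in constructMemberDIE above.
    static void bitfieldPlacement() {
      uint64_t Offset = 35;      // DT->getOffsetInBits()
      uint64_t FieldSize = 32;   // bit size of the underlying storage type
      uint64_t AlignInBits = 32; // DT->getAlignInBits()
      uint64_t AlignMask = ~(AlignInBits - 1);            // low 5 bits cleared
      uint64_t HiMark = (Offset + FieldSize) & AlignMask; // 64
      uint64_t FieldOffset = HiMark - FieldSize;          // 32
      Offset -= FieldOffset;     // 3: bit offset inside the storage unit
      uint64_t OffsetInBytes = FieldOffset >> 3; // 4: DW_AT_data_member_location
      (void)Offset; (void)OffsetInBytes;
    }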
- if (MDNode *PNode = DT.getObjCProperty()) + if (MDNode *PNode = DT->getObjCProperty()) if (DIEEntry *PropertyDie = getDIEEntry(PNode)) MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, PropertyDie); - if (DT.isArtificial()) + if (DT->isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); } -/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. -DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { - if (!DT.Verify()) +DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { + if (!DT) return nullptr; // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext())); + DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope())); assert(dwarf::isType(ContextDIE->getTag()) && "Static member should belong to a type."); if (DIE *StaticMemberDIE = getDIE(DT)) return StaticMemberDIE; - DIE &StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT); - DIType Ty = resolve(DT.getTypeDerivedFrom()); + const DIType *Ty = resolve(DT->getBaseType()); - addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); + addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName()); addType(StaticMemberDIE, Ty); addSourceLine(StaticMemberDIE, DT); addFlag(StaticMemberDIE, dwarf::DW_AT_external); @@ -1553,57 +1451,59 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { // FIXME: We could omit private if the parent is a class_type, and // public if the parent is something else. - if (DT.isProtected()) + if (DT->isProtected()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) + else if (DT->isPrivate()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else if (DT.isPublic()) + else if (DT->isPublic()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant())) + if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant())) addConstantValue(StaticMemberDIE, CI, Ty); - if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant())) + if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant())) addConstantFPValue(StaticMemberDIE, CFP); return &StaticMemberDIE; } -void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { +void DwarfUnit::emitHeader(bool UseOffsets) { // Emit size of content not including length itself - Asm->OutStreamer.AddComment("Length of Unit"); + Asm->OutStreamer->AddComment("Length of Unit"); Asm->EmitInt32(getHeaderSize() + UnitDie.getSize()); - Asm->OutStreamer.AddComment("DWARF version number"); + Asm->OutStreamer->AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); - Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); + Asm->OutStreamer->AddComment("Offset Into Abbrev. Section"); + // We share one abbreviations table across all units so it's always at the // start of the section. Use a relocatable offset where needed to ensure // linking doesn't invalidate that offset. 
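The rewritten emitHeader above (its abbreviation-offset branch continues just below) writes the standard DWARF32 compile-unit header. A layout sketch only; the real code emits field by field, and the on-disk form has no struct padding:

    #include <cstdint>

    // Field order and sizes written by DwarfUnit::emitHeader(bool UseOffsets).
    struct CompileUnitHeaderSketch {
      uint32_t UnitLength;   // getHeaderSize() + UnitDie.getSize(); excludes itself
      uint16_t Version;      // DD->getDwarfVersion()
      uint32_t AbbrevOffset; // relocatable section offset, or plain 0 (see below)
      uint8_t  AddressSize;  // pointer size from the data layout
    };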
- if (ASectionSym) - Asm->EmitSectionOffset(ASectionSym, ASectionSym); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + if (!UseOffsets) + Asm->emitSectionOffset(TLOF.getDwarfAbbrevSection()->getBeginSymbol()); else - // Use a constant value when no symbol is provided. Asm->EmitInt32(0); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); + + Asm->OutStreamer->AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } -void DwarfUnit::initSection(const MCSection *Section) { +void DwarfUnit::initSection(MCSection *Section) { assert(!this->Section); this->Section = Section; } -void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { - DwarfUnit::emitHeader(ASectionSym); - Asm->OutStreamer.AddComment("Type Signature"); - Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature)); - Asm->OutStreamer.AddComment("Type DIE Offset"); +void DwarfTypeUnit::emitHeader(bool UseOffsets) { + DwarfUnit::emitHeader(UseOffsets); + Asm->OutStreamer->AddComment("Type Signature"); + Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature)); + Asm->OutStreamer->AddComment("Type DIE Offset"); // In a skeleton type unit there is no type DIE so emit a zero offset. - Asm->OutStreamer.EmitIntValue(Ty ? Ty->getOffset() : 0, - sizeof(Ty->getOffset())); + Asm->OutStreamer->EmitIntValue(Ty ? Ty->getOffset() : 0, + sizeof(Ty->getOffset())); } bool DwarfTypeUnit::isDwoUnit() const { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 7a5e47d..0d01a9e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -63,15 +63,15 @@ public: }; //===----------------------------------------------------------------------===// -/// Unit - This dwarf writer support class manages information associated -/// with a source file. +/// This dwarf writer support class manages information associated with a +/// source file. class DwarfUnit { protected: - /// UniqueID - a numeric ID unique among all CUs in the module + /// A numeric ID unique among all CUs in the module unsigned UniqueID; - /// Node - MDNode for the compile unit. - DICompileUnit CUNode; + /// MDNode for the compile unit. + const DICompileUnit *CUNode; /// Unit debug information entry. DIE UnitDie; @@ -79,60 +79,62 @@ protected: /// Offset of the UnitDie from beginning of debug info section. unsigned DebugInfoOffset; - /// Asm - Target of Dwarf emission. + /// Target of Dwarf emission. AsmPrinter *Asm; // Holders for some common dwarf information. DwarfDebug *DD; DwarfFile *DU; - /// IndexTyDie - An anonymous type for index type. Owned by UnitDie. + /// An anonymous type for index type. Owned by UnitDie. DIE *IndexTyDie; - /// MDNodeToDieMap - Tracks the mapping of unit level debug information - /// variables to debug information entries. + /// Tracks the mapping of unit level debug information variables to debug + /// information entries. DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information - /// descriptors to debug information entries using a DIEEntry proxy. + /// Tracks the mapping of unit level debug information descriptors to debug + /// information entries using a DIEEntry proxy. DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - /// DIEBlocks - A list of all the DIEBlocks in use. + /// A list of all the DIEBlocks in use. 
std::vector<DIEBlock *> DIEBlocks; - - /// DIELocs - A list of all the DIELocs in use. + + /// A list of all the DIELocs in use. std::vector<DIELoc *> DIELocs; - /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that - /// need DW_AT_containing_type attribute. This attribute points to a DIE that + /// This map is used to keep track of subprogram DIEs that need + /// DW_AT_containing_type attribute. This attribute points to a DIE that /// corresponds to the MDNode mapped with the subprogram DIE. - DenseMap<DIE *, const MDNode *> ContainingTypeMap; + DenseMap<DIE *, const DINode *> ContainingTypeMap; - // DIEValueAllocator - All DIEValues are allocated through this allocator. + // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently. + // A preallocated DIEValue because 1 is used frequently. DIEInteger *DIEIntegerOne; /// The section this unit will be emitted in. - const MCSection *Section; + MCSection *Section; - DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, + DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); - void initSection(const MCSection *Section); - - /// Add a string attribute data and value. + /// \brief Add a string attribute data and value. + /// + /// This is guaranteed to be in the local string pool instead of indirected. void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - bool applySubprogramDefinitionAttributes(DISubprogram SP, DIE &SPDie); + bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); public: virtual ~DwarfUnit(); - const MCSection *getSection() const { + void initSection(MCSection *Section); + + MCSection *getSection() const { assert(Section); return Section; } @@ -140,119 +142,133 @@ public: // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } unsigned getUniqueID() const { return UniqueID; } - uint16_t getLanguage() const { return CUNode.getLanguage(); } - DICompileUnit getCUNode() const { return CUNode; } + uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } + const DICompileUnit *getCUNode() const { return CUNode; } DIE &getUnitDie() { return UnitDie; } unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } - /// hasContent - Return true if this compile unit has something to write out. + /// \brief Return true if this compile unit has something to write out. bool hasContent() const { return !UnitDie.getChildren().empty(); } - /// getParentContextString - Get a string containing the language specific - /// context for a global name. - std::string getParentContextString(DIScope Context) const; + /// \brief Get string containing language specific context for a global name. + /// + /// Walks the metadata parent chain in a language specific manner (using the + /// compile unit language) and returns it as a string. This is done at the + /// metadata level because DIEs may not currently have been added to the + /// parent context and walking the DIEs looking for names is more expensive + /// than walking the metadata. + std::string getParentContextString(const DIScope *Context) const; /// Add a new global name to the compile unit. 
- virtual void addGlobalName(StringRef Name, DIE &Die, DIScope Context) {} + virtual void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) { + } /// Add a new global type to the compile unit. - virtual void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) {} + virtual void addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) {} - /// addAccelNamespace - Add a new name to the namespace accelerator table. + /// \brief Add a new name to the namespace accelerator table. void addAccelNamespace(StringRef Name, const DIE &Die); - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. We delegate the request to DwarfDebug - /// when the MDNode can be part of the type system, since DIEs for - /// the type system can be shared across CUs and the mappings are - /// kept in DwarfDebug. - DIE *getDIE(DIDescriptor D) const; + /// \brief Returns the DIE map slot for the specified debug variable. + /// + /// We delegate the request to DwarfDebug when the MDNode can be part of the + /// type system, since DIEs for the type system can be shared across CUs and + /// the mappings are kept in DwarfDebug. + DIE *getDIE(const DINode *D) const; - /// getDIELoc - Returns a fresh newly allocated DIELoc. + /// \brief Returns a fresh newly allocated DIELoc. DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc(); } - /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug - /// when the MDNode can be part of the type system, since DIEs for - /// the type system can be shared across CUs and the mappings are - /// kept in DwarfDebug. - void insertDIE(DIDescriptor Desc, DIE *D); + /// \brief Insert DIE into the map. + /// + /// We delegate the request to DwarfDebug when the MDNode can be part of the + /// type system, since DIEs for the type system can be shared across CUs and + /// the mappings are kept in DwarfDebug. + void insertDIE(const DINode *Desc, DIE *D); - /// addFlag - Add a flag that is true to the DIE. + /// \brief Add a flag that is true to the DIE. void addFlag(DIE &Die, dwarf::Attribute Attribute); - /// addUInt - Add an unsigned integer attribute data and value. + /// \brief Add an unsigned integer attribute data and value. void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer); void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer); - /// addSInt - Add an signed integer attribute data and value. + /// \brief Add an signed integer attribute data and value. void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer); void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); - /// addString - Add a string attribute data and value. + /// \brief Add a string attribute data and value. + /// + /// We always emit a reference to the string pool instead of immediate + /// strings so that DIEs have more predictable sizes. In the case of split + /// dwarf we emit an index into another table which gets us the static offset + /// into the string table. void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - /// addLabel - Add a Dwarf label attribute data and value. + /// \brief Add a Dwarf label attribute data and value. void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); - /// addSectionOffset - Add an offset into a section attribute data and value. 
- /// + /// \brief Add an offset into a section attribute data and value. void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); - /// addOpAddress - Add a dwarf op address data and value using the - /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. + /// \brief Add a dwarf op address data and value using the form given and an + /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index. void addOpAddress(DIELoc &Die, const MCSymbol *Label); - /// addLabelDelta - Add a label delta attribute data and value. + /// \brief Add a label delta attribute data and value. void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo); - /// addDIEEntry - Add a DIE attribute data and value. + /// \brief Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); - /// addDIEEntry - Add a DIE attribute data and value. + /// \brief Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry *Entry); void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); - /// addBlock - Add block data. + /// \brief Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); - /// addBlock - Add block data. + /// \brief Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); - /// addSourceLine - Add location information to specified debug information - /// entry. + /// \brief Add location information to specified debug information entry. void addSourceLine(DIE &Die, unsigned Line, StringRef File, StringRef Directory); - void addSourceLine(DIE &Die, DIVariable V); - void addSourceLine(DIE &Die, DIGlobalVariable G); - void addSourceLine(DIE &Die, DISubprogram SP); - void addSourceLine(DIE &Die, DIType Ty); - void addSourceLine(DIE &Die, DINameSpace NS); - void addSourceLine(DIE &Die, DIObjCProperty Ty); - - /// addConstantValue - Add constant value entry in variable DIE. - void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty); - void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty); - void addConstantValue(DIE &Die, const APInt &Val, DIType Ty); + void addSourceLine(DIE &Die, const DILocalVariable *V); + void addSourceLine(DIE &Die, const DIGlobalVariable *G); + void addSourceLine(DIE &Die, const DISubprogram *SP); + void addSourceLine(DIE &Die, const DIType *Ty); + void addSourceLine(DIE &Die, const DINamespace *NS); + void addSourceLine(DIE &Die, const DIObjCProperty *Ty); + + /// \brief Add constant value entry in variable DIE. + void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty); + void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty); + void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); - /// addConstantFPValue - Add constant value entry in variable DIE. + /// \brief Add constant value entry in variable DIE. void addConstantFPValue(DIE &Die, const MachineOperand &MO); void addConstantFPValue(DIE &Die, const ConstantFP *CFP); - /// addTemplateParams - Add template parameters in buffer. - void addTemplateParams(DIE &Buffer, DIArray TParams); + /// \brief Add a linkage name, if it isn't empty. + void addLinkageName(DIE &Die, StringRef LinkageName); + + /// \brief Add template parameters in buffer. 
+ void addTemplateParams(DIE &Buffer, DINodeArray TParams); /// \brief Add register operand. /// \returns false if the register does not exist, e.g., because it was never @@ -266,51 +282,45 @@ public: bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); // FIXME: Should be reformulated in terms of addComplexAddress. - /// addBlockByrefAddress - Start with the address based on the location - /// provided, and generate the DWARF information necessary to find the - /// actual Block variable (navigating the Block struct) based on the - /// starting location. Add the DWARF information to the die. Obsolete, - /// please use addComplexAddress instead. + /// Start with the address based on the location provided, and generate the + /// DWARF information necessary to find the actual Block variable (navigating + /// the Block struct) based on the starting location. Add the DWARF + /// information to the die. Obsolete, please use addComplexAddress instead. void addBlockByrefAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location); - /// addType - Add a new type attribute to the specified entity. This takes - /// and attribute parameter because DW_AT_friend attributes are also - /// type references. - void addType(DIE &Entity, DIType Ty, + /// \brief Add a new type attribute to the specified entity. + /// + /// This takes and attribute parameter because DW_AT_friend attributes are + /// also type references. + void addType(DIE &Entity, const DIType *Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type); - /// getOrCreateNameSpace - Create a DIE for DINameSpace. - DIE *getOrCreateNameSpace(DINameSpace NS); - - /// getOrCreateSubprogramDIE - Create new DIE using SP. - DIE *getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal = false); + DIE *getOrCreateNameSpace(const DINamespace *NS); + DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false); - void applySubprogramAttributes(DISubprogram SP, DIE &SPDie, + void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal = false); - /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the - /// given DIType. + /// \brief Find existing DIE or create new DIE for the given type. DIE *getOrCreateTypeDIE(const MDNode *N); - /// getOrCreateContextDIE - Get context owner's DIE. - DIE *createTypeDIE(DICompositeType Ty); + /// \brief Get context owner's DIE. + DIE *createTypeDIE(const DICompositeType *Ty); - /// getOrCreateContextDIE - Get context owner's DIE. - DIE *getOrCreateContextDIE(DIScope Context); + /// \brief Get context owner's DIE. + DIE *getOrCreateContextDIE(const DIScope *Context); - /// constructContainingTypeDIEs - Construct DIEs for types that contain - /// vtables. + /// \brief Construct DIEs for types that contain vtables. void constructContainingTypeDIEs(); - /// constructSubprogramArguments - Construct function argument DIEs. - void constructSubprogramArguments(DIE &Buffer, DITypeArray Args); + /// \brief Construct function argument DIEs. + void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args); /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. - DIE &createAndAddDIE(unsigned Tag, DIE &Parent, - DIDescriptor N = DIDescriptor()); + DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr); /// Compute the size of a header for this unit, not including the initial /// length field. 
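On addType above: the comment means it takes an attribute parameter because DW_AT_friend attributes are type references too, so one helper serves both cases. A hypothetical caller, purely illustrative:

    // Hypothetical member function: the default argument covers the common
    // DW_AT_type case, the explicit form covers friend declarations.
    void DwarfUnit::addFriendRef(DIE &FriendDie, const DIType *FriendTy) {
      addType(FriendDie, FriendTy, dwarf::DW_AT_friend);
    }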
@@ -321,85 +331,68 @@ public: } /// Emit the header for this unit, not including the initial length field. - virtual void emitHeader(const MCSymbol *ASectionSym) const; + virtual void emitHeader(bool UseOffsets); virtual DwarfCompileUnit &getCU() = 0; - /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, DICompositeType CTy); + void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); protected: - /// getOrCreateStaticMemberDIE - Create new static data member DIE. - DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); + /// \brief Create new static data member DIE. + DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); /// Look up the source ID with the given directory and source file names. If /// none currently exists, create a new ID and insert it in the line table. virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; - /// resolve - Look in the DwarfDebug map for the MDNode that - /// corresponds to the reference. - template <typename T> T resolve(DIRef<T> Ref) const { + /// \brief Look in the DwarfDebug map for the MDNode that corresponds to the + /// reference. + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { return DD->resolve(Ref); } private: - /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, DIBasicType BTy); - - /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); - - /// constructSubrangeDIE - Construct subrange DIE from DISubrange. - void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); - - /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy); - - /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy); - - /// constructMemberDIE - Construct member DIE from DIDerivedType. - void constructMemberDIE(DIE &Buffer, DIDerivedType DT); - - /// constructTemplateTypeParameterDIE - Construct new DIE for the given - /// DITemplateTypeParameter. + void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy); + void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy); + void constructTypeDIE(DIE &Buffer, const DISubroutineType *DTy); + void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy); + void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy); + void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy); + void constructMemberDIE(DIE &Buffer, const DIDerivedType *DT); void constructTemplateTypeParameterDIE(DIE &Buffer, - DITemplateTypeParameter TP); - - /// constructTemplateValueParameterDIE - Construct new DIE for the given - /// DITemplateValueParameter. + const DITemplateTypeParameter *TP); void constructTemplateValueParameterDIE(DIE &Buffer, - DITemplateValueParameter TVP); + const DITemplateValueParameter *TVP); - /// getLowerBoundDefault - Return the default lower bound for an array. If the - /// DWARF version doesn't handle the language, return -1. + /// \brief Return the default lower bound for an array. + /// + /// If the DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; - /// getDIEEntry - Returns the debug information entry for the specified - /// debug variable. + /// \brief Returns the DIE entry for the specified debug variable. 
DIEEntry *getDIEEntry(const MDNode *N) const { return MDNodeToDIEEntryMap.lookup(N); } - /// insertDIEEntry - Insert debug information entry into the map. + /// \brief Insert debug information entry into the map. void insertDIEEntry(const MDNode *N, DIEEntry *E) { MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); } - // getIndexTyDie - Get an anonymous type for index type. + /// \brief Get an anonymous type for index type. DIE *getIndexTyDie(); - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. + /// \brief Set D as anonymous type for index which can be reused later. void setIndexTyDie(DIE *D) { IndexTyDie = D; } - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. + /// \brief Creates a new DIEEntry to be a proxy for a debug information + /// entry. DIEEntry *createDIEEntry(DIE &Entry); /// If this is a named finished type then include it in the list of types for /// the accelerator tables. - void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE); + void updateAcceleratorTables(const DIScope *Context, const DIType *Ty, + const DIE &TyDIE); virtual bool isDwoUnit() const = 0; }; @@ -423,12 +416,11 @@ public: void setType(const DIE *Ty) { this->Ty = Ty; } /// Emit the header for this unit, not including the initial length field. - void emitHeader(const MCSymbol *ASectionSym) const override; + void emitHeader(bool UseOffsets) override; unsigned getHeaderSize() const override { return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature sizeof(uint32_t); // Type DIE Offset } - using DwarfUnit::initSection; DwarfCompileUnit &getCU() override { return CU; } }; } // end llvm namespace diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index f112120..1be3fd7 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -121,7 +121,8 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { int TypeID = TypeIds[J]; assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); - int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; + int ValueForTypeID = + isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; @@ -187,20 +188,12 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { return MarkedNoUnwind; } -/// Compute the call-site table. The entry for an invoke has a try-range -/// containing the call, a non-zero landing pad, and an appropriate action. The -/// entry for an ordinary call has a try-range containing the call and zero for -/// the landing pad and the action. Calls marked 'nounwind' have no entry and -/// must not be contained in the try-range of any entry - they form gaps in the -/// table. Entries must be ordered by try-range address. -void EHStreamer:: -computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const SmallVectorImpl<const LandingPadInfo *> &LandingPads, - const SmallVectorImpl<unsigned> &FirstActions) { +void EHStreamer::computePadMap( + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap) { // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). 
Ordinary calls do not, so appropriate // try-ranges for them need be deduced so we can put them in the LSDA. - RangeMapType PadMap; for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { const LandingPadInfo *LandingPad = LandingPads[i]; for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { @@ -210,6 +203,20 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, PadMap[BeginLabel] = P; } } +} + +/// Compute the call-site table. The entry for an invoke has a try-range +/// containing the call, a non-zero landing pad, and an appropriate action. The +/// entry for an ordinary call has a try-range containing the call and zero for +/// the landing pad and the action. Calls marked 'nounwind' have no entry and +/// must not be contained in the try-range of any entry - they form gaps in the +/// table. Entries must be ordered by try-range address. +void EHStreamer:: +computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); // The end label of the previous invoke or nounwind try-range. MCSymbol *LastLabel = nullptr; @@ -252,7 +259,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // instruction between the previous try-range and this one may throw, // create a call-site entry with no landing pad for the region between the // try-ranges. - if (SawPotentiallyThrowing && !IsSJLJ) { + if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) { CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; @@ -269,14 +276,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, CallSiteEntry Site = { BeginLabel, LastLabel, - LandingPad->LandingPadLabel, + LandingPad, FirstActions[P.PadIndex] }; // Try to merge with the previous call-site. SJLJ doesn't do this if (PreviousIsInvoke && !IsSJLJ) { CallSiteEntry &Prev = CallSites.back(); - if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { + if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) { // Extend the range of the previous entry. Prev.EndLabel = Site.EndLabel; continue; @@ -381,7 +388,7 @@ void EHStreamer::emitExceptionTable() { } // Type infos. - const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); + MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); unsigned TTypeEncoding; unsigned TypeFormatSize; @@ -427,20 +434,15 @@ void EHStreamer::emitExceptionTable() { // Sometimes we want not to emit the data into separate section (e.g. ARM // EHABI). In this case LSDASection will be NULL. if (LSDASection) - Asm->OutStreamer.SwitchSection(LSDASection); + Asm->OutStreamer->SwitchSection(LSDASection); Asm->EmitAlignment(2); // Emit the LSDA. MCSymbol *GCCETSym = - Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+ + Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+ Twine(Asm->getFunctionNumber())); - Asm->OutStreamer.EmitLabel(GCCETSym); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber())); - - if (IsSJLJ) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_", - Asm->getFunctionNumber())); + Asm->OutStreamer->EmitLabel(GCCETSym); + Asm->OutStreamer->EmitLabel(Asm->getCurExceptionSym()); // Emit the LSDA header. 
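One behavioral change in the call-site hunk above: a gap record (a call-site entry with a null landing pad, covering potentially-throwing code between try-ranges) is now emitted when the target uses CFI-based exception handling, rather than simply when not using SJLJ. The condition, isolated as a sketch:

    #include "llvm/MC/MCAsmInfo.h"

    // Sketch of the rewritten gap-entry test (was: SawPotentiallyThrowing && !IsSJLJ).
    static bool wantGapEntry(bool SawPotentiallyThrowing, const llvm::MCAsmInfo &MAI) {
      return SawPotentiallyThrowing && MAI.usesCFIForEH();
    }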
Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); @@ -484,7 +486,7 @@ void EHStreamer::emitExceptionTable() { SizeAlign = 0; } - bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); // SjLj Exception handling if (IsSJLJ) { @@ -502,8 +504,8 @@ void EHStreamer::emitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. if (VerboseAsm) { - Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<"); - Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx)); + Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<"); + Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx)); } Asm->EmitULEB128(idx); @@ -512,10 +514,10 @@ void EHStreamer::emitExceptionTable() { // the action table), and 0 indicates that there are no actions. if (VerboseAsm) { if (S.Action == 0) - Asm->OutStreamer.AddComment(" Action: cleanup"); + Asm->OutStreamer->AddComment(" Action: cleanup"); else - Asm->OutStreamer.AddComment(" Action: " + - Twine((S.Action - 1) / 2 + 1)); + Asm->OutStreamer->AddComment(" Action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -551,40 +553,38 @@ void EHStreamer::emitExceptionTable() { I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { const CallSiteEntry &S = *I; - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); MCSymbol *BeginLabel = S.BeginLabel; if (!BeginLabel) BeginLabel = EHFuncBeginSym; MCSymbol *EndLabel = S.EndLabel; if (!EndLabel) - EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - + EndLabel = Asm->getFunctionEnd(); // Offset of the call site relative to the previous call site, counted in // number of 16-byte bundles. The first call site is counted relative to // the start of the procedure fragment. if (VerboseAsm) - Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<"); + Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine(" Call between ") + - BeginLabel->getName() + " and " + - EndLabel->getName()); + Asm->OutStreamer->AddComment(Twine(" Call between ") + + BeginLabel->getName() + " and " + + EndLabel->getName()); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. - if (!S.PadLabel) { + if (!S.LPad) { if (VerboseAsm) - Asm->OutStreamer.AddComment(" has no landing pad"); - Asm->OutStreamer.EmitIntValue(0, 4/*size*/); + Asm->OutStreamer->AddComment(" has no landing pad"); + Asm->OutStreamer->EmitIntValue(0, 4/*size*/); } else { if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine(" jumps to ") + - S.PadLabel->getName()); - Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4); + Asm->OutStreamer->AddComment(Twine(" jumps to ") + + S.LPad->LandingPadLabel->getName()); + Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); } // Offset of the first associated action record, relative to the start of @@ -592,10 +592,10 @@ void EHStreamer::emitExceptionTable() { // the action table), and 0 indicates that there are no actions. 
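The '(S.Action - 1) / 2 + 1' comment arithmetic above relies on each action-table record occupying two SLEB128 bytes (one for the type index, one for the next-action link, both typically single-byte), so record N starts at 1-based byte offset 2N - 1. Inverting that:

    // Sketch: recover the record number behind the verbose-asm comment above.
    static unsigned actionRecord(unsigned SAction) {
      return SAction == 0 ? 0            // 0 means cleanup: no record at all
                          : (SAction - 1) / 2 + 1;
    }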
if (VerboseAsm) { if (S.Action == 0) - Asm->OutStreamer.AddComment(" On action: cleanup"); + Asm->OutStreamer->AddComment(" On action: cleanup"); else - Asm->OutStreamer.AddComment(" On action: " + - Twine((S.Action - 1) / 2 + 1)); + Asm->OutStreamer->AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -609,7 +609,7 @@ void EHStreamer::emitExceptionTable() { if (VerboseAsm) { // Emit comments that decode the action table. - Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<"); + Asm->OutStreamer->AddComment(">> Action Record " + Twine(++Entry) + " <<"); } // Type Filter @@ -618,13 +618,13 @@ void EHStreamer::emitExceptionTable() { // type of the catch clauses or the types in the exception specification. if (VerboseAsm) { if (Action.ValueForTypeID > 0) - Asm->OutStreamer.AddComment(" Catch TypeInfo " + - Twine(Action.ValueForTypeID)); + Asm->OutStreamer->AddComment(" Catch TypeInfo " + + Twine(Action.ValueForTypeID)); else if (Action.ValueForTypeID < 0) - Asm->OutStreamer.AddComment(" Filter TypeInfo " + - Twine(Action.ValueForTypeID)); + Asm->OutStreamer->AddComment(" Filter TypeInfo " + + Twine(Action.ValueForTypeID)); else - Asm->OutStreamer.AddComment(" Cleanup"); + Asm->OutStreamer->AddComment(" Cleanup"); } Asm->EmitSLEB128(Action.ValueForTypeID); @@ -634,10 +634,10 @@ void EHStreamer::emitExceptionTable() { // or 0 if there is no next action record. if (VerboseAsm) { if (Action.NextAction == 0) { - Asm->OutStreamer.AddComment(" No further actions"); + Asm->OutStreamer->AddComment(" No further actions"); } else { unsigned NextAction = Entry + (Action.NextAction + 1) / 2; - Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction)); + Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction)); } } Asm->EmitSLEB128(Action.NextAction); @@ -652,13 +652,13 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); int Entry = 0; // Emit the Catch TypeInfos. if (VerboseAsm && !TypeInfos.empty()) { - Asm->OutStreamer.AddComment(">> Catch TypeInfos <<"); - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); Entry = TypeInfos.size(); } @@ -666,14 +666,14 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalValue *GV = *I; if (VerboseAsm) - Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); + Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); } // Emit the Exception Specifications. 
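The same two-byte-record assumption appears once more in the action-table comments above: Action.NextAction is a self-relative displacement in bytes, so a displacement d reaches the record (d + 1) / 2 entries ahead, which is what the verbose comment computes. Mirrored as a sketch, with the sign conventions exactly as in the hunk:

    // Sketch: absolute record number of the chained action (0 ends the chain).
    static unsigned nextActionRecord(unsigned Entry, int NextActionDisp) {
      return NextActionDisp == 0 ? 0 : Entry + (NextActionDisp + 1) / 2;
    }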
if (VerboseAsm && !FilterIds.empty()) { - Asm->OutStreamer.AddComment(">> Filter TypeInfos <<"); - Asm->OutStreamer.AddBlankLine(); + Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); Entry = 0; } for (std::vector<unsigned>::const_iterator @@ -681,26 +681,10 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { unsigned TypeID = *I; if (VerboseAsm) { --Entry; - if (TypeID != 0) - Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); + if (isFilterEHSelector(TypeID)) + Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); } Asm->EmitULEB128(TypeID); } } - -/// Emit all exception information that should come after the content. -void EHStreamer::endModule() { - llvm_unreachable("Should be implemented"); -} - -/// Gather pre-function exception information. Assumes it's being emitted -/// immediately after the function entry point. -void EHStreamer::beginFunction(const MachineFunction *MF) { - llvm_unreachable("Should be implemented"); -} - -/// Gather and emit post-function exception information. -void EHStreamer::endFunction(const MachineFunction *) { - llvm_unreachable("Should be implemented"); -} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index e93055c..65973fa 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -23,6 +23,8 @@ class MachineModuleInfo; class MachineInstr; class MachineFunction; class AsmPrinter; +class MCSymbol; +class MCSymbolRefExpr; template <typename T> class SmallVectorImpl; @@ -60,11 +62,11 @@ protected: /// Structure describing an entry in the call-site table. struct CallSiteEntry { // The 'try-range' is BeginLabel .. EndLabel. - MCSymbol *BeginLabel; // zero indicates the start of the function. - MCSymbol *EndLabel; // zero indicates the end of the function. + MCSymbol *BeginLabel; // Null indicates the start of the function. + MCSymbol *EndLabel; // Null indicates the end of the function. - // The landing pad starts at PadLabel. - MCSymbol *PadLabel; // zero indicates that there is no landing pad. + // LPad contains the landing pad start labels. + const LandingPadInfo *LPad; // Null indicates that there is no landing pad. unsigned Action; }; @@ -78,13 +80,15 @@ protected: /// `false' otherwise. bool callToNoUnwindFunction(const MachineInstr *MI); + void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap); + /// Compute the call-site table. The entry for an invoke has a try-range /// containing the call, a non-zero landing pad and an appropriate action. /// The entry for an ordinary call has a try-range containing the call and /// zero for the landing pad and the action. Calls marked 'nounwind' have /// no entry and must not be contained in the try-range of any entry - they /// form gaps in the table. Entries must be ordered by try-range address. - void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, const SmallVectorImpl<const LandingPadInfo *> &LPs, const SmallVectorImpl<unsigned> &FirstActions); @@ -112,19 +116,16 @@ protected: virtual void emitTypeInfos(unsigned TTypeEncoding); + // Helpers for identifying what kind of clause an EH typeid or selector + // corresponds to. Negative selectors are for filter clauses, the zero + // selector is for cleanups, and positive selectors are for catch clauses.
+ static bool isFilterEHSelector(int Selector) { return Selector < 0; } + static bool isCleanupEHSelector(int Selector) { return Selector == 0; } + static bool isCatchEHSelector(int Selector) { return Selector > 0; } + public: EHStreamer(AsmPrinter *A); - virtual ~EHStreamer(); - - /// Emit all exception information that should come after the content. - void endModule() override; - - /// Gather pre-function exception information. Assumes being emitted - /// immediately after the function entry point. - void beginFunction(const MachineFunction *MF) override; - - /// Gather and emit post-function exception information. - void endFunction(const MachineFunction *) override; + ~EHStreamer() override; // Unused. void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index e293acd..eb9e4c1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -34,33 +34,30 @@ using namespace llvm; namespace { - class ErlangGCPrinter : public GCMetadataPrinter { - public: - void finishAssembly(Module &M, GCModuleInfo &Info, - AsmPrinter &AP) override; - }; - +class ErlangGCPrinter : public GCMetadataPrinter { +public: + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; } static GCMetadataPrinterRegistry::Add<ErlangGCPrinter> -X("erlang", "erlang-compatible garbage collector"); + X("erlang", "erlang-compatible garbage collector"); -void llvm::linkErlangGCPrinter() { } +void llvm::linkErlangGCPrinter() {} void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { - MCStreamer &OS = AP.OutStreamer; - unsigned IntPtrSize = - AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + MCStreamer &OS = *AP.OutStreamer; + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); // Put this in a custom .note section. - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext() - .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0, - SectionKind::getDataRel())); + OS.SwitchSection( + AP.getObjFileLowering().getContext().getELFSection(".note.gc", + ELF::SHT_PROGBITS, 0)); // For each function... for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(), - IE = Info.funcinfo_end(); + IE = Info.funcinfo_end(); FI != IE; ++FI) { GCFunctionInfo &MD = **FI; if (MD.getStrategy().getName() != getStrategy().getName()) @@ -91,7 +88,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // Emit the address of the safe point. OS.AddComment("safe point address"); MCSymbol *Label = PI->Label; - AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/); + AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/); } // Stack information never change in safe points! Only print info from the @@ -104,8 +101,9 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // Emit stack arity, i.e. the number of stacked arguments. unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; - unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ? - MD.getFunction().arg_size() - RegisteredArgs : 0; + unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs + ? MD.getFunction().arg_size() - RegisteredArgs + : 0; OS.AddComment("stack arity"); AP.EmitInt16(StackArity); @@ -116,7 +114,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, // And for each live root... 
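Concrete numbers for the Erlang stack-arity rule above: on a 64-bit target (IntPtrSize == 8) six arguments travel in registers, so a nine-argument function records a stack arity of three; on a 32-bit target the same function records four:

    // Sketch of the arity computation above, with worked values.
    static unsigned stackArity(unsigned IntPtrSize, unsigned ArgCount) {
      unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
      return ArgCount > RegisteredArgs ? ArgCount - RegisteredArgs : 0;
    }
    // stackArity(8, 9) == 3; stackArity(4, 9) == 4.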
for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), LE = MD.live_end(PI); - LI != LE; ++LI) { + LI != LE; ++LI) { // Emit live root's offset within the stack frame. OS.AddComment("stack index (offset / wordsize)"); AP.EmitInt16(LI->StackOffset / IntPtrSize); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index ddb14a0..802456b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -32,20 +32,17 @@ using namespace llvm; namespace { - class OcamlGCMetadataPrinter : public GCMetadataPrinter { - public: - void beginAssembly(Module &M, GCModuleInfo &Info, - AsmPrinter &AP) override; - void finishAssembly(Module &M, GCModuleInfo &Info, - AsmPrinter &AP) override; - }; - +class OcamlGCMetadataPrinter : public GCMetadataPrinter { +public: + void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; } static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter> -Y("ocaml", "ocaml 3.10-compatible collector"); + Y("ocaml", "ocaml 3.10-compatible collector"); -void llvm::linkOcamlGCPrinter() { } +void llvm::linkOcamlGCPrinter() {} static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { const std::string &MId = M.getModuleIdentifier(); @@ -63,18 +60,18 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { SmallString<128> TmpStr; AP.Mang->getNameWithPrefix(TmpStr, SymName); - MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr); + MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr); - AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); - AP.OutStreamer.EmitLabel(Sym); + AP.OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); + AP.OutStreamer->EmitLabel(Sym); } void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(M, AP, "code_begin"); - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "data_begin"); } @@ -96,24 +93,24 @@ void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, /// void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { - unsigned IntPtrSize = - AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(M, AP, "code_end"); - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "data_end"); // FIXME: Why does ocaml emit this?? 
- AP.OutStreamer.EmitIntValue(0, IntPtrSize); + AP.OutStreamer->EmitIntValue(0, IntPtrSize); - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "frametable"); int NumDescriptors = 0; for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), - IE = Info.funcinfo_end(); I != IE; ++I) { + IE = Info.funcinfo_end(); + I != IE; ++I) { GCFunctionInfo &FI = **I; if (FI.getStrategy().getName() != getStrategy().getName()) // this function is managed by some other GC @@ -123,7 +120,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, } } - if (NumDescriptors >= 1<<16) { + if (NumDescriptors >= 1 << 16) { // Very rude! report_fatal_error(" Too much descriptor for ocaml GC"); } @@ -131,45 +128,50 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), - IE = Info.funcinfo_end(); I != IE; ++I) { + IE = Info.funcinfo_end(); + I != IE; ++I) { GCFunctionInfo &FI = **I; if (FI.getStrategy().getName() != getStrategy().getName()) // this function is managed by some other GC continue; uint64_t FrameSize = FI.getFrameSize(); - if (FrameSize >= 1<<16) { + if (FrameSize >= 1 << 16) { // Very rude! report_fatal_error("Function '" + FI.getFunction().getName() + "' is too large for the ocaml GC! " - "Frame size " + Twine(FrameSize) + ">= 65536.\n" - "(" + Twine(uintptr_t(&FI)) + ")"); + "Frame size " + + Twine(FrameSize) + ">= 65536.\n" + "(" + + Twine(uintptr_t(&FI)) + ")"); } - AP.OutStreamer.AddComment("live roots for " + - Twine(FI.getFunction().getName())); - AP.OutStreamer.AddBlankLine(); + AP.OutStreamer->AddComment("live roots for " + + Twine(FI.getFunction().getName())); + AP.OutStreamer->AddBlankLine(); for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { size_t LiveCount = FI.live_size(J); - if (LiveCount >= 1<<16) { + if (LiveCount >= 1 << 16) { // Very rude! report_fatal_error("Function '" + FI.getFunction().getName() + "' is too large for the ocaml GC! " - "Live root count "+Twine(LiveCount)+" >= 65536."); + "Live root count " + + Twine(LiveCount) + " >= 65536."); } - AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize); + AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize); AP.EmitInt16(FrameSize); AP.EmitInt16(LiveCount); for (GCFunctionInfo::live_iterator K = FI.live_begin(J), - KE = FI.live_end(J); K != KE; ++K) { - if (K->StackOffset >= 1<<16) { + KE = FI.live_end(J); + K != KE; ++K) { + if (K->StackOffset >= 1 << 16) { // Very rude! 
report_fatal_error( - "GC root stack offset is outside of fixed stack frame and out " - "of range for ocaml GC!"); + "GC root stack offset is outside of fixed stack frame and out " + "of range for ocaml GC!"); } AP.EmitInt16(K->StackOffset); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 0f0ad75..dc6df9c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" @@ -48,8 +49,6 @@ Win64Exception::~Win64Exception() {} void Win64Exception::endModule() { } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; @@ -60,7 +59,7 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + const Function *Per = MF->getMMI().getPersonality(); shouldEmitPersonality = hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit && Per; @@ -69,39 +68,437 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; + + // If this was an outlined handler, we need to define the label corresponding + // to the offset of the parent frame relative to the stack pointer after the + // prologue. + const Function *F = MF->getFunction(); + const Function *ParentF = MMI->getWinEHParent(F); + if (F != ParentF) { + WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F); + if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) { + MCSymbol *HandlerTypeParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(F->getName())); + + // Emit a symbol assignment. + Asm->OutStreamer->EmitAssignment( + HandlerTypeParentFrameOffset, + MCConstantExpr::Create(I->second, Asm->OutContext)); + } + } + if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitWinCFIStartProc(Asm->CurrentFnSym); + Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym); if (!shouldEmitPersonality) return; const MCSymbol *PersHandlerSym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); } /// endFunction - Gather and emit post-function exception information. /// -void Win64Exception::endFunction(const MachineFunction *) { +void Win64Exception::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); + EHPersonality Per = MMI->getPersonalityType(); - // Map all labels and get rid of any dead landing pads. 
- MMI->TidyLandingPads(); + // Get rid of any dead landing pads if we're not using a Windows EH scheme. In + // Windows EH schemes, the landing pad is not actually reachable. It only + // exists so that we can emit the right table data. + if (!isMSVCEHPersonality(Per)) + MMI->TidyLandingPads(); if (shouldEmitPersonality) { - Asm->OutStreamer.PushSection(); - Asm->OutStreamer.EmitWinEHHandlerData(); - emitExceptionTable(); - Asm->OutStreamer.PopSection(); + Asm->OutStreamer->PushSection(); + + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + + // Emit the tables appropriate to the personality function in use. If we + // don't recognize the personality, assume it uses an Itanium-style LSDA. + if (Per == EHPersonality::MSVC_Win64SEH) + emitCSpecificHandlerTable(); + else if (Per == EHPersonality::MSVC_CXX) + emitCXXFrameHandler3Table(MF); + else + emitExceptionTable(); + + Asm->OutStreamer->PopSection(); + } + Asm->OutStreamer->EmitWinCFIEndProc(); +} + +const MCExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { + if (!Value) + return MCConstantExpr::Create(0, Asm->OutContext); + return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, + Asm->OutContext); +} + +const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) { + if (!GV) + return MCConstantExpr::Create(0, Asm->OutContext); + return createImageRel32(Asm->getSymbol(GV)); +} + +/// Emit the language-specific data that __C_specific_handler expects. This +/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning +/// up after faults with __try, __except, and __finally. The typeinfo values +/// are not really RTTI data, but pointers to filter functions that return an +/// integer (1, 0, or -1) indicating how to handle the exception. For __finally +/// blocks and other cleanups, the landing pad label is zero, and the filter +/// function is actually a cleanup handler with the same prototype. A catch-all +/// entry is modeled with a null filter function field and a non-zero landing +/// pad label. +/// +/// Possible filter function return values: +/// EXCEPTION_EXECUTE_HANDLER (1): +/// Jump to the landing pad label after cleanups. +/// EXCEPTION_CONTINUE_SEARCH (0): +/// Continue searching this table or continue unwinding. +/// EXCEPTION_CONTINUE_EXECUTION (-1): +/// Resume execution at the trapping PC. +/// +/// Inferred table structure: +/// struct Table { +/// int NumEntries; +/// struct Entry { +/// imagerel32 LabelStart; +/// imagerel32 LabelEnd; +/// imagerel32 FilterOrFinally; // One means catch-all. +/// imagerel32 LabelLPad; // Zero means __finally. +/// } Entries[NumEntries]; +/// }; +void Win64Exception::emitCSpecificHandlerTable() { + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + + // Simplifying assumptions for first implementation: + // - Cleanups are not implemented. + // - Filters are not implemented. + + // The Itanium LSDA table sorts similar landing pads together to simplify the + // actions table, but we don't need that. + SmallVector<const LandingPadInfo *, 64> LandingPads; + LandingPads.reserve(PadInfos.size()); + for (const auto &LP : PadInfos) + LandingPads.push_back(&LP); + + // Compute label ranges for call sites as we would for the Itanium LSDA, but + // use an all zero action table because we aren't using these actions. 
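
Before the implementation continues below, it may help to picture the entry layout just described as a plain C++ mirror. This sketch is illustrative only: the field spellings come from the comment above, and the all-caps constants are the Windows SEH names it cites; none of this is copied from an LLVM or CRT header.

#include <cstdint>

// Hypothetical mirror of one table entry; every field is a 4-byte
// image-relative offset (imagerel32), exactly as emitted below.
struct SEHTableEntry {
  uint32_t LabelStart;      // start of the guarded call-site range
  uint32_t LabelEnd;        // one past the last return address in the range
  uint32_t FilterOrFinally; // the literal 1 means catch-all
  uint32_t LabelLPad;       // the literal 0 means __finally
};

// Filter return values, with the meanings listed in the comment above.
enum FilterResult {
  EXCEPTION_CONTINUE_EXECUTION = -1, // resume at the trapping PC
  EXCEPTION_CONTINUE_SEARCH = 0,     // keep searching / keep unwinding
  EXCEPTION_EXECUTE_HANDLER = 1,     // jump to the landing pad after cleanups
};
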
+ SmallVector<unsigned, 64> FirstActions; + FirstActions.resize(LandingPads.size()); + SmallVector<CallSiteEntry, 64> CallSites; + computeCallSiteTable(CallSites, LandingPads, FirstActions); + + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); + MCSymbol *EHFuncEndSym = Asm->getFunctionEnd(); + + // Emit the number of table entries. + unsigned NumEntries = 0; + for (const CallSiteEntry &CSE : CallSites) { + if (!CSE.LPad) + continue; // Ignore gaps. + NumEntries += CSE.LPad->SEHHandlers.size(); + } + Asm->OutStreamer->EmitIntValue(NumEntries, 4); + + // If there are no actions, we don't need to iterate again. + if (NumEntries == 0) + return; + + // Emit the four-label records for each call site entry. The table has to be + // sorted in layout order, and the call sites should already be sorted. + for (const CallSiteEntry &CSE : CallSites) { + // Ignore gaps. Unlike the Itanium model, unwinding through a frame without + // an EH table entry will propagate the exception rather than terminating + // the program. + if (!CSE.LPad) + continue; + const LandingPadInfo *LPad = CSE.LPad; + + // Compute the label range. We may reuse the function begin and end labels + // rather than forming new ones. + const MCExpr *Begin = + createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); + const MCExpr *End; + if (CSE.EndLabel) { + // The interval is half-open, so we have to add one to include the return + // address of the last invoke in the range. + End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel), + MCConstantExpr::Create(1, Asm->OutContext), + Asm->OutContext); + } else { + End = createImageRel32(EHFuncEndSym); + } + + // Emit an entry for each action. + for (SEHHandler Handler : LPad->SEHHandlers) { + Asm->OutStreamer->EmitValue(Begin, 4); + Asm->OutStreamer->EmitValue(End, 4); + + // Emit the filter or finally function pointer, if present. Otherwise, + // emit '1' to indicate a catch-all. + const Function *F = Handler.FilterOrFinally; + if (F) + Asm->OutStreamer->EmitValue(createImageRel32(Asm->getSymbol(F)), 4); + else + Asm->OutStreamer->EmitIntValue(1, 4); + + // Emit the recovery address, if present. Otherwise, this must be a + // finally. + const BlockAddress *BA = Handler.RecoverBA; + if (BA) + Asm->OutStreamer->EmitValue( + createImageRel32(Asm->GetBlockAddressSymbol(BA)), 4); + else + Asm->OutStreamer->EmitIntValue(0, 4); + } + } +} + +void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { + const Function *F = MF->getFunction(); + const Function *ParentF = MMI->getWinEHParent(F); + auto &OS = *Asm->OutStreamer; + WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + + StringRef ParentLinkageName = + GlobalValue::getRealLinkageName(ParentF->getName()); + + MCSymbol *FuncInfoXData = + Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", ParentLinkageName)); + OS.EmitValue(createImageRel32(FuncInfoXData), 4); + + // The Itanium LSDA table sorts similar landing pads together to simplify the + // actions table, but we don't need that. + SmallVector<const LandingPadInfo *, 64> LandingPads; + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + LandingPads.reserve(PadInfos.size()); + for (const auto &LP : PadInfos) + LandingPads.push_back(&LP); + + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); + + // The end label of the previous invoke or nounwind try-range. 
+ MCSymbol *LastLabel = Asm->getFunctionBegin(); + + // Whether there is a potentially throwing instruction (currently this means + // an ordinary call) between the end of the previous try-range and now. + bool SawPotentiallyThrowing = false; + + int LastEHState = -2; + + // The parent function and the catch handlers contribute to the 'ip2state' + // table. + + // Include ip2state entries for the beginning of the main function and + // for catch handler functions. + if (F == ParentF) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); + LastEHState = -1; + } else if (FuncInfo.HandlerBaseState.count(F)) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, + FuncInfo.HandlerBaseState[F])); + LastEHState = FuncInfo.HandlerBaseState[F]; + } + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isEHLabel()) { + if (MI.isCall()) + SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); + continue; + } + + // End of the previous try-range? + MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); + if (BeginLabel == LastLabel) + SawPotentiallyThrowing = false; + + // Beginning of a new try-range? + RangeMapType::const_iterator L = PadMap.find(BeginLabel); + if (L == PadMap.end()) + // Nope, it was just some random label. + continue; + + const PadRange &P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + // FIXME: Should this be using FuncInfo.HandlerBaseState? + if (SawPotentiallyThrowing && LastEHState != -1) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); + SawPotentiallyThrowing = false; + LastEHState = -1; + } + + if (LandingPad->WinEHState != LastEHState) + FuncInfo.IPToStateList.push_back( + std::make_pair(BeginLabel, LandingPad->WinEHState)); + LastEHState = LandingPad->WinEHState; + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + } + } + + // Defer emission until we've visited the parent function and all the catch + // handlers. Cleanups don't contribute to the ip2state table yet, so don't + // count them. + if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F)) + return; + ++FuncInfo.NumIPToStateFuncsVisited; + if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size()) + return; + + MCSymbol *UnwindMapXData = nullptr; + MCSymbol *TryBlockMapXData = nullptr; + MCSymbol *IPToStateXData = nullptr; + if (!FuncInfo.UnwindMap.empty()) + UnwindMapXData = Asm->OutContext.getOrCreateSymbol( + Twine("$stateUnwindMap$", ParentLinkageName)); + if (!FuncInfo.TryBlockMap.empty()) + TryBlockMapXData = Asm->OutContext.getOrCreateSymbol( + Twine("$tryMap$", ParentLinkageName)); + if (!FuncInfo.IPToStateList.empty()) + IPToStateXData = Asm->OutContext.getOrCreateSymbol( + Twine("$ip2state$", ParentLinkageName)); + + // FuncInfo { + // uint32_t MagicNumber + // int32_t MaxState; + // UnwindMapEntry *UnwindMap; + // uint32_t NumTryBlocks; + // TryBlockMapEntry *TryBlockMap; + // uint32_t IPMapEntries; + // IPToStateMapEntry *IPToStateMap; + // uint32_t UnwindHelp; // (x64/ARM only) + // ESTypeList *ESTypeList; + // int32_t EHFlags; + // } + // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. + // EHFlags & 2 -> ??? + // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
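
One point the struct comment above leaves implicit: on x64 every pointer-typed field is emitted as a 4-byte image-relative offset (see the createImageRel32 calls that follow), so the record laid down by the emission code below can be pictured as this hypothetical on-disk struct. The names are invented for illustration; the field order matches the EmitIntValue/EmitValue sequence exactly.

#include <cstdint>

// Illustrative byte layout of the FuncInfo record emitted below.
struct FuncInfoRecord {
  uint32_t MagicNumber;  // 0x19930522
  int32_t MaxState;      // number of unwind map entries
  uint32_t UnwindMap;    // RVA of the UnwindMapEntry array, or 0
  uint32_t NumTryBlocks;
  uint32_t TryBlockMap;  // RVA of the TryBlockMapEntry array, or 0
  uint32_t IPMapEntries;
  uint32_t IPToStateMap; // RVA of the IPToStateMapEntry array, or 0
  uint32_t UnwindHelp;   // frame offset (x64/ARM only)
  uint32_t ESTypeList;   // emitted as 0 here
  int32_t EHFlags;       // 1 = synchronous exceptions only
};
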
+ OS.EmitLabel(FuncInfoXData); + OS.EmitIntValue(0x19930522, 4); // MagicNumber + OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState + OS.EmitValue(createImageRel32(UnwindMapXData), 4); // UnwindMap + OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks + OS.EmitValue(createImageRel32(TryBlockMapXData), 4); // TryBlockMap + OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries + OS.EmitValue(createImageRel32(IPToStateXData), 4); // IPToStateMap + OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp + OS.EmitIntValue(0, 4); // ESTypeList + OS.EmitIntValue(1, 4); // EHFlags + + // UnwindMapEntry { + // int32_t ToState; + // void (*Action)(); + // }; + if (UnwindMapXData) { + OS.EmitLabel(UnwindMapXData); + for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) { + OS.EmitIntValue(UME.ToState, 4); // ToState + OS.EmitValue(createImageRel32(UME.Cleanup), 4); // Action + } + } + + // TryBlockMap { + // int32_t TryLow; + // int32_t TryHigh; + // int32_t CatchHigh; + // int32_t NumCatches; + // HandlerType *HandlerArray; + // }; + if (TryBlockMapXData) { + OS.EmitLabel(TryBlockMapXData); + SmallVector<MCSymbol *, 1> HandlerMaps; + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = nullptr; + + if (!TBME.HandlerArray.empty()) + HandlerMapXData = + Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$") + .concat(Twine(I)) + .concat("$") + .concat(ParentLinkageName)); + + HandlerMaps.push_back(HandlerMapXData); + + int CatchHigh = -1; + for (WinEHHandlerType &HT : TBME.HandlerArray) + CatchHigh = + std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]); + + assert(TBME.TryLow <= TBME.TryHigh); + OS.EmitIntValue(TBME.TryLow, 4); // TryLow + OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh + OS.EmitIntValue(CatchHigh, 4); // CatchHigh + OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches + OS.EmitValue(createImageRel32(HandlerMapXData), 4); // HandlerArray + } + + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = HandlerMaps[I]; + if (!HandlerMapXData) + continue; + // HandlerType { + // int32_t Adjectives; + // TypeDescriptor *Type; + // int32_t CatchObjOffset; + // void (*Handler)(); + // int32_t ParentFrameOffset; // x64 only + // }; + OS.EmitLabel(HandlerMapXData); + for (const WinEHHandlerType &HT : TBME.HandlerArray) { + MCSymbol *ParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(HT.Handler->getName())); + const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::Create( + ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + + // Get the frame escape label with the offset of the catch object. If + // the index is -1, then there is no catch object, and we should emit an + // offset of zero, indicating that no copy will occur. 
+ const MCExpr *FrameAllocOffsetRef = nullptr; + if (HT.CatchObjRecoverIdx >= 0) { + MCSymbol *FrameAllocOffset = + Asm->OutContext.getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(ParentF->getName()), + HT.CatchObjRecoverIdx); + FrameAllocOffsetRef = MCSymbolRefExpr::Create( + FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + } else { + FrameAllocOffsetRef = MCConstantExpr::Create(0, Asm->OutContext); + } + + OS.EmitIntValue(HT.Adjectives, 4); // Adjectives + OS.EmitValue(createImageRel32(HT.TypeDescriptor), 4); // Type + OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset + OS.EmitValue(createImageRel32(HT.Handler), 4); // Handler + OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + } + } + } + + // IPToStateMapEntry { + // void *IP; + // int32_t State; + // }; + if (IPToStateXData) { + OS.EmitLabel(IPToStateXData); + for (auto &IPStatePair : FuncInfo.IPToStateList) { + OS.EmitValue(createImageRel32(IPStatePair.first), 4); // IP + OS.EmitIntValue(IPStatePair.second, 4); // State + } } - Asm->OutStreamer.EmitWinCFIEndProc(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h index 538e132..5f4237f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -17,7 +17,9 @@ #include "EHStreamer.h" namespace llvm { +class GlobalValue; class MachineFunction; +class MCExpr; class Win64Exception : public EHStreamer { /// Per-function flag to indicate if personality info should be emitted. @@ -29,12 +31,19 @@ class Win64Exception : public EHStreamer { /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; + void emitCSpecificHandlerTable(); + + void emitCXXFrameHandler3Table(const MachineFunction *MF); + + const MCExpr *createImageRel32(const MCSymbol *Value); + const MCExpr *createImageRel32(const GlobalValue *GV); + public: //===--------------------------------------------------------------------===// // Main entry points. // Win64Exception(AsmPrinter *A); - virtual ~Win64Exception(); + ~Win64Exception() override; /// Emit all exception information that should come after the content. void endModule() override; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index b5e0929..371e20a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -20,14 +20,13 @@ namespace llvm { StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { assert(S); - DIDescriptor D(S); - assert((D.isCompileUnit() || D.isFile() || D.isSubprogram() || - D.isLexicalBlockFile() || D.isLexicalBlock()) && + assert((isa<DICompileUnit>(S) || isa<DIFile>(S) || isa<DISubprogram>(S) || + isa<DILexicalBlockBase>(S)) && "Unexpected scope info"); - DIScope Scope(S); - StringRef Dir = Scope.getDirectory(), - Filename = Scope.getFilename(); + auto *Scope = cast<DIScope>(S); + StringRef Dir = Scope->getDirectory(), + Filename = Scope->getFilename(); char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; if (Result) return Result; @@ -40,7 +39,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { if (Filename.find(':') == 1) Filepath = Filename; else - Filepath = (Dir + Twine("\\") + Filename).str(); + Filepath = (Dir + "\\" + Filename).str(); // Canonicalize the path. 
We have to do it textually because we may no longer
  // have access to the file in the filesystem.
@@ -81,7 +80,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {

 void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
                                                 const MachineFunction *MF) {
-  const MDNode *Scope = DL.getScope(MF->getFunction()->getContext());
+  const MDNode *Scope = DL.getScope();
   if (!Scope)
     return;
   StringRef Filename = getFullFilepath(Scope);
@@ -95,8 +94,8 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
   }

   FileNameRegistry.add(Filename);

-  MCSymbol *MCL = Asm->MMI->getContext().CreateTempSymbol();
-  Asm->OutStreamer.EmitLabel(MCL);
+  MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
+  Asm->OutStreamer->EmitLabel(MCL);
   CurFn->Instrs.push_back(MCL);
   InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine());
 }
@@ -121,7 +120,7 @@ void WinCodeViewLineTables::endModule() {
     return;

   assert(Asm != nullptr);
-  Asm->OutStreamer.SwitchSection(
+  Asm->OutStreamer->SwitchSection(
       Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
   Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);

@@ -136,7 +135,7 @@ void WinCodeViewLineTables::endModule() {
     emitDebugInfoForFunction(VisitedFunctions[I]);

   // This subsection holds a table mapping each file index to its offset in
   // the string table.
-  Asm->OutStreamer.AddComment("File index to string table offset subsection");
+  Asm->OutStreamer->AddComment("File index to string table offset subsection");
   Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
   size_t NumFilenames = FileNameRegistry.Infos.size();
   Asm->EmitInt32(8 * NumFilenames);
@@ -149,7 +148,7 @@ void WinCodeViewLineTables::endModule() {
   }

   // This subsection holds the string table.
-  Asm->OutStreamer.AddComment("String table");
+  Asm->OutStreamer->AddComment("String table");
   Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
   Asm->EmitInt32(FileNameRegistry.LastOffset);
   // The payload starts with a null character.
   Asm->EmitInt8(0);

   for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
     // Just emit unique filenames one by one, separated by a null character.
-    Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]);
+    Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]);
     Asm->EmitInt8(0);
   }

   // No more subsections. Fill with zeros to align the end of the section by 4.
-  Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
+  Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
   clear();
 }
@@ -190,8 +189,11 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
     return;
   assert(FI.End && "Don't know where the function ends?");

-  StringRef FuncName = getDISubprogram(GV).getDisplayName(),
-            GVName = GV->getName();
+  StringRef GVName = GV->getName();
+  StringRef FuncName;
+  if (auto *SP = getDISubprogram(GV))
+    FuncName = SP->getDisplayName();
+
   // FIXME Clang currently sets DisplayName to "bar" for a C++
   // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying
   // to demangle display names anyway, so let's just put a mangled name into
@@ -199,41 +201,41 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
   if (GVName.startswith("\01?"))
     FuncName = GVName.substr(1);
   // Emit a symbol subsection, required by VS2012+ to find function boundaries.
-  MCSymbol *SymbolsBegin = Asm->MMI->getContext().CreateTempSymbol(),
-           *SymbolsEnd = Asm->MMI->getContext().CreateTempSymbol();
-  Asm->OutStreamer.AddComment("Symbol subsection for " + Twine(FuncName));
+  MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
+           *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
+  Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName));
   Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION);
-  EmitLabelDiff(Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
-  Asm->OutStreamer.EmitLabel(SymbolsBegin);
+  EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
+  Asm->OutStreamer->EmitLabel(SymbolsBegin);
   {
-    MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(),
-             *ProcSegmentEnd = Asm->MMI->getContext().CreateTempSymbol();
-    EmitLabelDiff(Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
-    Asm->OutStreamer.EmitLabel(ProcSegmentBegin);
+    MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(),
+             *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol();
+    EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
+    Asm->OutStreamer->EmitLabel(ProcSegmentBegin);

     Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START);
     // Some bytes of this segment don't seem to be required for basic debugging,
     // so just fill them with zeroes.
-    Asm->OutStreamer.EmitFill(12, 0);
+    Asm->OutStreamer->EmitFill(12, 0);
     // This is the important bit that tells the debugger where the function
     // code is located and what its size is:
-    EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
-    Asm->OutStreamer.EmitFill(12, 0);
-    Asm->OutStreamer.EmitCOFFSecRel32(Fn);
-    Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+    EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
+    Asm->OutStreamer->EmitFill(12, 0);
+    Asm->OutStreamer->EmitCOFFSecRel32(Fn);
+    Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
     Asm->EmitInt8(0);
     // Emit the function display name as a null-terminated string.
-    Asm->OutStreamer.EmitBytes(FuncName);
+    Asm->OutStreamer->EmitBytes(FuncName);
     Asm->EmitInt8(0);
-    Asm->OutStreamer.EmitLabel(ProcSegmentEnd);
+    Asm->OutStreamer->EmitLabel(ProcSegmentEnd);

     // We're done with this function.
     Asm->EmitInt16(0x0002);
     Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END);
   }
-  Asm->OutStreamer.EmitLabel(SymbolsEnd);
+  Asm->OutStreamer->EmitLabel(SymbolsEnd);
   // Every subsection must be aligned to a 4-byte boundary.
-  Asm->OutStreamer.EmitFill((-FuncName.size()) % 4, 0);
+  Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0);

   // PCs/Instructions are grouped into segments sharing the same filename.
   // Pre-calculate the lengths (in instructions) of these segments and store
@@ -252,21 +254,21 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
   FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;

   // Emit a line table subsection, required to do PC-to-file:line lookup.
- Asm->OutStreamer.AddComment("Line table subsection for " + Twine(FuncName)); + Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName)); Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); - MCSymbol *LineTableBegin = Asm->MMI->getContext().CreateTempSymbol(), - *LineTableEnd = Asm->MMI->getContext().CreateTempSymbol(); - EmitLabelDiff(Asm->OutStreamer, LineTableBegin, LineTableEnd); - Asm->OutStreamer.EmitLabel(LineTableBegin); + MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(), + *LineTableEnd = Asm->MMI->getContext().createTempSymbol(); + EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd); + Asm->OutStreamer->EmitLabel(LineTableBegin); // Identify the function this subsection is for. - Asm->OutStreamer.EmitCOFFSecRel32(Fn); - Asm->OutStreamer.EmitCOFFSectionIndex(Fn); + Asm->OutStreamer->EmitCOFFSecRel32(Fn); + Asm->OutStreamer->EmitCOFFSectionIndex(Fn); // Insert padding after a 16-bit section index. Asm->EmitInt16(0); // Length of the function's code, in bytes. - EmitLabelDiff(Asm->OutStreamer, Fn, FI.End); + EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); // PC-to-linenumber lookup table: MCSymbol *FileSegmentEnd = nullptr; @@ -277,17 +279,17 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { if (FilenameSegmentLengths.count(J)) { // We came to a beginning of a new filename segment. if (FileSegmentEnd) - Asm->OutStreamer.EmitLabel(FileSegmentEnd); + Asm->OutStreamer->EmitLabel(FileSegmentEnd); StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename; assert(FileNameRegistry.Infos.count(CurFilename)); size_t IndexInStringTable = FileNameRegistry.Infos[CurFilename].FilenameID; // Each segment starts with the offset of the filename // in the string table. - Asm->OutStreamer.AddComment( + Asm->OutStreamer->AddComment( "Segment for file '" + Twine(CurFilename) + "' begins"); - MCSymbol *FileSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(FileSegmentBegin); + MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(FileSegmentBegin); Asm->EmitInt32(8 * IndexInStringTable); // Number of PC records in the lookup table. @@ -296,18 +298,18 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { // Full size of the segment for this filename, including the prev two // records. - FileSegmentEnd = Asm->MMI->getContext().CreateTempSymbol(); - EmitLabelDiff(Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); + FileSegmentEnd = Asm->MMI->getContext().createTempSymbol(); + EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); } // The first PC with the given linenumber and the linenumber itself. 
- EmitLabelDiff(Asm->OutStreamer, Fn, Instr); + EmitLabelDiff(*Asm->OutStreamer, Fn, Instr); Asm->EmitInt32(InstrInfo[Instr].LineNumber); } if (FileSegmentEnd) - Asm->OutStreamer.EmitLabel(FileSegmentEnd); - Asm->OutStreamer.EmitLabel(LineTableEnd); + Asm->OutStreamer->EmitLabel(FileSegmentEnd); + Asm->OutStreamer->EmitLabel(LineTableEnd); } void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { @@ -327,7 +329,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { DebugLoc PrologEndLoc; bool EmptyPrologue = true; for (const auto &MBB : *MF) { - if (!PrologEndLoc.isUnknown()) + if (PrologEndLoc) break; for (const auto &MI : MBB) { if (MI.isDebugValue()) @@ -336,8 +338,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { // First known non-DBG_VALUE and non-frame setup location marks // the beginning of the function body. // FIXME: do we need the first subcondition? - if (!MI.getFlag(MachineInstr::FrameSetup) && - (!MI.getDebugLoc().isUnknown())) { + if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { PrologEndLoc = MI.getDebugLoc(); break; } @@ -345,9 +346,8 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { } } // Record beginning of function if we have a non-empty prologue. - if (!PrologEndLoc.isUnknown() && !EmptyPrologue) { - DebugLoc FnStartDL = - PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); + if (PrologEndLoc && !EmptyPrologue) { + DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); maybeRecordLocation(FnStartDL, MF); } } @@ -364,10 +364,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { FnDebugInfo.erase(GV); VisitedFunctions.pop_back(); } else { - // Define end label for subprogram. - MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(FunctionEndSym); - CurFn->End = FunctionEndSym; + CurFn->End = Asm->getFunctionEnd(); } CurFn = nullptr; } @@ -377,7 +374,7 @@ void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) { if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) return; DebugLoc DL = MI->getDebugLoc(); - if (DL == PrevInstLoc || DL.isUnknown()) + if (DL == PrevInstLoc || !DL) return; maybeRecordLocation(DL, Asm->MF); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index 8492eac..c66d141 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -114,7 +114,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler { public: WinCodeViewLineTables(AsmPrinter *Asm); - ~WinCodeViewLineTables() { + ~WinCodeViewLineTables() override { for (DirAndFilenameToFilepathMapTy::iterator I = DirAndFilenameToFilepathMap.begin(), E = DirAndFilenameToFilepathMap.end(); diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp index 12f6bd7..fa17108 100644 --- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -31,10 +31,11 @@ using namespace llvm; namespace { class AtomicExpand: public FunctionPass { const TargetMachine *TM; + const TargetLowering *TLI; public: static char ID; // Pass identification, replacement for typeid explicit AtomicExpand(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { + : FunctionPass(ID), TM(TM), TLI(nullptr) { 
initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); } @@ -47,7 +48,7 @@ namespace { bool expandAtomicLoadToLL(LoadInst *LI); bool expandAtomicLoadToCmpXchg(LoadInst *LI); bool expandAtomicStore(StoreInst *SI); - bool expandAtomicRMW(AtomicRMWInst *AI); + bool tryExpandAtomicRMW(AtomicRMWInst *AI); bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -67,9 +68,9 @@ FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { } bool AtomicExpand::runOnFunction(Function &F) { - if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand()) + if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand()) return false; - auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering(); + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); SmallVector<Instruction *, 1> AtomicInsts; @@ -91,7 +92,7 @@ bool AtomicExpand::runOnFunction(Function &F) { auto FenceOrdering = Monotonic; bool IsStore, IsLoad; - if (TargetLowering->getInsertFencesForAtomic()) { + if (TLI->getInsertFencesForAtomic()) { if (LI && isAtLeastAcquire(LI->getOrdering())) { FenceOrdering = LI->getOrdering(); LI->setOrdering(Monotonic); @@ -107,9 +108,9 @@ bool AtomicExpand::runOnFunction(Function &F) { FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(Monotonic); IsStore = IsLoad = true; - } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() && - (isAtLeastRelease(CASI->getSuccessOrdering()) || - isAtLeastAcquire(CASI->getSuccessOrdering()))) { + } else if (CASI && !TLI->hasLoadLinkedStoreConditional() && + (isAtLeastRelease(CASI->getSuccessOrdering()) || + isAtLeastAcquire(CASI->getSuccessOrdering()))) { // If a compare and swap is lowered to LL/SC, we can do smarter fence // insertion, with a stronger one on the success path than on the // failure path. As a result, fence insertion is directly done by @@ -125,20 +126,22 @@ bool AtomicExpand::runOnFunction(Function &F) { } } - if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) { + if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) { MadeChange |= expandAtomicLoad(LI); - } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) { + } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) { MadeChange |= expandAtomicStore(SI); } else if (RMWI) { // There are two different ways of expanding RMW instructions: // - into a load if it is idempotent // - into a Cmpxchg/LL-SC loop otherwise // we try them in that order. 
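
For context, an atomicrmw is idempotent when the operation provably leaves the stored value unchanged, e.g. x + 0, x | 0, x ^ 0, or x & ~0, so it can be lowered to a plain atomic load. One plausible shape for the isIdempotentRMW predicate declared earlier, sketched here for illustration (not necessarily the exact body in this revision):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: true when applying the RMW operation cannot change memory.
static bool isIdempotentRMWSketch(AtomicRMWInst *RMWI) {
  auto *C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;
  switch (RMWI->getOperation()) {
  case AtomicRMWInst::Add: // x + 0
  case AtomicRMWInst::Sub: // x - 0
  case AtomicRMWInst::Or:  // x | 0
  case AtomicRMWInst::Xor: // x ^ 0
    return C->isZero();
  case AtomicRMWInst::And: // x & ~0
    return C->isMinusOne();
  default:
    return false;
  }
}

The hunk that follows restructures how this test combines with the expansion path:
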
- MadeChange |= (isIdempotentRMW(RMWI) && - simplifyIdempotentRMW(RMWI)) || - (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) && - expandAtomicRMW(RMWI)); - } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) { + + if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { + MadeChange = true; + } else { + MadeChange |= tryExpandAtomicRMW(RMWI); + } + } else if (CASI && TLI->hasLoadLinkedStoreConditional()) { MadeChange |= expandAtomicCmpXchg(CASI); } } @@ -149,13 +152,9 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, bool IsStore, bool IsLoad) { IRBuilder<> Builder(I); - auto LeadingFence = - TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence( - Builder, Order, IsStore, IsLoad); + auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad); - auto TrailingFence = - TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence( - Builder, Order, IsStore, IsLoad); + auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad); // The trailing fence is emitted before the instruction instead of after // because there is no easy way of setting Builder insertion point after // an instruction. So we must erase it from the BB, and insert it back @@ -171,16 +170,13 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, } bool AtomicExpand::expandAtomicLoad(LoadInst *LI) { - if (TM->getSubtargetImpl() - ->getTargetLowering() - ->hasLoadLinkedStoreConditional()) + if (TLI->hasLoadLinkedStoreConditional()) return expandAtomicLoadToLL(LI); else return expandAtomicLoadToCmpXchg(LI); } bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); IRBuilder<> Builder(LI); // On some architectures, load-linked instructions are atomic for larger @@ -218,7 +214,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // atomic if implemented as a native store. So we replace them by an // atomic swap, that can be implemented for example as a ldrex/strex on ARM // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. - // It is the responsibility of the target to only return true in + // It is the responsibility of the target to only signal expansion via // shouldExpandAtomicRMW in cases where this is required and possible. IRBuilder<> Builder(SI); AtomicRMWInst *AI = @@ -227,16 +223,26 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. 
- return expandAtomicRMW(AI); + return tryExpandAtomicRMW(AI); } -bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { - if (TM->getSubtargetImpl() - ->getTargetLowering() - ->hasLoadLinkedStoreConditional()) +bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { + switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + case TargetLoweringBase::AtomicRMWExpansionKind::None: + return false; + case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { + assert(TLI->hasLoadLinkedStoreConditional() && + "TargetLowering requested we expand AtomicRMW instruction into " + "load-linked/store-conditional combos, but such instructions aren't " + "supported"); + return expandAtomicRMWToLLSC(AI); - else + } + case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { return expandAtomicRMWToCmpXchg(AI); + } + } + llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } /// Emit IR to implement the given atomicrmw operation on values in registers, @@ -277,7 +283,6 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, } bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); AtomicOrdering MemOpOrder = AI->getOrdering(); Value *Addr = AI->getPointerOperand(); BasicBlock *BB = AI->getParent(); @@ -397,7 +402,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { } bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); AtomicOrdering FailureOrder = CI->getFailureOrdering(); Value *Addr = CI->getPointerOperand(); @@ -551,13 +555,10 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { } bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); - if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) expandAtomicLoad(ResultingLoad); return true; } - return false; } diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 72da806..82f5c48 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,633 +15,23 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <utility> using namespace llvm; -static cl::opt<unsigned> -PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), - cl::desc("Threshold for partial unrolling"), cl::Hidden); - #define DEBUG_TYPE "basictti" -namespace { - -class BasicTTI final : public ImmutablePass, public TargetTransformInfo { - const TargetMachine *TM; - - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - - /// Estimate the cost overhead of SK_Alternate shuffle. 
- unsigned getAltShuffleOverhead(Type *Ty) const; - - const TargetLoweringBase *getTLI() const { - return TM->getSubtargetImpl()->getTargetLowering(); - } - -public: - BasicTTI() : ImmutablePass(ID), TM(nullptr) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) { - initializeBasicTTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { - pushTTIStack(this); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo*)this; - return this; - } - - bool hasBranchDivergence() const override; - - /// \name Scalar TTI Implementations - /// @{ - - bool isLegalAddImmediate(int64_t imm) const override; - bool isLegalICmpImmediate(int64_t imm) const override; - bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const override; - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const override; - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTypeLegal(Type *Ty) const override; - unsigned getJumpBufAlignment() const override; - unsigned getJumpBufSize() const override; - bool shouldBuildLookupTables() const override; - bool haveFastSqrt(Type *Ty) const override; - void getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const override; - - /// @} - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override; - unsigned getMaxInterleaveFactor() const override; - unsigned getRegisterBitWidth(bool Vector) const override; - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind, OperandValueProperties, - OperandValueProperties) const override; - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const override; - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const override; - unsigned getCFInstrCost(unsigned Opcode) const override; - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const override; - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const override; - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override; - unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, - ArrayRef<Type*> Tys) const override; - unsigned getNumberOfParts(Type *Tp) const override; - unsigned getAddressComputationCost( Type *Ty, bool IsComplex) const override; - unsigned getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwise) const override; - - /// @} -}; - -} - -INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", - "Target independent code generator's TTI", true, true, false) -char BasicTTI::ID = 0; - -ImmutablePass * -llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) { - return new BasicTTI(TM); -} - -bool BasicTTI::hasBranchDivergence() const { return false; } - -bool BasicTTI::isLegalAddImmediate(int64_t imm) const { - return getTLI()->isLegalAddImmediate(imm); -} - -bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { - return 
getTLI()->isLegalICmpImmediate(imm); -} - -bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { - TargetLoweringBase::AddrMode AM; - AM.BaseGV = BaseGV; - AM.BaseOffs = BaseOffset; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Scale; - return getTLI()->isLegalAddressingMode(AM, Ty); -} - -int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { - TargetLoweringBase::AddrMode AM; - AM.BaseGV = BaseGV; - AM.BaseOffs = BaseOffset; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Scale; - return getTLI()->getScalingFactorCost(AM, Ty); -} - -bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { - return getTLI()->isTruncateFree(Ty1, Ty2); -} - -bool BasicTTI::isTypeLegal(Type *Ty) const { - EVT T = getTLI()->getValueType(Ty); - return getTLI()->isTypeLegal(T); -} - -unsigned BasicTTI::getJumpBufAlignment() const { - return getTLI()->getJumpBufAlignment(); -} - -unsigned BasicTTI::getJumpBufSize() const { - return getTLI()->getJumpBufSize(); -} - -bool BasicTTI::shouldBuildLookupTables() const { - const TargetLoweringBase *TLI = getTLI(); - return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); -} - -bool BasicTTI::haveFastSqrt(Type *Ty) const { - const TargetLoweringBase *TLI = getTLI(); - EVT VT = TLI->getValueType(Ty); - return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); -} - -void BasicTTI::getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const { - // This unrolling functionality is target independent, but to provide some - // motivation for its intended use, for x86: - - // According to the Intel 64 and IA-32 Architectures Optimization Reference - // Manual, Intel Core models and later have a loop stream detector - // (and associated uop queue) that can benefit from partial unrolling. - // The relevant requirements are: - // - The loop must have no more than 4 (8 for Nehalem and later) branches - // taken, and none of them may be calls. - // - The loop can have no more than 18 (28 for Nehalem and later) uops. - - // According to the Software Optimization Guide for AMD Family 15h Processors, - // models 30h-4fh (Steamroller and later) have a loop predictor and loop - // buffer which can benefit from partial unrolling. - // The relevant requirements are: - // - The loop must have fewer than 16 branches - // - The loop must have less than 40 uops in all executed loop branches - - // The number of taken branches in a loop is hard to estimate here, and - // benchmarking has revealed that it is better not to be conservative when - // estimating the branch count. As a result, we'll ignore the branch limits - // until someone finds a case where it matters in practice. - - unsigned MaxOps; - const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(F); - if (PartialUnrollingThreshold.getNumOccurrences() > 0) - MaxOps = PartialUnrollingThreshold; - else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) - MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; - else - return; - - // Scan the loop: don't unroll loops with calls. 
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; - - for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) - if (isa<CallInst>(J) || isa<InvokeInst>(J)) { - ImmutableCallSite CS(J); - if (const Function *F = CS.getCalledFunction()) { - if (!TopTTI->isLoweredToCall(F)) - continue; - } - - return; - } - } - - // Enable runtime and partial unrolling up to the specified size. - UP.Partial = UP.Runtime = true; - UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps; -} - -//===----------------------------------------------------------------------===// -// -// Calls used by the vectorizers. -// -//===----------------------------------------------------------------------===// - -unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert, - bool Extract) const { - assert (Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; - - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - if (Insert) - Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); - if (Extract) - Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - - return Cost; -} - -unsigned BasicTTI::getNumberOfRegisters(bool Vector) const { - return 1; -} - -unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { - return 32; -} - -unsigned BasicTTI::getMaxInterleaveFactor() const { - return 1; -} - -unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind, OperandValueKind, - OperandValueProperties, - OperandValueProperties) const { - // Check if any of the operands are vector operands. - const TargetLoweringBase *TLI = getTLI(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); - - bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); - // Assume that floating point arithmetic operations cost twice as much as - // integer operations. - unsigned OpCost = (IsFloat ? 2 : 1); - - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2 * OpCost; - return LT.first * 1 * OpCost; - } - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. - return LT.first * 2 * OpCost; - } - - // Else, assume that we need to scalarize this op. - if (Ty->isVectorTy()) { - unsigned Num = Ty->getVectorNumElements(); - unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType()); - // return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(Ty, true, true) + Num * Cost; - } - - // We don't know anything about this scalar instruction. - return OpCost; -} - -unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { - assert(Ty->isVectorTy() && "Can only shuffle vectors"); - unsigned Cost = 0; - // Shuffle cost is equal to the cost of extracting element from its argument - // plus the cost of inserting them onto the result vector. - - // e.g. 
<4 x float> has a mask of <0,5,2,7> i.e we need to extract from index - // 0 of first vector, index 1 of second vector,index 2 of first vector and - // finally index 3 of second vector and insert them at index <0,1,2,3> of - // result vector. - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); - Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - return Cost; -} - -unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) const { - if (Kind == SK_Alternate) { - return getAltShuffleOverhead(Tp); - } - return 1; -} - -unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - const TargetLoweringBase *TLI = getTLI(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src); - std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst); - - // Check for NOOP conversions. - if (SrcLT.first == DstLT.first && - SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - - // Bitcast between types that are legalized to the same type are free. - if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) - return 0; - } - - if (Opcode == Instruction::Trunc && - TLI->isTruncateFree(SrcLT.second, DstLT.second)) - return 0; - - if (Opcode == Instruction::ZExt && - TLI->isZExtFree(SrcLT.second, DstLT.second)) - return 0; - - // If the cast is marked as legal (or promote) then assume low cost. - if (SrcLT.first == DstLT.first && - TLI->isOperationLegalOrPromote(ISD, DstLT.second)) - return 1; - - // Handle scalar conversions. - if (!Src->isVectorTy() && !Dst->isVectorTy()) { - - // Scalar bitcasts are usually free. - if (Opcode == Instruction::BitCast) - return 0; - - // Just check the op cost. If the operation is legal then assume it costs 1. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return 1; - - // Assume that illegal scalar instruction are expensive. - return 4; - } - - // Check vector-to-vector casts. - if (Dst->isVectorTy() && Src->isVectorTy()) { - - // If the cast is between same-sized registers, then the check is simple. - if (SrcLT.first == DstLT.first && - SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - - // Assume that Zext is done using AND. - if (Opcode == Instruction::ZExt) - return 1; - - // Assume that sext is done using SHL and SRA. - if (Opcode == Instruction::SExt) - return 2; - - // Just check the op cost. If the operation is legal then assume it costs - // 1 and multiply by the type-legalization overhead. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return SrcLT.first * 1; - } - - // If we are converting vectors and the operation is illegal, or - // if the vectors are legalized to different types, estimate the - // scalarization costs. - unsigned Num = Dst->getVectorNumElements(); - unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(), - Src->getScalarType()); - - // Return the cost of multiple scalar invocation plus the cost of - // inserting and extracting the values. - return getScalarizationOverhead(Dst, true, true) + Num * Cost; - } - - // We already handled vector-to-vector and scalar-to-scalar conversions. This - // is where we handle bitcast between vectors and scalars. We need to assume - // that the conversion is scalarized in one way or another. 
- if (Opcode == Instruction::BitCast) - // Illegal bitcasts are done by storing and loading from a stack slot. - return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + - (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0); - - llvm_unreachable("Unhandled cast"); - } - -unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { - // Branches are assumed to be predicted. - return 0; -} - -unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - const TargetLoweringBase *TLI = getTLI(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - // Selects on vectors are actually vector selects. - if (ISD == ISD::SELECT) { - assert(CondTy && "CondTy must exist"); - if (CondTy->isVectorTy()) - ISD = ISD::VSELECT; - } - - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); - - if (!(ValTy->isVectorTy() && !LT.second.isVector()) && - !TLI->isOperationExpand(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. Multiply - // by the type-legalization overhead. - return LT.first * 1; - } - - // Otherwise, assume that the cast is scalarized. - if (ValTy->isVectorTy()) { - unsigned Num = ValTy->getVectorNumElements(); - if (CondTy) - CondTy = CondTy->getScalarType(); - unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(), - CondTy); - - // Return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(ValTy, true, false) + Num * Cost; - } - - // Unknown scalar opcode. - return 1; -} - -unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType()); - - return LT.first; -} - -unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - assert(!Src->isVoidTy() && "Invalid type"); - std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); - - // Assuming that all loads of legal types cost 1. - unsigned Cost = LT.first; - - if (Src->isVectorTy() && - Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { - // This is a vector load that legalizes to a larger type than the vector - // itself. Unless the corresponding extending load or truncating store is - // legal, then this will scalarize. - TargetLowering::LegalizeAction LA = TargetLowering::Expand; - EVT MemVT = getTLI()->getValueType(Src, true); - if (MemVT.isSimple() && MemVT != MVT::Other) { - if (Opcode == Instruction::Store) - LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); - else - LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); - } - - if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { - // This is a vector load/store for some illegal type that is scalarized. - // We must account for the cost of building or decomposing the vector. - Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, - Opcode == Instruction::Store); - } - } - - return Cost; -} - -unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> Tys) const { - unsigned ISD = 0; - switch (IID) { - default: { - // Assume that we need to scalarize this intrinsic. 
- unsigned ScalarizationCost = 0; - unsigned ScalarCalls = 1; - if (RetTy->isVectorTy()) { - ScalarizationCost = getScalarizationOverhead(RetTy, true, false); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { - if (Tys[i]->isVectorTy()) { - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - } - - return ScalarCalls + ScalarizationCost; - } - // Look for intrinsics that can be lowered directly or turned into a scalar - // intrinsic call. - case Intrinsic::sqrt: ISD = ISD::FSQRT; break; - case Intrinsic::sin: ISD = ISD::FSIN; break; - case Intrinsic::cos: ISD = ISD::FCOS; break; - case Intrinsic::exp: ISD = ISD::FEXP; break; - case Intrinsic::exp2: ISD = ISD::FEXP2; break; - case Intrinsic::log: ISD = ISD::FLOG; break; - case Intrinsic::log10: ISD = ISD::FLOG10; break; - case Intrinsic::log2: ISD = ISD::FLOG2; break; - case Intrinsic::fabs: ISD = ISD::FABS; break; - case Intrinsic::minnum: ISD = ISD::FMINNUM; break; - case Intrinsic::maxnum: ISD = ISD::FMAXNUM; break; - case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; - case Intrinsic::floor: ISD = ISD::FFLOOR; break; - case Intrinsic::ceil: ISD = ISD::FCEIL; break; - case Intrinsic::trunc: ISD = ISD::FTRUNC; break; - case Intrinsic::nearbyint: - ISD = ISD::FNEARBYINT; break; - case Intrinsic::rint: ISD = ISD::FRINT; break; - case Intrinsic::round: ISD = ISD::FROUND; break; - case Intrinsic::pow: ISD = ISD::FPOW; break; - case Intrinsic::fma: ISD = ISD::FMA; break; - case Intrinsic::fmuladd: ISD = ISD::FMA; break; - // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - return 0; - } - - const TargetLoweringBase *TLI = getTLI(); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); - - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; - } - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. - return LT.first * 2; - } - - // If we can't lower fmuladd into an FMA estimate the cost as a floating - // point mul followed by an add. - if (IID == Intrinsic::fmuladd) - return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + - TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); - - // Else, assume that we need to scalarize this intrinsic. For math builtins - // this will emit a costly libcall, adding call overhead and spills. Make it - // very expensive. - if (RetTy->isVectorTy()) { - unsigned Num = RetTy->getVectorNumElements(); - unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(), - Tys); - return 10 * Cost * Num; - } - - // This is going to be turned into a library call, make it expensive. - return 10; -} - -unsigned BasicTTI::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp); - return LT.first; -} - -unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { - return 0; -} +// This flag is used by the template base class for BasicTTIImpl, and here to +// provide a definition. 
+cl::opt<unsigned> + llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), + cl::desc("Threshold for partial unrolling"), + cl::Hidden); -unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwise) const { - assert(Ty->isVectorTy() && "Expect a vector type"); - unsigned NumVecElts = Ty->getVectorNumElements(); - unsigned NumReduxLevels = Log2_32(NumVecElts); - unsigned ArithCost = NumReduxLevels * - TopTTI->getArithmeticInstrCost(Opcode, Ty); - // Assume the pairwise shuffles add a cost. - unsigned ShuffleCost = - NumReduxLevels * (IsPairwise + 1) * - TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty); - return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); -} +BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F) + : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 2128da1..b8d9a1a 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -601,8 +602,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && + MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; @@ -728,6 +728,62 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } +static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { + auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); + auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); + if ((E1 - I1) != (E2 - I2)) + return false; + for (; I1 != E1; ++I1, ++I2) { + if (**I1 != **I2) + return false; + } + return true; +} + +static void +removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, + MachineBasicBlock &MBBCommon) { + // Remove MMOs from memory operations in the common block + // when they do not match the ones from the block being tail-merged. + // This ensures later passes conservatively compute dependencies. + MachineBasicBlock *MBB = MBBIStartPos->getParent(); + // Note CommonTailLen does not necessarily match the size of + // the common BB nor all its instructions because of debug + // instruction differences.
+ unsigned CommonTailLen = 0; + for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos) + ++CommonTailLen; + + MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(); + MachineBasicBlock::reverse_iterator MBBIE = MBB->rend(); + MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin(); + MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend(); + + while (CommonTailLen--) { + assert(MBBI != MBBIE && "Reached BB end within common tail length!"); + (void)MBBIE; + + if (MBBI->isDebugValue()) { + ++MBBI; + continue; + } + + while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) + ++MBBICommon; + + assert(MBBICommon != MBBIECommon && + "Reached BB end within common tail length!"); + assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); + + if (MBBICommon->mayLoad() || MBBICommon->mayStore()) + if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) + MBBICommon->clearMemRefs(); + + ++MBBI; + ++MBBICommon; + } +} + // See if any of the blocks in MergePotentials (which all have a common single // successor, or all have no successor) can be tail-merged. If there is a // successor, any blocks in MergePotentials that are not tail-merged and @@ -762,7 +818,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Sort by hash value so that blocks with identical end sequences sort // together. - std::stable_sort(MergePotentials.begin(), MergePotentials.end()); + array_pod_sort(MergePotentials.begin(), MergePotentials.end()); // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { @@ -841,6 +897,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); + // Remove MMOs from memory operations as needed. + removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. @@ -1145,6 +1203,11 @@ ReoptimizeBlock: if (FallThrough == MF.end()) { // TODO: Simplify preds to not branch here if possible! + } else if (FallThrough->isLandingPad()) { + // Don't rewrite to a landing pad fallthrough. That could lead to the case + // where a BB jumps to more than one landing pad. + // TODO: Is it ever worth rewriting predecessors which don't already + // jump to a landing pad, and so can safely jump to the fallthrough? } else { // Rewrite all predecessors of the old block to go to the fallthrough // instead. @@ -1620,8 +1683,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // Also avoid moving code above a predicated instruction since it's hard to // reason about register liveness with predicated instructions. bool DontMoveAcrossStore = true; - if (!PI->isSafeToMove(TII, nullptr, DontMoveAcrossStore) || - TII->isPredicated(PI)) + if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(PI)) return MBB->end(); @@ -1759,7 +1821,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; bool DontMoveAcrossStore = true; - if (!TIB->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) + if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore)) break; // Remove kills from LocalDefsSet; these registers had short live ranges.
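A standalone model of the rule the two helpers above implement may help here: when tail merging keeps one copy of a memory instruction, its memory operands (MMOs) survive only if every merged copy carried an identical list; otherwise the list is cleared, and an empty list tells later passes to assume the access can alias anything. This is a minimal sketch in plain C++, not the LLVM API: MemOperand, MemInstr, and mergeInto are invented stand-ins for MachineMemOperand, MachineInstr, and the clearMemRefs call.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Invented stand-in for llvm::MachineMemOperand: just an address and a size.
    struct MemOperand {
      uint64_t Address;
      uint64_t Size;
      bool operator==(const MemOperand &O) const {
        return Address == O.Address && Size == O.Size;
      }
    };

    // Invented stand-in for a memory-touching MachineInstr. An empty MMO list
    // means "may access anything" to downstream consumers.
    struct MemInstr {
      std::vector<MemOperand> MMOs;
    };

    // Same shape as hasIdenticalMMOs above: equal length, pairwise-equal entries.
    static bool hasIdenticalMMOs(const MemInstr &A, const MemInstr &B) {
      return A.MMOs == B.MMOs;
    }

    // The conservative merge rule: keep the MMOs only when both copies agree.
    static void mergeInto(MemInstr &Common, const MemInstr &Other) {
      if (!hasIdenticalMMOs(Common, Other))
        Common.MMOs.clear();
    }

    int main() {
      MemInstr Common{{{0x1000, 4}}};
      MemInstr Other{{{0x2000, 4}}};
      mergeInto(Common, Other);
      std::cout << "MMOs kept: " << Common.MMOs.size() << "\n"; // prints 0
      return 0;
    }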
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 307dec5..2c6eaf3 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -21,13 +21,13 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); - initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); - initializeExpandPostRAPass(Registry); initializeExpandISelPseudosPass(Registry); + initializeExpandPostRAPass(Registry); initializeFinalizeMachineBundlesPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); @@ -37,31 +37,35 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); + initializeLowerIntrinsicsPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); - initializeMachineCopyPropagationPass(Registry); - initializeMachineCombinerPass(Registry); initializeMachineCSEPass(Registry); + initializeMachineCombinerPass(Registry); + initializeMachineCopyPropagationPass(Registry); initializeMachineDominatorTreePass(Registry); - initializeMachinePostDominatorTreePass(Registry); + initializeMachineFunctionPrinterPassPass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); + initializeMachinePostDominatorTreePass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); initializeOptimizePHIsPass(Registry); + initializePEIPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); initializePostMachineSchedulerPass(Registry); initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); - initializePEIPass(Registry); initializeRegisterCoalescerPass(Registry); + initializeShrinkWrapPass(Registry); initializeSlotIndexesPass(Registry); - initializeStackProtectorPass(Registry); initializeStackColoringPass(Registry); + initializeStackMapLivenessPass(Registry); + initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeTailDuplicatePassPass(Registry); initializeTargetPassConfigPass(Registry); @@ -71,9 +75,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); initializeVirtRegRewriterPass(Registry); - initializeLowerIntrinsicsPass(Registry); - initializeMachineFunctionPrinterPassPass(Registry); - initializeStackMapLivenessPass(Registry); + initializeWinEHPreparePass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 82cd380..2c1858b 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include 
"llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" @@ -32,13 +33,13 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -72,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); +static cl::opt<bool> + DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), + cl::desc("Disable GC optimizations in CodeGenPrepare")); + static cl::opt<bool> DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); @@ -119,7 +124,6 @@ class TypePromotionTransaction; const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; - DominatorTree *DT; /// CurInstIterator - As we scan instructions optimizing them, this is the /// next instruction to optimize. Xforms that can invalidate this should @@ -137,8 +141,7 @@ class TypePromotionTransaction; /// promotion for the current function. InstrToOrigTy PromotedInsts; - /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to - /// be updated. + /// ModifiedDT - If CFG is modified in anyway. bool ModifiedDT; /// OptSize - True if optimizing for size. @@ -156,8 +159,8 @@ class TypePromotionTransaction; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfo>(); - AU.addRequired<TargetTransformInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } private: @@ -181,8 +184,9 @@ class TypePromotionTransaction; bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInst); + unsigned CreatedInstCost); bool splitBranchCondition(Function &F); + bool simplifyOffsetableRelocate(Instruction &I); }; } @@ -205,14 +209,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { ModifiedDT = false; if (TM) - TLI = TM->getSubtargetImpl()->getTargetLowering(); - TLInfo = &getAnalysis<TargetLibraryInfo>(); - TTI = &getAnalysis<TargetTransformInfo>(); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; - OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize); + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + OptSize = F.hasFnAttribute(Attribute::OptimizeForSize); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. 
@@ -248,9 +248,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BasicBlock *BB = I++; bool ModifiedDTOnIteration = false; MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); - + // Restart BB iteration if the dominator tree of the Function was changed - ModifiedDT |= ModifiedDTOnIteration; if (ModifiedDTOnIteration) break; } @@ -293,13 +292,18 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (EverMadeChange || MadeChange) MadeChange |= EliminateFallThrough(F); - if (MadeChange) - ModifiedDT = true; EverMadeChange |= MadeChange; } - if (ModifiedDT && DT) - DT->recalculate(F); + if (!DisableGCOpts) { + SmallVector<Instruction *, 2> Statepoints; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (isStatepoint(I)) + Statepoints.push_back(&I); + for (auto &I : Statepoints) + EverMadeChange |= simplifyOffsetableRelocate(*I); + } return EverMadeChange; } @@ -326,7 +330,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // Remember if SinglePred was the entry block of the function. // If so, we will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(BB, this); + MergeBasicBlockIntoOnlyPred(BB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -466,7 +470,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, this); + MergeBasicBlockIntoOnlyPred(DestBB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -508,19 +512,188 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. 
BB->replaceAllUsesWith(DestBB); - if (DT && !ModifiedDT) { - BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); - BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); - BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); - DT->changeImmediateDominator(DestBB, NewIDom); - DT->eraseNode(BB); - } BB->eraseFromParent(); ++NumBlocksElim; DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } +// Computes a map of base pointer relocation instructions to corresponding +// derived pointer relocation instructions given a vector of all relocate calls +static void computeBaseDerivedRelocateMap( + const SmallVectorImpl<User *> &AllRelocateCalls, + DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> & + RelocateInstMap) { + // Collect information in two maps: one primarily for locating the base object + // while filling the second map; the second map is the final structure holding + // a mapping between Base and corresponding Derived relocate calls + DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap; + for (auto &U : AllRelocateCalls) { + GCRelocateOperands ThisRelocate(U); + IntrinsicInst *I = cast<IntrinsicInst>(U); + auto K = std::make_pair(ThisRelocate.getBasePtrIndex(), + ThisRelocate.getDerivedPtrIndex()); + RelocateIdxMap.insert(std::make_pair(K, I)); + } + for (auto &Item : RelocateIdxMap) { + std::pair<unsigned, unsigned> Key = Item.first; + if (Key.first == Key.second) + // Base relocation: nothing to insert + continue; + + IntrinsicInst *I = Item.second; + auto BaseKey = std::make_pair(Key.first, Key.first); + + // We're iterating over RelocateIdxMap so we cannot modify it. + auto MaybeBase = RelocateIdxMap.find(BaseKey); + if (MaybeBase == RelocateIdxMap.end()) + // TODO: We might want to insert a new base object relocate and gep off + // that, if there are enough derived object relocates. + continue; + + RelocateInstMap[MaybeBase->second].push_back(I); + } +} + +// Accepts a GEP and extracts the operands into a vector provided they're all +// small integer constants +static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, + SmallVectorImpl<Value *> &OffsetV) { + for (unsigned i = 1; i < GEP->getNumOperands(); i++) { + // Only accept small constant integer operands + auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!Op || Op->getZExtValue() > 20) + return false; + } + + for (unsigned i = 1; i < GEP->getNumOperands(); i++) + OffsetV.push_back(GEP->getOperand(i)); + return true; +} + +// Takes a RelocatedBase (base pointer relocation instruction) and Targets to +// replace, computes a replacement, and applies it. +static bool +simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, + const SmallVectorImpl<IntrinsicInst *> &Targets) { + bool MadeChange = false; + for (auto &ToReplace : Targets) { + GCRelocateOperands MasterRelocate(RelocatedBase); + GCRelocateOperands ThisRelocate(ToReplace); + + assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() && + "Not relocating a derived object of the original base object"); + if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) { + // A duplicate relocate call. TODO: coalesce duplicates.
+ continue; + } + + Value *Base = ThisRelocate.getBasePtr(); + auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr()); + if (!Derived || Derived->getPointerOperand() != Base) + continue; + + SmallVector<Value *, 2> OffsetV; + if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) + continue; + + // Create a Builder and replace the target callsite with a gep + assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator"); + + // Insert after RelocatedBase + IRBuilder<> Builder(RelocatedBase->getNextNode()); + Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); + + // If gc_relocate does not match the actual type, cast it to the right type. + // In theory, there must be a bitcast after gc_relocate if the type does not + // match, and we should reuse it to get the derived pointer. But there could be + // cases like this: + // bb1: + // ... + // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) + // br label %merge + // + // bb2: + // ... + // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) + // br label %merge + // + // merge: + // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] + // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)* + // + // In this case, we cannot find the bitcast anymore. So we insert a new bitcast + // whether or not there is already one. In this way, we can handle all cases, and + // the extra bitcast should be optimized away in later passes. + Instruction *ActualRelocatedBase = RelocatedBase; + if (RelocatedBase->getType() != Base->getType()) { + ActualRelocatedBase = + cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType())); + } + Value *Replacement = Builder.CreateGEP( + Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); + Instruction *ReplacementInst = cast<Instruction>(Replacement); + Replacement->takeName(ToReplace); + // If the newly generated derived pointer's type does not match the original derived + // pointer's type, cast the new derived pointer to match it. Same reasoning as above. + Instruction *ActualReplacement = ReplacementInst; + if (ReplacementInst->getType() != ToReplace->getType()) { + ActualReplacement = + cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType())); + } + ToReplace->replaceAllUsesWith(ActualReplacement); + ToReplace->eraseFromParent(); + + MadeChange = true; + } + return MadeChange; +} + +// Turns this: +// +// %base = ... +// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = relocate(%tok, i32 4, i32 4) +// %ptr' = relocate(%tok, i32 4, i32 5) +// %val = load %ptr' +// +// into this: +// +// %base = ...
+// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = gc.relocate(%tok, i32 4, i32 4) +// %ptr' = gep %base' + 15 +// %val = load %ptr' +bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { + bool MadeChange = false; + SmallVector<User *, 2> AllRelocateCalls; + + for (auto *U : I.users()) + if (isGCRelocate(dyn_cast<Instruction>(U))) + // Collect all the relocate calls associated with a statepoint + AllRelocateCalls.push_back(U); + + // We need at least one base pointer relocation + one derived pointer + // relocation to mangle + if (AllRelocateCalls.size() < 2) + return false; + + // RelocateInstMap is a mapping from the base relocate instruction to the + // corresponding derived relocate instructions + DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap; + computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); + if (RelocateInstMap.empty()) + return false; + + for (auto &Item : RelocateInstMap) + // Item.first is the RelocatedBase to offset against + // Item.second is the vector of Targets to replace + MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); + return MadeChange; +} + /// SinkCast - Sink the specified cast instruction into its user blocks static bool SinkCast(CastInst *CI) { BasicBlock *DefBB = CI->getParent(); @@ -555,11 +728,11 @@ static bool SinkCast(CastInst *CI) { InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", InsertPt); - MadeChange = true; } // Replace a use of the cast with a use of the new cast. TheUse = InsertedCast; + MadeChange = true; ++NumCastUses; } @@ -609,13 +782,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ return SinkCast(CI); } -/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce +/// CombineUAddWithOverflow - try to combine CI into a call to the +/// llvm.uadd.with.overflow intrinsic if possible. +/// +/// Return true if any changes were made. +static bool CombineUAddWithOverflow(CmpInst *CI) { + Value *A, *B; + Instruction *AddI; + if (!match(CI, + m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) + return false; + + Type *Ty = AddI->getType(); + if (!isa<IntegerType>(Ty)) + return false; + + // We don't want to move around uses of condition values this late, so we + // check if it is legal to create the call to the intrinsic in the basic + // block containing the icmp: + + if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse()) + return false; + +#ifndef NDEBUG + // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption + // for now: + if (AddI->hasOneUse()) + assert(*AddI->user_begin() == CI && "expected!"); +#endif + + Module *M = CI->getParent()->getParent()->getParent(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + + auto *InsertPt = AddI->hasOneUse() ? CI : AddI; + + auto *UAddWithOverflow = + CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); + auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); + auto *Overflow = + ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); + + CI->replaceAllUsesWith(Overflow); + AddI->replaceAllUsesWith(UAdd); + CI->eraseFromParent(); + AddI->eraseFromParent(); + return true; +} + +/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce /// the number of virtual registers that must be created and coalesced.
This is /// a clear win except on targets with multiple condition code registers /// (PowerPC), where it might lose; some adjustment may be wanted there. /// /// Return true if any changes are made. -static bool OptimizeCmpExpression(CmpInst *CI) { +static bool SinkCmpExpression(CmpInst *CI) { BasicBlock *DefBB = CI->getParent(); /// InsertedCmp - Only insert a cmp in each block once. @@ -649,21 +869,33 @@ static bool OptimizeCmpExpression(CmpInst *CI) { CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), CI->getOperand(1), "", InsertPt); - MadeChange = true; } // Replace a use of the cmp with a use of the new cmp. TheUse = InsertedCmp; + MadeChange = true; ++NumCmpUses; } // If we removed all uses, nuke the cmp. - if (CI->use_empty()) + if (CI->use_empty()) { CI->eraseFromParent(); + MadeChange = true; + } return MadeChange; } +static bool OptimizeCmpExpression(CmpInst *CI) { + if (SinkCmpExpression(CI)) + return true; + + if (CombineUAddWithOverflow(CI)) + return true; + + return false; +} + /// isExtractBitsCandidateUse - Check if the candidates could /// be combined with shift instruction, which includes: /// 1. Truncate instruction @@ -943,8 +1175,9 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); Builder.SetInsertPoint(InsertPt); - - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); LoadInst* Load = Builder.CreateLoad(Gep, false); VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); @@ -1038,7 +1271,8 @@ static void ScalarizeMaskedStore(CallInst *CI) { Builder.SetInsertPoint(InsertPt); Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); Builder.CreateStore(OneElt, Gep); // Create "else" block, fill it in the next iteration @@ -1072,6 +1306,54 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } + const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; + + // Align the pointer arguments to this call if the target thinks it's a good + // idea + unsigned MinSize, PrefAlign; + if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { + for (auto &Arg : CI->arg_operands()) { + // We want to align both objects whose address is used directly and + // objects whose address is used in casts and GEPs, though it only makes + // sense for GEPs if the offset is a multiple of the desired alignment and + // if size - offset meets the size threshold. + if (!Arg->getType()->isPointerTy()) + continue; + APInt Offset(TD->getPointerSizeInBits( + cast<PointerType>(Arg->getType())->getAddressSpace()), 0); + Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); + uint64_t Offset2 = Offset.getLimitedValue(); + if ((Offset2 & (PrefAlign-1)) != 0) + continue; + AllocaInst *AI; + if ((AI = dyn_cast<AllocaInst>(Val)) && + AI->getAlignment() < PrefAlign && + TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) + AI->setAlignment(PrefAlign); + // Global variables can only be aligned if they are defined in this + // object (i.e. they are uniquely initialized in this object), and + // over-aligning global variables that have an explicit section is + // forbidden. 
+ GlobalVariable *GV; + if ((GV = dyn_cast<GlobalVariable>(Val)) && + GV->hasUniqueInitializer() && + !GV->hasSection() && + GV->getAlignment() < PrefAlign && + TD->getTypeAllocSize( + GV->getType()->getElementType()) >= MinSize + Offset2) + GV->setAlignment(PrefAlign); + } + // If this is a memcpy (or similar) then we may be able to improve the + // alignment + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { + unsigned Align = getKnownAlignment(MI->getDest(), *TD); + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) + Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD)); + if (Align > MI->getAlignment()) + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + } + } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { switch (II->getIntrinsicID()) { @@ -1088,8 +1370,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { WeakVH IterHandle(CurInstIterator); replaceAndRecursivelySimplify(CI, RetVal, - TLI ? TLI->getDataLayout() : nullptr, - TLInfo, ModifiedDT ? nullptr : DT); + TLInfo, nullptr); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -1116,6 +1397,16 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { } return false; } + case Intrinsic::aarch64_stlxr: + case Intrinsic::aarch64_stxr: { + ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); + if (!ExtVal || !ExtVal->hasOneUse() || + ExtVal->getParent() == CI->getParent()) + return false; + // Sink a zext feeding stlxr/stxr before it, so it can be folded into it. + ExtVal->moveBefore(CI); + return true; + } } if (TLI) { @@ -1131,15 +1422,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // From here on out we're working with named functions. if (!CI->getCalledFunction()) return false; - // We'll need DataLayout from here on out. - const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; - if (!TD) return false; - // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls // to fortified library functions (e.g. __memcpy_chk) that have the default // "don't know" as the objectsize. Anything else should be left alone. - FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true); + FortifiedLibCallSimplifier Simplifier(TLInfo, true); if (Value *V = Simplifier.optimizeCall(CI)) { CI->replaceAllUsesWith(V); CI->eraseFromParent(); @@ -1672,7 +1959,7 @@ class TypePromotionTransaction { Inst->removeFromParent(); } - ~InstructionRemover() { delete Replacer; } + ~InstructionRemover() override { delete Replacer; } /// \brief Really remove the instruction. void commit() override { delete Inst; } @@ -1802,6 +2089,7 @@ void TypePromotionTransaction::rollback( /// This encapsulates the logic for matching the target-legal addressing modes. class AddressingModeMatcher { SmallVectorImpl<Instruction*> &AddrModeInsts; + const TargetMachine &TM; const TargetLowering &TLI; /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and @@ -1825,13 +2113,15 @@ class AddressingModeMatcher { /// always returns true. 
bool IgnoreProfitability; - AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, - const TargetLowering &T, Type *AT, - Instruction *MI, ExtAddrMode &AM, - const SetOfInstrs &InsertedTruncs, + AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, + const TargetMachine &TM, Type *AT, Instruction *MI, + ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) - : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), + : AddrModeInsts(AMI), TM(TM), + TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) + ->getTargetLowering()), + AccessTy(AT), MemoryInst(MI), AddrMode(AM), InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { IgnoreProfitability = false; } @@ -1848,13 +2138,13 @@ public: static ExtAddrMode Match(Value *V, Type *AccessTy, Instruction *MemoryInst, SmallVectorImpl<Instruction*> &AddrModeInsts, - const TargetLowering &TLI, + const TargetMachine &TM, const SetOfInstrs &InsertedTruncs, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) { ExtAddrMode Result; - bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, + bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT).MatchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -1869,7 +2159,7 @@ private: ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); - bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, + bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const; }; @@ -2003,7 +2293,7 @@ class TypePromotionHelper { /// \brief Utility function to promote the operand of \p Ext when this /// operand is a promotable trunc or sext or zext. /// \p PromotedInsts maps the instructions to their type before promotion. - /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. @@ -2011,53 +2301,55 @@ class TypePromotionHelper { /// \return The promoted value which is used instead of Ext. static Value *promoteOperandForTruncAndAnyExt( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); /// \brief Utility function to promote the operand of \p Ext when this /// operand is promotable and is not a supported trunc or sext. /// \p PromotedInsts maps the instructions to their type before promotion. - /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all the instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. /// Should never be called directly. /// \return The promoted value which is used instead of Ext. 
- static Value * - promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt); + static Value *promoteOperandForOther(Instruction *Ext, + TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI, bool IsSExt); /// \see promoteOperandForOther. - static Value * - signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, true); + static Value *signExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, true); } /// \see promoteOperandForOther. - static Value * - zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, false); + static Value *zeroExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, false); } public: /// Type for the utility function that promotes the operand of Ext. typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI); /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate /// action to promote the operand of \p Ext instead of using Ext. /// \return NULL if no promotable action is possible with the current @@ -2174,16 +2466,18 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( llvm::Instruction *SExt, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { // By construction, the operand of SExt is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); Value *ExtVal = SExt; + bool HasMergedNonFreeExt = false; if (isa<ZExtInst>(SExtOpnd)) { // Replace s|zext(zext(opnd)) // => zext(opnd).
+ HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); Value *ZExt = TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); TPT.replaceAllUsesWith(SExt, ZExt); @@ -2194,7 +2488,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // => z|sext(opnd). TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); } - CreatedInsts = 0; + CreatedInstsCost = 0; // Remove dead code. if (SExtOpnd->use_empty()) @@ -2203,8 +2497,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // Check if the extension is still needed. Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { - if (ExtInst && Exts) - Exts->push_back(ExtInst); + if (ExtInst) { + if (Exts) + Exts->push_back(ExtInst); + CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; + } return ExtVal; } @@ -2217,13 +2514,14 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( Value *TypePromotionHelper::promoteOperandForOther( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, + bool IsSExt) { // By construction, the operand of Ext is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); - CreatedInsts = 0; + CreatedInstsCost = 0; if (!ExtOpnd->hasOneUse()) { // ExtOpnd will be promoted. // All its uses, but Ext, will need to use a truncated value of the @@ -2298,7 +2596,6 @@ Value *TypePromotionHelper::promoteOperandForOther( continue; } ExtForOpnd = cast<Instruction>(ValForExtOpnd); - ++CreatedInsts; } if (Exts) Exts->push_back(ExtForOpnd); @@ -2307,6 +2604,7 @@ Value *TypePromotionHelper::promoteOperandForOther( // Move the sign extension before the insertion point. TPT.moveBefore(ExtForOpnd, ExtOpnd); TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); + CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); // If more sext are required, new instructions will have to be created. ExtForOpnd = nullptr; } @@ -2319,22 +2617,22 @@ Value *TypePromotionHelper::promoteOperandForOther( /// IsPromotionProfitable - Check whether or not promoting an instruction /// to a wider type was profitable. -/// \p MatchedSize gives the number of instructions that have been matched -/// in the addressing mode after the promotion was applied. -/// \p SizeWithPromotion gives the number of created instructions for -/// the promotion plus the number of instructions that have been -/// matched in the addressing mode before the promotion. +/// \p NewCost gives the cost of extension instructions created by the +/// promotion. +/// \p OldCost gives the cost of extension instructions before the promotion +/// plus the number of instructions that have been +/// matched in the addressing mode before the promotion. /// \p PromotedOperand is the value that has been promoted. /// \return True if the promotion is profitable, false otherwise. -bool -AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, - unsigned SizeWithPromotion, - Value *PromotedOperand) const { - // We folded less instructions than what we created to promote the operand.
+bool AddressingModeMatcher::IsPromotionProfitable( + unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { + DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); + // The cost of the new extensions is greater than the cost of the + // old extension plus what we folded. // This is not profitable. - if (MatchedSize < SizeWithPromotion) + if (NewCost > OldCost) return false; - if (MatchedSize > SizeWithPromotion) + if (NewCost < OldCost) return true; // The promotion is neutral but it may help folding the sign extension in // loads for instance. @@ -2374,7 +2672,6 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: - case Instruction::AddrSpaceCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). if ((AddrInst->getOperand(0)->getType()->isPointerTy() || @@ -2385,6 +2682,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, AddrInst->getOperand(0)->getType() != AddrInst->getType()) return MatchAddr(AddrInst->getOperand(0), Depth); return false; + case Instruction::AddrSpaceCast: { + unsigned SrcAS + = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); + unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); + if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + } case Instruction::Add: { // Check to see if we can merge in the RHS then the LHS. If so, we win. ExtAddrMode BackupAddrMode = AddrMode; @@ -2532,9 +2837,10 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - unsigned CreatedInsts = 0; + unsigned CreatedInstsCost = 0; + unsigned ExtCost = !TLI.isExtFree(Ext); Value *PromotedOperand = - TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr); + TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); // SExt has been moved away. // Thus either it will be rematched later in the recursive calls or it is // gone. Anyway, we must not fold it into the addressing mode at this point. @@ -2556,7 +2862,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, unsigned OldSize = AddrModeInsts.size(); if (!MatchAddr(PromotedOperand, Depth) || - !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, + // The total of the new cost is equal to the cost of the created + // instructions. + // The total of the old cost is equal to the cost of the extension plus + // what we have saved in the addressing mode. + !IsPromotionProfitable(CreatedInstsCost, + ExtCost + (AddrModeInsts.size() - OldSize), PromotedOperand)) { AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); @@ -2658,13 +2969,17 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { /// inline asm call are due to memory operands. If so, return true, otherwise /// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, - const TargetLowering &TLI) { - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); + const TargetMachine &TM) { + const Function *F = CI->getParent()->getParent(); + const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo(); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI->ParseConstraints(TRI, ImmutableCallSite(CI)); for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); // If this asm operand is our Value*, and if it isn't an indirect memory // operand, we can't fold it! @@ -2680,10 +2995,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, /// FindAllMemoryUses - Recursively walk all the uses of I until we find a /// memory use. If we find an obviously non-foldable instruction, return true. /// Add the ultimately found memory instructions to MemoryUses. -static bool FindAllMemoryUses(Instruction *I, - SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, - SmallPtrSetImpl<Instruction*> &ConsideredInsts, - const TargetLowering &TLI) { +static bool FindAllMemoryUses( + Instruction *I, + SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, + SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -2713,12 +3028,12 @@ static bool FindAllMemoryUses(Instruction *I, if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. - if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) + if (!IsOperandAMemoryOperand(CI, IA, I, TM)) return true; continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM)) return true; } @@ -2806,7 +3121,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // uses. SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; SmallPtrSet<Instruction*, 16> ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM)) return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of @@ -2831,7 +3146,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode Result; TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT); Matcher.IgnoreProfitability = true; @@ -2906,15 +3221,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // For a PHI node, push all of its incoming values. 
if (PHINode *P = dyn_cast<PHINode>(V)) { - for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) - worklist.push_back(P->getIncomingValue(i)); + for (Value *IncValue : P->incoming_values()) + worklist.push_back(IncValue); continue; } // For non-PHIs, determine the addressing mode being computed. SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( - V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet, + V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet, PromotedInsts, TPT); // This check is broken into two cases with very similar code to avoid using @@ -2989,8 +3304,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); - } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && - TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { + } else if (AddrSinkUsingGEPs || + (!AddrSinkUsingGEPs.getNumOccurrences() && TM && + TM->getSubtargetImpl(*MemoryInst->getParent()->getParent()) + ->useAA())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " @@ -3041,7 +3358,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return false; } else { Type *I8PtrTy = - Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Type *I8Ty = Builder.getInt8Ty(); // Start with the base register. Do this first so that subsequent address // matching finds it last, which will prevent it from trying to match it @@ -3093,7 +3411,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // SDAG consecutive load/store merging. 
if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } ResultIndex = V; @@ -3104,7 +3422,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } if (SunkAddr->getType() != Addr->getType()) @@ -3213,8 +3531,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; + const TargetRegisterInfo *TRI = + TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI->ParseConstraints(CS); + TargetConstraints = TLI->ParseConstraints(TRI, CS); unsigned ArgNo = 0; for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -3308,7 +3628,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInsts = 0) { + unsigned CreatedInstsCost = 0) { // Iterate over all the extensions to see if one forms an ext(load). for (auto I : Exts) { // Check if we directly have ext(load). @@ -3330,10 +3650,11 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); SmallVector<Instruction *, 4> NewExts; - unsigned NewCreatedInsts = 0; + unsigned NewCreatedInstsCost = 0; + unsigned ExtCost = !TLI->isExtFree(I); // Promote. - Value *PromotedVal = - TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr); + Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, + &NewExts, nullptr, *TLI); assert(PromotedVal && "TypePromotionHelper should have filtered out those cases"); @@ -3343,9 +3664,10 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, // With exactly 2, the transformation is neutral, because we will merge // one extension but leave one. However, we optimistically keep going, // because the new extension may be removed too. - unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts; + long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; + TotalCreatedInstsCost -= ExtCost; if (!StressExtLdPromotion && - (TotalCreatedInsts > 1 || + (TotalCreatedInstsCost > 1 || !isPromotedInstructionLegal(*TLI, PromotedVal))) { // The promotion is not profitable; roll back to the previous state. TPT.rollback(LastKnownGood); @@ -3353,8 +3675,8 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, } // The promotion is profitable. // Check if it exposes an ext(load). - (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts); - if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 || + (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); + if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || // If we have created a new extension, i.e., now we have two // extensions.
We must make sure one of them is merged with // the load, otherwise we may degrade the code quality. @@ -3969,148 +4291,6 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); } -// See if we can speculate calls to intrinsic cttz/ctlz. -// -// Example: -// entry: -// ... -// %cmp = icmp eq i64 %val, 0 -// br i1 %cmp, label %end.bb, label %then.bb -// -// then.bb: -// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) -// br label %EndBB -// -// end.bb: -// %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ] -// -// ==> -// -// entry: -// ... -// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false) -// -static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) { - assert(BrInst->isConditional() && "Expected a conditional branch!"); - BasicBlock *ThenBB = BrInst->getSuccessor(1); - BasicBlock *EndBB = BrInst->getSuccessor(0); - - // See if ThenBB contains only one instruction (excluding the - // terminator and DbgInfoIntrinsic calls). - IntrinsicInst *II = nullptr; - CastInst *CI = nullptr; - for (BasicBlock::iterator I = ThenBB->begin(), - E = std::prev(ThenBB->end()); I != E; ++I) { - // Skip debug info. - if (isa<DbgInfoIntrinsic>(I)) - continue; - - // Check if this is a zero extension or a truncate of a previously - // matched call to intrinsic cttz/ctlz. - if (II) { - // Early exit if we already found a "free" zero extend/truncate. - if (CI) - return false; - - Type *SrcTy = II->getType(); - Type *DestTy = I->getType(); - Value *V; - - if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) { - // Speculate this zero extend only if it is "free" for the target. - if (TLI.isZExtFree(SrcTy, DestTy)) { - CI = cast<CastInst>(I); - continue; - } - } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) { - // Speculate this truncate only if it is "free" for the target. - if (TLI.isTruncateFree(SrcTy, DestTy)) { - CI = cast<CastInst>(I); - continue; - } - } else { - // Avoid speculating more than one instruction. - return false; - } - } - - // See if this is a call to intrinsic cttz/ctlz. - if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) { - // Avoid speculating expensive intrinsic calls. - if (!TLI.isCheapToSpeculateCttz()) - return false; - } - else if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::ctlz>())) { - // Avoid speculating expensive intrinsic calls. - if (!TLI.isCheapToSpeculateCtlz()) - return false; - } else - return false; - - II = cast<IntrinsicInst>(I); - } - - // Look for PHI nodes with 'II' as the incoming value from 'ThenBB'. - BasicBlock *EntryBB = BrInst->getParent(); - for (BasicBlock::iterator I = EndBB->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) { - Value *ThenV = PN->getIncomingValueForBlock(ThenBB); - Value *OrigV = PN->getIncomingValueForBlock(EntryBB); - - if (!OrigV) - return false; - - if (ThenV != II && (!CI || ThenV != CI)) - return false; - - if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) { - unsigned BitWidth = II->getType()->getIntegerBitWidth(); - - // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz - // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits - // of the value in input to the cttz/ctlz. - if (CInt->getValue() != BitWidth) - return false; - - // Hoist the call to cttz/ctlz from ThenBB into EntryBB. 
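Stepping back to the ExtLdPromotion hunk further up: the bookkeeping there moves from counting created instructions to a cost, where an extension the target can fold (isExtFree) costs nothing. A condensed, hypothetical form of the profitability test; the legality check through isPromotedInstructionLegal is elided here:

    // Keep the promotion only when its net cost stays at or below one
    // instruction, or when stress testing forces the transform.
    static bool keepPromotion(long long CreatedInstsCost,
                              long long NewCreatedInstsCost,
                              unsigned ExtCost, bool Stress) {
      long long Total = CreatedInstsCost + NewCreatedInstsCost;
      Total -= ExtCost; // merging the ext(load) pays for one extension
      return Stress || Total <= 1;
    }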
- EntryBB->getInstList().splice(BrInst, ThenBB->getInstList(), - ThenBB->begin(), std::prev(ThenBB->end())); - - // Update PN setting ThenV as the incoming value from both 'EntryBB' - // and 'ThenBB'. Eventually, method 'OptimizeInst' will fold this - // phi node if all the incoming values are the same. - PN->setIncomingValue(PN->getBasicBlockIndex(EntryBB), ThenV); - PN->setIncomingValue(PN->getBasicBlockIndex(ThenBB), ThenV); - - // Clear the 'undef on zero' flag of the cttz/ctlz intrinsic call. - if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) { - Type *Ty = II->getArgOperand(0)->getType(); - Value *Args[] = { II->getArgOperand(0), - ConstantInt::getFalse(II->getContext()) }; - Module *M = EntryBB->getParent()->getParent(); - Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty); - IRBuilder<> Builder(II); - Instruction *NewI = Builder.CreateCall(IF, Args); - - // Replace the old call to cttz/ctlz. - II->replaceAllUsesWith(NewI); - II->eraseFromParent(); - } - - // Update BrInst condition so that the branch to EndBB is always taken. - // Later on, method 'ConstantFoldTerminator' will simplify this branch - // replacing it with a direct branch to 'EndBB'. - // As a side effect, CodeGenPrepare will attempt to simplify the control - // flow graph by deleting basic block 'ThenBB' and merging 'EntryBB' into - // 'EndBB' (calling method 'EliminateFallThrough'). - BrInst->setCondition(ConstantInt::getTrue(BrInst->getContext())); - return true; - } - } - - return false; -} - /// Some targets can do store(extractelement) with one instruction. /// Try to push the extractelement towards the stores when the target /// has this feature and this is profitable. @@ -4171,8 +4351,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr, - TLInfo, DT)) { + const DataLayout &DL = I->getModule()->getDataLayout(); + if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; @@ -4263,34 +4443,6 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { if (isa<ExtractElementInst>(I)) return OptimizeExtractElementInst(I); - if (BranchInst *BI = dyn_cast<BranchInst>(I)) { - if (TLI && BI->isConditional() && BI->getCondition()->hasOneUse()) { - // Check if the branch condition compares a value agaist zero. - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { - if (ICI->getPredicate() == ICmpInst::ICMP_EQ && - match(ICI->getOperand(1), m_Zero())) { - BasicBlock *ThenBB = BI->getSuccessor(1); - BasicBlock *EndBB = BI->getSuccessor(0); - - // Check if ThenBB is only reachable from this basic block; also, - // check if EndBB has more than one predecessor. - if (ThenBB->getSinglePredecessor() && - !EndBB->getSinglePredecessor()) { - TerminatorInst *TI = ThenBB->getTerminator(); - - if (TI->getNumSuccessors() == 1 && TI->getSuccessor(0) == EndBB && - // Try to speculate calls to intrinsic cttz/ctlz from 'ThenBB'. - OptimizeBranchInst(BI, *TLI)) { - ModifiedDT = true; - return true; - } - } - } - } - } - return false; - } - return false; } @@ -4469,8 +4621,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. 
/// bool CodeGenPrepare::splitBranchCondition(Function &F) { - if (!TM || TM->Options.EnableFastISel != true || - !TLI || TLI->isJumpExpensive()) + if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) return false; bool MadeChange = false; @@ -4631,10 +4782,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } } - // Request DOM Tree update. // Note: No point in getting fancy here, since the DT info is never - // available to CodeGenPrepare and the existing update code is broken - // anyways. + // available to CodeGenPrepare. ModifiedDT = true; MadeChange = true; diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp new file mode 100644 index 0000000..28c97ba --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp @@ -0,0 +1,54 @@ +//===-- CoreCLRGC.cpp - CoreCLR Runtime GC Strategy -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a GCStrategy for the CoreCLR Runtime. +// The strategy is similar to Statepoint-example GC, but differs from it in +// certain aspects, such as: +// 1) Base-pointers need not be explicitly tracked and reported for +// interior pointers +// 2) Uses a different format for encoding stack-maps +// 3) Location of Safe-point polls: polls are only needed before loop-back edges +// and before tail-calls (not needed at function-entry) +// +// The above differences in behavior are to be implemented in upcoming checkins. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Value.h" + +using namespace llvm; + +namespace { +class CoreCLRGC : public GCStrategy { +public: + CoreCLRGC() { + UseStatepoints = true; + // These options are all gc.root specific, we specify them so that the + // gc.root lowering code doesn't run. + InitRoots = false; + NeededSafePoints = 0; + UsesMetadata = false; + CustomRoots = false; + } + Optional<bool> isGCManagedPointer(const Value *V) const override { + // Method is only valid on pointer typed values. + PointerType *PT = cast<PointerType>(V->getType()); + // We pick addrspace(1) as our GC managed heap. + return (1 == PT->getAddressSpace()); + } +}; +} + +static GCRegistry::Add<CoreCLRGC> X("coreclr", "CoreCLR-compatible GC"); + +namespace llvm { +void linkCoreCLRGC() {} +} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h index ceef74d..af011a0 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -69,7 +69,7 @@ class TargetRegisterInfo; public: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); - ~CriticalAntiDepBreaker(); + ~CriticalAntiDepBreaker() override; /// Initialize anti-dep breaking for a new basic block. 
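Before leaving the new CoreCLRGC.cpp above: a GCStrategy subclass becomes selectable once registered, keyed by the function-level gc attribute. A hypothetical registration along the same lines; MyStatepointGC and the "my-gc" name are made up for illustration:

    #include "llvm/CodeGen/GCStrategy.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;

    namespace {
    class MyStatepointGC : public GCStrategy {
    public:
      MyStatepointGC() {
        UseStatepoints = true; // rely on gc.statepoint, not gc.root
        InitRoots = false;     // keep gc.root lowering switched off
      }
      Optional<bool> isGCManagedPointer(const Value *V) const override {
        // Same convention as CoreCLRGC: addrspace(1) is the managed heap.
        PointerType *PT = cast<PointerType>(V->getType());
        return PT->getAddressSpace() == 1;
      }
    };
    }

    // Functions carrying `gc "my-gc"` are now handed this strategy.
    static GCRegistry::Add<MyStatepointGC> Y("my-gc", "illustrative GC");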
void StartBlock(MachineBasicBlock *BB) override; diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index c17a35d..963d573 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -65,7 +65,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, nullptr, SawStore) && !MI->isPHI()) + if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) return false; // Examine each operand. diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 75b74d9..42656fb 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -13,19 +13,19 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/CallSite.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "dwarfehprepare" @@ -39,15 +39,28 @@ namespace { // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; + DominatorTree *DT; + const TargetLowering *TLI; + bool InsertUnwindResumeCalls(Function &Fn); Value *GetExceptionObject(ResumeInst *RI); + size_t + pruneUnreachableResumes(Function &Fn, + SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads); public: static char ID; // Pass identification, replacement for typeid. + + // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in + // practice. 
+ DwarfEHPrepare() + : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} + DwarfEHPrepare(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), RewindFunction(nullptr) { - initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); - } + : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} bool runOnFunction(Function &Fn) override; @@ -56,7 +69,7 @@ namespace { return false; } - void getAnalysisUsage(AnalysisUsage &AU) const override { } + void getAnalysisUsage(AnalysisUsage &AU) const override; const char *getPassName() const override { return "Exception handling preparation"; @@ -65,11 +78,22 @@ namespace { } // end anonymous namespace char DwarfEHPrepare::ID = 0; +INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { return new DwarfEHPrepare(TM); } +void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); +} + /// GetExceptionObject - Return the exception object from the value passed into /// the 'resume' instruction (typically an aggregate). Clean up any dead /// instructions, including the 'resume' instruction. @@ -99,34 +123,93 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { RI->eraseFromParent(); if (EraseIVIs) { - if (SelIVI->getNumUses() == 0) + if (SelIVI->use_empty()) SelIVI->eraseFromParent(); - if (ExcIVI->getNumUses() == 0) + if (ExcIVI->use_empty()) ExcIVI->eraseFromParent(); - if (SelLoad && SelLoad->getNumUses() == 0) + if (SelLoad && SelLoad->use_empty()) SelLoad->eraseFromParent(); } return ExnObj; } +/// Replace resumes that are not reachable from a cleanup landing pad with +/// unreachable and then simplify those blocks. +size_t DwarfEHPrepare::pruneUnreachableResumes( + Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads) { + BitVector ResumeReachable(Resumes.size()); + size_t ResumeIndex = 0; + for (auto *RI : Resumes) { + for (auto *LP : CleanupLPads) { + if (isPotentiallyReachable(LP, RI, DT)) { + ResumeReachable.set(ResumeIndex); + break; + } + } + ++ResumeIndex; + } + + // If everything is reachable, there is no change. + if (ResumeReachable.all()) + return Resumes.size(); + + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); + LLVMContext &Ctx = Fn.getContext(); + + // Otherwise, insert unreachable instructions and call simplifycfg. + size_t ResumesLeft = 0; + for (size_t I = 0, E = Resumes.size(); I < E; ++I) { + ResumeInst *RI = Resumes[I]; + if (ResumeReachable[I]) { + Resumes[ResumesLeft++] = RI; + } else { + BasicBlock *BB = RI->getParent(); + new UnreachableInst(Ctx, RI); + RI->eraseFromParent(); + SimplifyCFG(BB, TTI, 1); + } + } + Resumes.resize(ResumesLeft); + return ResumesLeft; +} + /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. 
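One detail of pruneUnreachableResumes above worth isolating: survivors are compacted in place through a write index instead of being erased one at a time, so the pass stays linear. The same idiom in generic form; compactByMask is a hypothetical name:

    #include "llvm/ADT/BitVector.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Keep Vec[I] exactly when Keep holds bit I; relative order is
    // preserved and no element is written more than once.
    template <typename T>
    static unsigned compactByMask(SmallVectorImpl<T *> &Vec,
                                  const BitVector &Keep) {
      unsigned Left = 0;
      for (unsigned I = 0, E = Vec.size(); I < E; ++I)
        if (Keep.test(I))
          Vec[Left++] = Vec[I];
      Vec.resize(Left);
      return Left;
    }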
bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { SmallVector<ResumeInst*, 16> Resumes; - for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - TerminatorInst *TI = I->getTerminator(); - if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) + SmallVector<LandingPadInst*, 16> CleanupLPads; + bool FoundLP = false; + for (BasicBlock &BB : Fn) { + if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator())) Resumes.push_back(RI); + if (auto *LP = BB.getLandingPadInst()) { + if (LP->isCleanup()) + CleanupLPads.push_back(LP); + // Check the personality on the first landingpad. Don't do anything if + // it's for MSVC. + if (!FoundLP) { + FoundLP = true; + EHPersonality Pers = classifyEHPersonality(LP->getPersonalityFn()); + if (isMSVCEHPersonality(Pers)) + return false; + } + } } if (Resumes.empty()) return false; + LLVMContext &Ctx = Fn.getContext(); + + size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads); + if (ResumesLeft == 0) + return true; // We pruned them all. + // Find the rewind function if we didn't already. - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); if (!RewindFunction) { - LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); @@ -134,10 +217,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } // Create the basic block where the _Unwind_Resume call will live. - LLVMContext &Ctx = Fn.getContext(); - unsigned ResumesSize = Resumes.size(); - - if (ResumesSize == 1) { + if (ResumesLeft == 1) { // Instead of creating a new BB and PHI node, just append the call to // _Unwind_Resume to the end of the single resume block. ResumeInst *RI = Resumes.front(); @@ -154,14 +234,12 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); - PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize, + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. - for (SmallVectorImpl<ResumeInst*>::iterator - I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { - ResumeInst *RI = *I; + for (ResumeInst *RI : Resumes) { BasicBlock *Parent = RI->getParent(); BranchInst::Create(UnwindBB, Parent); @@ -181,6 +259,11 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } bool DwarfEHPrepare::runOnFunction(Function &Fn) { + assert(TM && "DWARF EH preparation requires a target machine"); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); bool Changed = InsertUnwindResumeCalls(Fn); + DT = nullptr; + TLI = nullptr; return Changed; } diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index 995606f..092b7f8 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -220,7 +220,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // We never speculate stores, so an AA pointer isn't necessary. 
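The scan above also classifies the personality of the first landing pad it sees and gives up on MSVC personalities, whose unwinding does not go through _Unwind_Resume. Pulled out as a standalone sketch; usesMSVCPersonality is a hypothetical name, and the classify helpers come from llvm/Analysis/LibCallSemantics.h, included earlier in this file:

    static bool usesMSVCPersonality(const Function &Fn) {
      for (const BasicBlock &BB : Fn)
        if (const LandingPadInst *LP = BB.getLandingPadInst())
          // The first landing pad is representative for the whole function.
          return isMSVCEHPersonality(
              classifyEHPersonality(LP->getPersonalityFn()));
      return false;
    }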
bool DontMoveAcrossStore = true; - if (!I->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) { + if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) { DEBUG(dbgs() << "Can't speculate: " << *I); return false; } @@ -777,15 +777,13 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); // Only run if conversion if the target wants it. - if (!MF.getTarget() - .getSubtarget<TargetSubtargetInfo>() - .enableEarlyIfConversion()) + const TargetSubtargetInfo &STI = MF.getSubtarget(); + if (!STI.enableEarlyIfConversion()) return false; - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); - SchedModel = - MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + SchedModel = STI.getSchedModel(); MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); @@ -799,9 +797,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. This makes it safe to // update the dominator tree while the post-order iterator is still active. - for (po_iterator<MachineDominatorTree*> - I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I) - if (tryConvertIf(I->getBlock())) + for (auto DomNode : post_order(DomTree)) + if (tryConvertIf(DomNode->getBlock())) Changed = true; return Changed; diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp index 85b0893..024946d 100644 --- a/contrib/llvm/lib/CodeGen/ErlangGC.cpp +++ b/contrib/llvm/lib/CodeGen/ErlangGC.cpp @@ -27,56 +27,20 @@ using namespace llvm; namespace { - class ErlangGC : public GCStrategy { - MCSymbol *InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const; - public: - ErlangGC(); - bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) override; - }; - +class ErlangGC : public GCStrategy { +public: + ErlangGC(); +}; } -static GCRegistry::Add<ErlangGC> -X("erlang", "erlang-compatible garbage collector"); +static GCRegistry::Add<ErlangGC> X("erlang", + "erlang-compatible garbage collector"); -void llvm::linkErlangGC() { } +void llvm::linkErlangGC() {} ErlangGC::ErlangGC() { InitRoots = false; NeededSafePoints = 1 << GC::PostCall; UsesMetadata = true; CustomRoots = false; - CustomSafePoints = true; -} - -MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const { - const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); - MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); - BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); - return Label; -} - -bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) { - for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; - ++BBI) - for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); - MI != ME; ++MI) - - if (MI->getDesc().isCall()) { - - // Do not treat tail call sites as safe points. - if (MI->getDesc().isTerminator()) - continue; - - /* Code copied from VisitCallPoint(...) 
*/ - MachineBasicBlock::iterator RAI = MI; ++RAI; - MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc()); - FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc()); - } - - return false; } diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index b3a22c8..5b09cf1 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -113,7 +113,7 @@ struct DomainValue { } namespace { -/// LiveReg - Information about a live register. +/// Information about a live register. struct LiveReg { /// Value currently in this register, or NULL when no value is being tracked. /// This counts as a DomainValue reference. @@ -125,7 +125,7 @@ struct LiveReg { /// will be a negative number. int Def; }; -} // anonynous namespace +} // anonymous namespace namespace { class ExeDepsFix : public MachineFunctionPass { @@ -174,7 +174,7 @@ public: private: iterator_range<SmallVectorImpl<int>::const_iterator> - regIndizes(unsigned Reg) const; + regIndices(unsigned Reg) const; // DomainValue allocation. DomainValue *alloc(int domain = -1); @@ -205,10 +205,10 @@ private: char ExeDepsFix::ID = 0; -/// Translate TRI register number to a list of indizes into our stmaller tables +/// Translate TRI register number to a list of indices into our smaller tables /// of interesting registers. iterator_range<SmallVectorImpl<int>::const_iterator> -ExeDepsFix::regIndizes(unsigned Reg) const { +ExeDepsFix::regIndices(unsigned Reg) const { assert(Reg < AliasMap.size() && "Invalid register"); const auto &Entry = AliasMap[Reg]; return make_range(Entry.begin(), Entry.end()); @@ -225,7 +225,7 @@ DomainValue *ExeDepsFix::alloc(int domain) { return dv; } -/// release - Release a reference to DV. When the last reference is released, +/// Release a reference to DV. When the last reference is released, /// collapse if needed. void ExeDepsFix::release(DomainValue *DV) { while (DV) { @@ -245,8 +245,8 @@ void ExeDepsFix::release(DomainValue *DV) { } } -/// resolve - Follow the chain of dead DomainValues until a live DomainValue is -/// reached. Update the referenced pointer when necessary. +/// Follow the chain of dead DomainValues until a live DomainValue is reached. +/// Update the referenced pointer when necessary. DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { DomainValue *DV = DVRef; if (!DV || !DV->Next) @@ -325,8 +325,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { setLiveReg(rx, alloc(domain)); } -/// Merge - All instructions and registers in B are moved to A, and B is -/// released. +/// All instructions and registers in B are moved to A, and B is released. bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); @@ -352,7 +351,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { return true; } -// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. +/// Set up LiveRegs by merging predecessor live-out values. void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Detect back-edges from predecessors we haven't processed yet. 
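An aside on the DomainValue plumbing above: resolve() chases the Next links of dead values and then rewrites the referencing pointer, a lightweight form of union-find path compression. Its core, with the retain/release bookkeeping elided; resolveChain is a hypothetical name:

    static DomainValue *resolveChain(DomainValue *&Ref) {
      DomainValue *DV = Ref;
      while (DV && DV->Next)
        DV = DV->Next; // skip over dead values
      if (DV != Ref)
        Ref = DV; // compress so later lookups through Ref are cheap
      return DV;
    }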
SeenUnknownBackEdge = false; @@ -378,7 +377,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { if (MBB->pred_empty()) { for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - for (int rx : regIndizes(*i)) { + for (int rx : regIndices(*i)) { // Treat function live-ins as if they were defined just before the first // instruction. Usually, function arguments are set up immediately // before the call. @@ -475,7 +474,7 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { unsigned reg = MI->getOperand(OpIdx).getReg(); - for (int rx : regIndizes(reg)) { + for (int rx : regIndices(reg)) { unsigned Clearance = CurInstr - LiveRegs[rx].Def; DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); @@ -521,7 +520,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { break; if (MO.isUse()) continue; - for (int rx : regIndizes(MO.getReg())) { + for (int rx : regIndices(MO.getReg())) { // This instruction explicitly defines rx. DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr << '\t' << *MI); @@ -587,7 +586,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { force(rx, domain); } } @@ -596,7 +595,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { kill(rx); force(rx, domain); } @@ -616,7 +615,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { DomainValue *dv = LiveRegs[rx].Value; if (dv == nullptr) continue; @@ -712,7 +711,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { ii != ee; ++ii) { MachineOperand &mo = *ii; if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { kill(rx); setLiveReg(rx, dv); diff --git a/contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp deleted file mode 100644 index 63c3699..0000000 --- a/contrib/llvm/lib/CodeGen/ForwardControlFlowIntegrity.cpp +++ /dev/null @@ -1,374 +0,0 @@ -//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief A pass that instruments code with fast checks for indirect calls and -/// hooks for a function to check violations. 
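Returning to ExecutionDepsFix for a moment: shouldBreakDependence above measures "clearance", the number of instructions since a register's last def, and only breaks the false dependence when that def is too close. Reduced to its essence; tooClose is a hypothetical name, and Def can be negative for values live into the block, which yields a large clearance, as intended:

    static bool tooClose(unsigned CurInstr, int LastDef, unsigned Pref) {
      unsigned Clearance = CurInstr - LastDef; // instructions since the def
      return Clearance < Pref; // want at least Pref instructions in between
    }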
-/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "cfi" - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" -#include "llvm/CodeGen/JumpInstrTables.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -STATISTIC(NumCFIIndirectCalls, - "Number of indirect call sites rewritten by the CFI pass"); - -char ForwardControlFlowIntegrity::ID = 0; -INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi", - "Control-Flow Integrity", true, true) -INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); -INITIALIZE_PASS_DEPENDENCY(JumpInstrTables); -INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi", - "Control-Flow Integrity", true, true) - -ModulePass *llvm::createForwardControlFlowIntegrityPass() { - return new ForwardControlFlowIntegrity(); -} - -ModulePass *llvm::createForwardControlFlowIntegrityPass( - JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, - StringRef CFIFuncName) { - return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing, - CFIFuncName); -} - -// Checks to see if a given CallSite is making an indirect call, including -// cases where the indirect call is made through a bitcast. -static bool isIndirectCall(CallSite &CS) { - if (CS.getCalledFunction()) - return false; - - // Check the value to see if it is merely a bitcast of a function. In - // this case, it will translate to a direct function call in the resulting - // assembly, so we won't treat it as an indirect call here. 
- const Value *V = CS.getCalledValue(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - return !(CE->isCast() && isa<Function>(CE->getOperand(0))); - } - - // Otherwise, since we know it's a call, it must be an indirect call - return true; -} - -static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning"; - -ForwardControlFlowIntegrity::ForwardControlFlowIntegrity() - : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single), - CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") { - initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); -} - -ForwardControlFlowIntegrity::ForwardControlFlowIntegrity( - JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, - std::string CFIFuncName) - : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType), - CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) { - initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); -} - -ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {} - -void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<JumpInstrTableInfo>(); - AU.addRequired<JumpInstrTables>(); -} - -void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) { - // To get the indirect calls, we iterate over all functions and iterate over - // the list of basic blocks in each. We extract a total list of indirect calls - // before modifying any of them, since our modifications will modify the list - // of basic blocks. - for (Function &F : M) { - for (BasicBlock &BB : F) { - for (Instruction &I : BB) { - CallSite CS(&I); - if (!(CS && isIndirectCall(CS))) - continue; - - Value *CalledValue = CS.getCalledValue(); - - // Don't rewrite this instruction if the indirect call is actually just - // inline assembly, since our transformation will generate an invalid - // module in that case. - if (isa<InlineAsm>(CalledValue)) - continue; - - IndirectCalls.push_back(&I); - } - } - } -} - -void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M, - CFITables &CFIT) { - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - for (Instruction *I : IndirectCalls) { - CallSite CS(I); - Value *CalledValue = CS.getCalledValue(); - - // Get the function type for this call and look it up in the tables. - Type *VTy = CalledValue->getType(); - PointerType *PTy = dyn_cast<PointerType>(VTy); - Type *EltTy = PTy->getElementType(); - FunctionType *FunTy = dyn_cast<FunctionType>(EltTy); - FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy); - ++NumCFIIndirectCalls; - Constant *JumpTableStart = nullptr; - Constant *JumpTableMask = nullptr; - Constant *JumpTableSize = nullptr; - - // Some call sites have function types that don't correspond to any - // address-taken function in the module. This happens when function pointers - // are passed in from external code. - auto it = CFIT.find(TransformedTy); - if (it == CFIT.end()) { - // In this case, make sure that the function pointer will change by - // setting the mask and the start to be 0 so that the transformed - // function is 0. 
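The removed getIndirectCalls above still demonstrates a sound pattern: snapshot the interesting instructions first, because the rewrites that follow split basic blocks and would invalidate live iterators. In outline, with Worklist as an illustrative name:

    SmallVector<Instruction *, 16> Worklist;
    for (Function &F : M)
      for (BasicBlock &BB : F)
        for (Instruction &I : BB) {
          CallSite CS(&I);
          if (CS && !CS.getCalledFunction()) // indirect; bitcast case elided
            Worklist.push_back(&I);
        }
    // Mutate only after the walk over M is complete.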
- JumpTableStart = ConstantInt::get(Int64Ty, 0); - JumpTableMask = ConstantInt::get(Int64Ty, 0); - JumpTableSize = ConstantInt::get(Int64Ty, 0); - } else { - JumpTableStart = it->second.StartValue; - JumpTableMask = it->second.MaskValue; - JumpTableSize = it->second.Size; - } - - rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask, - JumpTableSize); - } - - return; -} - -bool ForwardControlFlowIntegrity::runOnModule(Module &M) { - JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>(); - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - - // JumpInstrTableInfo stores information about the alignment of each entry. - // The alignment returned by JumpInstrTableInfo is alignment in bytes, not - // in the exponent. - ByteAlignment = JITI->entryByteAlignment(); - LogByteAlignment = llvm::Log2_64(ByteAlignment); - - // Set up tables for control-flow integrity based on information about the - // jump-instruction tables. - CFITables CFIT; - for (const auto &KV : JITI->getTables()) { - uint64_t Size = static_cast<uint64_t>(KV.second.size()); - uint64_t TableSize = NextPowerOf2(Size); - - int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment; - Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue); - Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size); - - // The base of the table is defined to be the first jumptable function in - // the table. - Function *First = KV.second.begin()->second; - Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy); - CFIT[KV.first].StartValue = JumpTableStartValue; - CFIT[KV.first].MaskValue = JumpTableMaskValue; - CFIT[KV.first].Size = JumpTableSize; - } - - if (CFIT.empty()) - return false; - - getIndirectCalls(M); - - if (!CFIEnforcing) { - addWarningFunction(M); - } - - // Update the instructions with the check and the indirect jump through our - // table. - updateIndirectCalls(M, CFIT); - - return true; -} - -void ForwardControlFlowIntegrity::addWarningFunction(Module &M) { - PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext()); - - // Get the type of the Warning Function: void (i8*, i8*), - // where the first argument is the name of the function in which the violation - // occurs, and the second is the function pointer that violates CFI. - SmallVector<Type *, 2> WarningFunArgs; - WarningFunArgs.push_back(CharPtrTy); - WarningFunArgs.push_back(CharPtrTy); - FunctionType *WarningFunTy = - FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false); - - if (!CFIFuncName.empty()) { - Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy); - if (!FailureFun) - report_fatal_error("Could not get or insert the function specified by" - " -cfi-func-name"); - } else { - // The default warning function swallows the warning and lets the call - // continue, since there's no generic way for it to print out this - // information. 
- Function *WarningFun = M.getFunction(cfi_failure_func_name); - if (!WarningFun) { - WarningFun = - Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage, - cfi_failure_func_name, &M); - } - - BasicBlock *Entry = - BasicBlock::Create(M.getContext(), "entry", WarningFun, 0); - ReturnInst::Create(M.getContext(), Entry); - } -} - -void ForwardControlFlowIntegrity::rewriteFunctionPointer( - Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart, - Constant *JumpTableMask, Constant *JumpTableSize) { - IRBuilder<> TempBuilder(I); - - Type *OrigFunType = FunPtr->getType(); - - BasicBlock *CurBB = cast<BasicBlock>(I->getParent()); - Function *CurF = cast<Function>(CurBB->getParent()); - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - - Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty); - Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty); - - Value *NewFunPtr = nullptr; - Value *Check = nullptr; - switch (CFIType) { - case CFIntegrity::Sub: { - // This is the subtract, mask, and add version. - // Subtract from the base. - Value *Sub = TempBuilder.CreateSub(TI, TStartInt); - - // Mask the difference to force this to be a table offset. - Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask); - - // Add it back to the base. - Value *Result = TempBuilder.CreateAdd(And, TStartInt); - - // Convert it back into a function pointer that we can call. - NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); - break; - } - case CFIntegrity::Ror: { - // This is the subtract and rotate version. - // Rotate right by the alignment value. The optimizer should recognize - // this sequence as a rotation. - - // This cast is safe, since unsigned is always a subset of uint64_t. - uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment); - Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64); - Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64); - - // Subtract from the base. - Value *Sub = TempBuilder.CreateSub(TI, TStartInt); - - // Create the equivalent of a rotate-right instruction. - Value *Shr = TempBuilder.CreateLShr(Sub, RightShift); - Value *Shl = TempBuilder.CreateShl(Sub, LeftShift); - Value *Or = TempBuilder.CreateOr(Shr, Shl); - - // Perform unsigned comparison to check for inclusion in the table. - Check = TempBuilder.CreateICmpULT(Or, JumpTableSize); - NewFunPtr = FunPtr; - break; - } - case CFIntegrity::Add: { - // This is the mask and add version. - // Mask the function pointer to turn it into an offset into the table. - Value *And = TempBuilder.CreateAnd(TI, JumpTableMask); - - // Then or this offset to the base and get the pointer value. - Value *Result = TempBuilder.CreateAdd(And, TStartInt); - - // Convert it back into a function pointer that we can call. - NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); - break; - } - } - - if (!CFIEnforcing) { - // If a check hasn't been added (in the rotation version), then check to see - // if it's the same as the original function. This check determines whether - // or not we call the CFI failure function. - if (!Check) - Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr); - BasicBlock *InvalidPtrBlock = - BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0); - BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I); - - // Remove the unconditional branch that connects the two blocks. - TerminatorInst *TermInst = CurBB->getTerminator(); - TermInst->eraseFromParent(); - - // Add a conditional branch that depends on the Check above. 
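The CFIntegrity::Sub sequence being deleted above reduces to three scalar operations; spelled out as plain arithmetic, the guarantee is easier to see. clampToTable is a hypothetical name:

    #include <cstdint>

    // Force a pointer onto some aligned jump-table entry in
    // [Start, Start + Mask]: control flow cannot escape the table even
    // when the pointer was corrupted.
    static uint64_t clampToTable(uint64_t Ptr, uint64_t Start, uint64_t Mask) {
      return ((Ptr - Start) & Mask) + Start;
    }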
- BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB); - - // Call the warning function for this pointer, then continue. - Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock); - insertWarning(M, InvalidPtrBlock, BI, FunPtr); - } else { - // Modify the instruction to call this value. - CallSite CS(I); - CS.setCalledFunction(NewFunPtr); - } -} - -void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block, - Instruction *I, Value *FunPtr) { - Function *ParentFun = cast<Function>(Block->getParent()); - - // Get the function to call right before the instruction. - Function *WarningFun = nullptr; - if (CFIFuncName.empty()) { - WarningFun = M.getFunction(cfi_failure_func_name); - } else { - WarningFun = M.getFunction(CFIFuncName); - } - - assert(WarningFun && "Could not find the CFI failure function"); - - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - - IRBuilder<> WarningInserter(I); - // Create a mergeable GlobalVariable containing the name of the function. - Value *ParentNameGV = - WarningInserter.CreateGlobalString(ParentFun->getName()); - Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy); - Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy); - WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr); -} diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp index 6101c67..c8116a4 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -24,22 +24,20 @@ using namespace llvm; namespace { - - class Printer : public FunctionPass { - static char ID; - raw_ostream &OS; - - public: - explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} +class Printer : public FunctionPass { + static char ID; + raw_ostream &OS; - const char *getPassName() const override; - void getAnalysisUsage(AnalysisUsage &AU) const override; +public: + explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} - bool runOnFunction(Function &F) override; - bool doFinalization(Module &M) override; - }; + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + bool doFinalization(Module &M) override; +}; } INITIALIZE_PASS(GCModuleInfo, "collector-metadata", @@ -48,7 +46,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata", // ----------------------------------------------------------------------------- GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) - : F(F), S(S), FrameSize(~0LL) {} + : F(F), S(S), FrameSize(~0LL) {} GCFunctionInfo::~GCFunctionInfo() {} @@ -56,41 +54,19 @@ GCFunctionInfo::~GCFunctionInfo() {} char GCModuleInfo::ID = 0; -GCModuleInfo::GCModuleInfo() - : ImmutablePass(ID) { +GCModuleInfo::GCModuleInfo() : ImmutablePass(ID) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); } -GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, - const std::string &Name) { - strategy_map_type::iterator NMI = StrategyMap.find(Name); - if (NMI != StrategyMap.end()) - return NMI->getValue(); - - for (GCRegistry::iterator I = GCRegistry::begin(), - E = GCRegistry::end(); I != E; ++I) { - if (Name == I->getName()) { - std::unique_ptr<GCStrategy> S = I->instantiate(); - S->Name = Name; - StrategyMap[Name] = S.get(); - StrategyList.push_back(std::move(S)); - return StrategyList.back().get(); - } - } - - dbgs() << "unsupported GC: " << Name << "\n"; - llvm_unreachable(nullptr); -} - 
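The getOrCreateStrategy removed above lives on in GCMetadata.cpp as getGCStrategy, shown a little further down; its iterator-based registry walk becomes a range over GCRegistry::entries(). The lookup half in isolation, with instantiateByName as a hypothetical name:

    static std::unique_ptr<GCStrategy> instantiateByName(StringRef Name) {
      for (auto &Entry : GCRegistry::entries())
        if (Name == Entry.getName())
          return Entry.instantiate();
      return nullptr; // caller reports the unsupported-GC error
    }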
GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!"); assert(F.hasGC()); - + finfo_map_type::iterator I = FInfoMap.find(&F); if (I != FInfoMap.end()) return *I->second; - - GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC()); + + GCStrategy *S = getGCStrategy(F.getGC()); Functions.push_back(make_unique<GCFunctionInfo>(F, *S)); GCFunctionInfo *GFI = Functions.back().get(); FInfoMap[&F] = GFI; @@ -100,8 +76,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { void GCModuleInfo::clear() { Functions.clear(); FInfoMap.clear(); - StrategyMap.clear(); - StrategyList.clear(); + GCStrategyList.clear(); } // ----------------------------------------------------------------------------- @@ -112,7 +87,6 @@ FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { return new Printer(OS); } - const char *Printer::getPassName() const { return "Print Garbage Collector Information"; } @@ -125,42 +99,45 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - case GC::Loop: return "loop"; - case GC::Return: return "return"; - case GC::PreCall: return "pre-call"; - case GC::PostCall: return "post-call"; + case GC::PreCall: + return "pre-call"; + case GC::PostCall: + return "post-call"; } llvm_unreachable("Invalid point kind"); } bool Printer::runOnFunction(Function &F) { - if (F.hasGC()) return false; - + if (F.hasGC()) + return false; + GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F); - + OS << "GC roots for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), - RE = FD->roots_end(); RI != RE; ++RI) + RE = FD->roots_end(); + RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - + OS << "GC safe points for " << FD->getFunction().getName() << ":\n"; - for (GCFunctionInfo::iterator PI = FD->begin(), - PE = FD->end(); PI != PE; ++PI) { - - OS << "\t" << PI->Label->getName() << ": " - << DescKind(PI->Kind) << ", live = {"; - + for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; + ++PI) { + + OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind) + << ", live = {"; + for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI), - RE = FD->live_end(PI);;) { + RE = FD->live_end(PI); + ;) { OS << " " << RI->Num; if (++RI == RE) break; OS << ","; } - + OS << " }\n"; } - + return false; } @@ -170,3 +147,31 @@ bool Printer::doFinalization(Module &M) { GMI->clear(); return false; } + +GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) { + // TODO: Arguably, just doing a linear search would be faster for small N + auto NMI = GCStrategyMap.find(Name); + if (NMI != GCStrategyMap.end()) + return NMI->getValue(); + + for (auto& Entry : GCRegistry::entries()) { + if (Name == Entry.getName()) { + std::unique_ptr<GCStrategy> S = Entry.instantiate(); + S->Name = Name; + GCStrategyMap[Name] = S.get(); + GCStrategyList.push_back(std::move(S)); + return GCStrategyList.back().get(); + } + } + + if (GCRegistry::begin() == GCRegistry::end()) { + // In normal operation, the registry should not be empty. There should + // be the builtin GCs if nothing else. The most likely scenario here is + // that we got here without running the initializers used by the Registry + // itself and it's registration mechanism. 
+ const std::string error = ("unsupported GC: " + Name).str() + + " (did you remember to link and initialize the CodeGen library?)"; + report_fatal_error(error); + } else + report_fatal_error(std::string("unsupported GC: ") + Name); +} diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp index fdff4a7..bb8cfa1 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp @@ -14,6 +14,6 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" using namespace llvm; -GCMetadataPrinter::GCMetadataPrinter() { } +GCMetadataPrinter::GCMetadataPrinter() {} -GCMetadataPrinter::~GCMetadataPrinter() { } +GCMetadataPrinter::~GCMetadataPrinter() {} diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp new file mode 100644 index 0000000..d8edd7e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp @@ -0,0 +1,354 @@ +//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the lowering for the gc.root mechanism. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { + +/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or +/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as +/// directed by the GCStrategy. It also performs automatic root initialization +/// and custom intrinsic lowering. +class LowerIntrinsics : public FunctionPass { + bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + +public: + static char ID; + + LowerIntrinsics(); + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; +}; + +/// GCMachineCodeAnalysis - This is a target-independent pass over the machine +/// function representation to identify safe points for the garbage collector +/// in the machine code. It inserts labels at safe points and populates a +/// GCMetadata record for each function. 
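As the LowerIntrinsics comment below says, default lowering turns the barrier intrinsics into plain memory operations (cf. PerformDefaultLowering further down, which constructs a StoreInst directly; this hypothetical helper is merely the IRBuilder equivalent of the write-barrier case):

    static void lowerWriteBarrier(IntrinsicInst *CI) {
      // llvm.gcwrite(%value, %object, %slot)  ==>  store %value, %slot
      IRBuilder<> B(CI);
      B.CreateStore(CI->getArgOperand(0), CI->getArgOperand(2));
      CI->eraseFromParent();
    }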
+class GCMachineCodeAnalysis : public MachineFunctionPass {
+  GCFunctionInfo *FI;
+  MachineModuleInfo *MMI;
+  const TargetInstrInfo *TII;
+
+  void FindSafePoints(MachineFunction &MF);
+  void VisitCallPoint(MachineBasicBlock::iterator MI);
+  MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                        DebugLoc DL) const;
+
+  void FindStackOffsets(MachineFunction &MF);
+
+public:
+  static char ID;
+
+  GCMachineCodeAnalysis();
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); }
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) {
+  initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+}
+
+const char *LowerIntrinsics::getPassName() const {
+  return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+  FunctionPass::getAnalysisUsage(AU);
+  AU.addRequired<GCModuleInfo>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+static bool NeedsDefaultLoweringPass(const GCStrategy &C) {
+  // Default lowering is necessary only if read or write barriers have a default
+  // action. The default for roots is no action.
+  return !C.customWriteBarrier() || !C.customReadBarrier() ||
+         C.initializeRoots();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+  GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    if (!I->isDeclaration() && I->hasGC())
+      MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+  return false;
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+static bool CouldBecomeSafePoint(Instruction *I) {
+  // The natural definition of instructions which could introduce safe points
+  // are:
+  //
+  //   - call, invoke (AfterCall, BeforeCall)
+  //   - phis (Loops)
+  //   - invoke, ret, unwind (Exit)
+  //
+  // However, instructions as seemingly innocuous as arithmetic can become
+  // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+  // it is necessary to take a conservative approach.
+
+  if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) ||
+      isa<LoadInst>(I))
+    return false;
+
+  // llvm.gcroot is safe because it doesn't do anything at runtime.
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    if (Function *F = CI->getCalledFunction())
+      if (Intrinsic::ID IID = F->getIntrinsicID())
+        if (IID == Intrinsic::gcroot)
+          return false;
+
+  return true;
+}
+
+static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
+                                   unsigned Count) {
+  // Scroll past alloca instructions.
+  BasicBlock::iterator IP = F.getEntryBlock().begin();
+  while (isa<AllocaInst>(IP))
+    ++IP;
+
+  // Search for initializers in the initial BB.
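CouldBecomeSafePoint above is conservative for a concrete reason: on a 32-bit target a plain udiv on i64 typically lowers to a libcall such as __udivdi3, so a real call, and with it a potential safe point, can appear where the IR showed only arithmetic. The inverse predicate, for emphasis; cannotBecomeCall is a hypothetical name and the llvm.gcroot special case is left out:

    // Only these instruction kinds are guaranteed to survive lowering
    // without turning into a call; everything else may become a safe point.
    static bool cannotBecomeCall(const Instruction *I) {
      return isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
             isa<LoadInst>(I) || isa<StoreInst>(I);
    }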
+ SmallPtrSet<AllocaInst *, 16> InitedRoots; + for (; !CouldBecomeSafePoint(IP); ++IP) + if (StoreInst *SI = dyn_cast<StoreInst>(IP)) + if (AllocaInst *AI = + dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) + InitedRoots.insert(AI); + + // Add root initializers. + bool MadeChange = false; + + for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) + if (!InitedRoots.count(*I)) { + StoreInst *SI = new StoreInst( + ConstantPointerNull::get(cast<PointerType>( + cast<PointerType>((*I)->getType())->getElementType())), + *I); + SI->insertAfter(*I); + MadeChange = true; + } + + return MadeChange; +} + +/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores. +/// Leave gcroot intrinsics; the code generator needs to see those. +bool LowerIntrinsics::runOnFunction(Function &F) { + // Quick exit for functions that do not use GC. + if (!F.hasGC()) + return false; + + GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); + GCStrategy &S = FI.getStrategy(); + + bool MadeChange = false; + + if (NeedsDefaultLoweringPass(S)) + MadeChange |= PerformDefaultLowering(F, S); + + return MadeChange; +} + +bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { + bool LowerWr = !S.customWriteBarrier(); + bool LowerRd = !S.customReadBarrier(); + bool InitRoots = S.initializeRoots(); + + SmallVector<AllocaInst *, 32> Roots; + + bool MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { + Function *F = CI->getCalledFunction(); + switch (F->getIntrinsicID()) { + case Intrinsic::gcwrite: + if (LowerWr) { + // Replace a write barrier with a simple store. + Value *St = + new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); + CI->replaceAllUsesWith(St); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcread: + if (LowerRd) { + // Replace a read barrier with a simple load. + Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); + Ld->takeName(CI); + CI->replaceAllUsesWith(Ld); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcroot: + if (InitRoots) { + // Initialize the GC root, but do not delete the intrinsic. The + // backend needs the intrinsic to flag the stack slot. 
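The initializer that InsertRootInitializers above emits for each uninitialized root, restated with IRBuilder; initRoot is a hypothetical name, and gc.root slots are pointer-typed allocas, hence the cast:

    #include <iterator>

    static void initRoot(AllocaInst *AI) {
      IRBuilder<> B(AI->getParent(), std::next(BasicBlock::iterator(AI)));
      PointerType *ElemTy = cast<PointerType>(AI->getAllocatedType());
      // Null-initialize the slot so the collector never sees stack garbage.
      B.CreateStore(ConstantPointerNull::get(ElemTy), AI);
    }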
+ Roots.push_back( + cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + } + break; + default: + continue; + } + + MadeChange = true; + } + } + } + + if (Roots.size()) + MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); + + return MadeChange; +} + +// ----------------------------------------------------------------------------- + +char GCMachineCodeAnalysis::ID = 0; +char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; + +INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", + "Analyze Machine Code For Garbage Collection", false, false) + +GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} + +void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); +} + +MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + MCSymbol *Label = MBB.getParent()->getContext().createTempSymbol(); + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); + return Label; +} + +void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { + // Find the return address (next instruction), too, so as to bracket the call + // instruction. + MachineBasicBlock::iterator RAI = CI; + ++RAI; + + if (FI->getStrategy().needsSafePoint(GC::PreCall)) { + MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); + FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); + } + + if (FI->getStrategy().needsSafePoint(GC::PostCall)) { + MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); + FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); + } +} + +void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; + ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); + MI != ME; ++MI) + if (MI->isCall()) { + // Do not treat tail or sibling call sites as safe points. This is + // legal since any arguments passed to the callee which live in the + // remnants of the callers frame will be owned and updated by the + // callee if required. + if (MI->isTerminator()) + continue; + VisitCallPoint(MI); + } +} + +void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + assert(TFI && "TargetRegisterInfo not available!"); + + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); + RI != FI->roots_end();) { + // If the root references a dead object, no need to keep it. + if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { + RI = FI->removeStackRoot(RI); + } else { + RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + ++RI; + } + } +} + +bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { + // Quick exit for functions that do not use GC. + if (!MF.getFunction()->hasGC()) + return false; + + FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); + MMI = &getAnalysis<MachineModuleInfo>(); + TII = MF.getSubtarget().getInstrInfo(); + + // Find the size of the stack frame. There may be no correct static frame + // size, we use UINT64_MAX to represent this. 
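FindStackOffsets above filters the root list with the classic erase-or-advance shape: exactly one branch moves the iterator forward, either through the erase's return value or through ++. The same idiom over a standard container; eraseIf is a hypothetical name:

    #include <iterator>

    template <typename Container, typename Pred>
    static void eraseIf(Container &C, Pred P) {
      for (auto It = C.begin(); It != C.end();)
        It = P(*It) ? C.erase(It)     // erase returns the next iterator
                    : std::next(It);  // otherwise just advance
    }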
+ const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const bool DynamicFrameSize = MFI->hasVarSizedObjects() || + RegInfo->needsStackRealignment(MF); + FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize()); + + // Find all safe points. + if (FI->getStrategy().needsSafePoints()) + FindSafePoints(MF); + + // Find the concrete stack offsets for all roots (stack slots) + FindStackOffsets(MF); + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp index 05c36fc..554d326 100644 --- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp +++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp @@ -1,4 +1,4 @@ -//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===// +//===-- GCStrategy.cpp - Garbage Collector Description --------------------===// // // The LLVM Compiler Infrastructure // @@ -7,399 +7,16 @@ // //===----------------------------------------------------------------------===// // -// This file implements target- and collector-independent garbage collection -// infrastructure. -// -// GCMachineCodeAnalysis identifies the GC safe points in the machine code. -// Roots are identified in SelectionDAGISel. +// This file implements the policy object GCStrategy which describes the +// behavior of a given garbage collector. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -namespace { - - /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or - /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as - /// directed by the GCStrategy. It also performs automatic root initialization - /// and custom intrinsic lowering. - class LowerIntrinsics : public FunctionPass { - static bool NeedsDefaultLoweringPass(const GCStrategy &C); - static bool NeedsCustomLoweringPass(const GCStrategy &C); - static bool CouldBecomeSafePoint(Instruction *I); - bool PerformDefaultLowering(Function &F, GCStrategy &Coll); - static bool InsertRootInitializers(Function &F, - AllocaInst **Roots, unsigned Count); - - public: - static char ID; - - LowerIntrinsics(); - const char *getPassName() const override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - }; - - - /// GCMachineCodeAnalysis - This is a target-independent pass over the machine - /// function representation to identify safe points for the garbage collector - /// in the machine code. It inserts labels at safe points and populates a - /// GCMetadata record for each function. 
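An aside before the remainder of the old pass boilerplate is removed below: the lowering retained above rewrites gcread/gcwrite barriers into plain loads and stores, and null-initializes the stack roots flagged by llvm.gcroot. A minimal sketch of the producer side, assuming this revision's IRBuilder API (emitGCRoot and ObjPtrTy are illustrative names, and the enclosing function is assumed to have a GC strategy set via F.setGC(...)):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Emit an entry-block alloca and mark it as a GC root. LowerIntrinsics
// will later store a null pointer into it if no explicit initializer
// precedes a potential safe point.
static AllocaInst *emitGCRoot(IRBuilder<> &B, Module &M, Type *ObjPtrTy) {
  AllocaInst *Root = B.CreateAlloca(ObjPtrTy, nullptr, "root");
  // llvm.gcroot takes an i8** slot and an i8* metadata operand.
  Value *Slot = B.CreateBitCast(Root, B.getInt8PtrTy()->getPointerTo());
  Function *GCRoot = Intrinsic::getDeclaration(&M, Intrinsic::gcroot);
  B.CreateCall(GCRoot, {Slot, ConstantPointerNull::get(B.getInt8PtrTy())});
  return Root;
}

Passing null metadata is the common case; a collector that wants per-root data would pass a pointer to a descriptor constant instead.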
- class GCMachineCodeAnalysis : public MachineFunctionPass { - const TargetMachine *TM; - GCFunctionInfo *FI; - MachineModuleInfo *MMI; - const TargetInstrInfo *TII; - - void FindSafePoints(MachineFunction &MF); - void VisitCallPoint(MachineBasicBlock::iterator MI); - MCSymbol *InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const; - - void FindStackOffsets(MachineFunction &MF); - - public: - static char ID; - - GCMachineCodeAnalysis(); - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool runOnMachineFunction(MachineFunction &MF) override; - }; - -} - -// ----------------------------------------------------------------------------- - -GCStrategy::GCStrategy() : - UseStatepoints(false), - NeededSafePoints(0), - CustomReadBarriers(false), - CustomWriteBarriers(false), - CustomRoots(false), - CustomSafePoints(false), - InitRoots(true), - UsesMetadata(false) -{} - -// ----------------------------------------------------------------------------- - -INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", - false, false) -INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) -INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) - -FunctionPass *llvm::createGCLoweringPass() { - return new LowerIntrinsics(); -} - -char LowerIntrinsics::ID = 0; - -LowerIntrinsics::LowerIntrinsics() - : FunctionPass(ID) { - initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); - } - -const char *LowerIntrinsics::getPassName() const { - return "Lower Garbage Collection Instructions"; -} - -void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { - FunctionPass::getAnalysisUsage(AU); - AU.addRequired<GCModuleInfo>(); - AU.addPreserved<DominatorTreeWrapperPass>(); -} - -/// doInitialization - If this module uses the GC intrinsics, find them now. -bool LowerIntrinsics::doInitialization(Module &M) { - // FIXME: This is rather antisocial in the context of a JIT since it performs - // work against the entire module. But this cannot be done at - // runFunction time (initializeCustomLowering likely needs to change - // the module). - GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); - assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration() && I->hasGC()) - MI->getFunctionInfo(*I); // Instantiate the GC strategy. - - bool MadeChange = false; - for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) - if (NeedsCustomLoweringPass(**I)) - if ((*I)->initializeCustomLowering(M)) - MadeChange = true; - - return MadeChange; -} - -bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, - unsigned Count) { - // Scroll past alloca instructions. - BasicBlock::iterator IP = F.getEntryBlock().begin(); - while (isa<AllocaInst>(IP)) ++IP; - - // Search for initializers in the initial BB. - SmallPtrSet<AllocaInst*,16> InitedRoots; - for (; !CouldBecomeSafePoint(IP); ++IP) - if (StoreInst *SI = dyn_cast<StoreInst>(IP)) - if (AllocaInst *AI = - dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) - InitedRoots.insert(AI); - - // Add root initializers. 
- bool MadeChange = false; - - for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) - if (!InitedRoots.count(*I)) { - StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>( - cast<PointerType>((*I)->getType())->getElementType())), - *I); - SI->insertAfter(*I); - MadeChange = true; - } - - return MadeChange; -} - -bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) { - // Default lowering is necessary only if read or write barriers have a default - // action. The default for roots is no action. - return !C.customWriteBarrier() - || !C.customReadBarrier() - || C.initializeRoots(); -} - -bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) { - // Custom lowering is only necessary if enabled for some action. - return C.customWriteBarrier() - || C.customReadBarrier() - || C.customRoots(); -} - -/// CouldBecomeSafePoint - Predicate to conservatively determine whether the -/// instruction could introduce a safe point. -bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) { - // The natural definition of instructions which could introduce safe points - // are: - // - // - call, invoke (AfterCall, BeforeCall) - // - phis (Loops) - // - invoke, ret, unwind (Exit) - // - // However, instructions as seemingly inoccuous as arithmetic can become - // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead - // it is necessary to take a conservative approach. - - if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || - isa<StoreInst>(I) || isa<LoadInst>(I)) - return false; - - // llvm.gcroot is safe because it doesn't do anything at runtime. - if (CallInst *CI = dyn_cast<CallInst>(I)) - if (Function *F = CI->getCalledFunction()) - if (unsigned IID = F->getIntrinsicID()) - if (IID == Intrinsic::gcroot) - return false; - - return true; -} - -/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores. -/// Leave gcroot intrinsics; the code generator needs to see those. -bool LowerIntrinsics::runOnFunction(Function &F) { - // Quick exit for functions that do not use GC. - if (!F.hasGC()) - return false; - - GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); - GCStrategy &S = FI.getStrategy(); - - bool MadeChange = false; - - if (NeedsDefaultLoweringPass(S)) - MadeChange |= PerformDefaultLowering(F, S); - - bool UseCustomLoweringPass = NeedsCustomLoweringPass(S); - if (UseCustomLoweringPass) - MadeChange |= S.performCustomLowering(F); - - // Custom lowering may modify the CFG, so dominators must be recomputed. - if (UseCustomLoweringPass) { - if (DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>()) - DTWP->getDomTree().recalculate(F); - } - - return MadeChange; -} - -bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { - bool LowerWr = !S.customWriteBarrier(); - bool LowerRd = !S.customReadBarrier(); - bool InitRoots = S.initializeRoots(); - - SmallVector<AllocaInst*, 32> Roots; - - bool MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { - Function *F = CI->getCalledFunction(); - switch (F->getIntrinsicID()) { - case Intrinsic::gcwrite: - if (LowerWr) { - // Replace a write barrier with a simple store. 
- Value *St = new StoreInst(CI->getArgOperand(0), - CI->getArgOperand(2), CI); - CI->replaceAllUsesWith(St); - CI->eraseFromParent(); - } - break; - case Intrinsic::gcread: - if (LowerRd) { - // Replace a read barrier with a simple load. - Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); - Ld->takeName(CI); - CI->replaceAllUsesWith(Ld); - CI->eraseFromParent(); - } - break; - case Intrinsic::gcroot: - if (InitRoots) { - // Initialize the GC root, but do not delete the intrinsic. The - // backend needs the intrinsic to flag the stack slot. - Roots.push_back(cast<AllocaInst>( - CI->getArgOperand(0)->stripPointerCasts())); - } - break; - default: - continue; - } - - MadeChange = true; - } - } - } - - if (Roots.size()) - MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); - - return MadeChange; -} - -// ----------------------------------------------------------------------------- - -char GCMachineCodeAnalysis::ID = 0; -char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; - -INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", - "Analyze Machine Code For Garbage Collection", false, false) - -GCMachineCodeAnalysis::GCMachineCodeAnalysis() - : MachineFunctionPass(ID) {} - -void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - MachineFunctionPass::getAnalysisUsage(AU); - AU.setPreservesAll(); - AU.addRequired<MachineModuleInfo>(); - AU.addRequired<GCModuleInfo>(); -} - -MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const { - MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); - BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); - return Label; -} - -void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { - // Find the return address (next instruction), too, so as to bracket the call - // instruction. - MachineBasicBlock::iterator RAI = CI; - ++RAI; - - if (FI->getStrategy().needsSafePoint(GC::PreCall)) { - MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); - FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); - } - - if (FI->getStrategy().needsSafePoint(GC::PostCall)) { - MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); - FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); - } -} - -void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { - for (MachineFunction::iterator BBI = MF.begin(), - BBE = MF.end(); BBI != BBE; ++BBI) - for (MachineBasicBlock::iterator MI = BBI->begin(), - ME = BBI->end(); MI != ME; ++MI) - if (MI->isCall()) - VisitCallPoint(MI); -} - -void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { - const TargetFrameLowering *TFI = TM->getSubtargetImpl()->getFrameLowering(); - assert(TFI && "TargetRegisterInfo not available!"); - - for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); - RI != FI->roots_end();) { - // If the root references a dead object, no need to keep it. - if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { - RI = FI->removeStackRoot(RI); - } else { - RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); - ++RI; - } - } -} - -bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { - // Quick exit for functions that do not use GC. 
-  if (!MF.getFunction()->hasGC())
-    return false;
-
-  FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
-  if (!FI->getStrategy().needsSafePoints())
-    return false;
-
-  TM = &MF.getTarget();
-  MMI = &getAnalysis<MachineModuleInfo>();
-  TII = TM->getSubtargetImpl()->getInstrInfo();
-
-  // Find the size of the stack frame.
-  FI->setFrameSize(MF.getFrameInfo()->getStackSize());
-
-  // Find all safe points.
-  if (FI->getStrategy().customSafePoints()) {
-    FI->getStrategy().findCustomSafePoints(*FI, MF);
-  } else {
-    FindSafePoints(MF);
-  }
-
-  // Find the stack offsets for all roots.
-  FindStackOffsets(MF);
-
-  return false;
-}
+GCStrategy::GCStrategy()
+    : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
+      CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
+      UsesMetadata(false) {}
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 8257567..79de175 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -49,9 +49,19 @@
 // str r0, [r5], #4
 //
 // note that we saved 2 registers here almost "for free".
+//
+// However, merging globals can have tradeoffs:
+// - it confuses debuggers, tools, and users
+// - it makes linker optimizations less useful (order files, LOHs, ...)
+// - it forces usage of indexed addressing (which isn't necessarily "free")
+// - it can increase register pressure when the uses are disparate enough.
+//
+// We use heuristics to discover the best global grouping we can (cf. cl::opts).
 // ===---------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/Passes.h"
@@ -66,18 +76,31 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
 using namespace llvm;
 
 #define DEBUG_TYPE "global-merge"
 
+// FIXME: This is only useful as a last-resort way to disable the pass.
static cl::opt<bool> EnableGlobalMerge("enable-global-merge", cl::Hidden, - cl::desc("Enable global merge pass"), + cl::desc("Enable the global merge pass"), cl::init(true)); +static cl::opt<bool> GlobalMergeGroupByUse( + "global-merge-group-by-use", cl::Hidden, + cl::desc("Improve global merge pass to look at uses"), cl::init(true)); + +static cl::opt<bool> GlobalMergeIgnoreSingleUse( + "global-merge-ignore-single-use", cl::Hidden, + cl::desc("Improve global merge pass to ignore globals only used alone"), + cl::init(true)); + static cl::opt<bool> EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), @@ -90,13 +113,24 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"), cl::init(false)); -STATISTIC(NumMerged , "Number of globals merged"); +STATISTIC(NumMerged, "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { const TargetMachine *TM; + const DataLayout *DL; + // FIXME: Infer the maximum possible offset depending on the actual users + // (these max offsets are different for the users inside Thumb or ARM + // functions), see the code that passes in the offset in the ARM backend + // for more information. + unsigned MaxOffset; bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Merge everything in \p Globals for which the corresponding bit + /// in \p GlobalSet is set. + bool doMerge(SmallVectorImpl<GlobalVariable *> &Globals, + const BitVector &GlobalSet, Module &M, bool isConst, + unsigned AddrSpace) const; /// \brief Check if the given variable has been identified as must keep /// \pre setMustKeepGlobalVariables must have been called on the Module that @@ -117,8 +151,10 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - explicit GlobalMerge(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { + explicit GlobalMerge(const TargetMachine *TM = nullptr, + unsigned MaximalOffset = 0) + : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()), + MaxOffset(MaximalOffset) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -138,48 +174,243 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables", - false, false) +INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables", + false, false) +INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", + false, false) bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const { - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); - const DataLayout *DL = TLI->getDataLayout(); - - // FIXME: Infer the maximum possible offset depending on the actual users - // (these max offsets are different for the users inside Thumb or ARM - // functions) - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); - // FIXME: Find better heuristics std::stable_sort(Globals.begin(), Globals.end(), - [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + [this](const GlobalVariable *GV1, const GlobalVariable *GV2) { Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2)); }); + // If we want to just blindly group all globals together, do so. 
+  if (!GlobalMergeGroupByUse) {
+    BitVector AllGlobals(Globals.size());
+    AllGlobals.set();
+    return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+  }
+
+  // If we want to be smarter, look at all uses of each global, to try to
+  // discover all sets of globals used together, and how many times each of
+  // these sets occurred.
+  //
+  // Keep this reasonably efficient, by having an append-only list of all sets
+  // discovered so far (UsedGlobalSets), and mapping each "together-ness" unit
+  // of code (currently, a Function) to the set of globals seen so far that are
+  // used together in that unit (GlobalUsesByFunction).
+  //
+  // When we look at the Nth global, we know that any new set is either:
+  //  - the singleton set {N}, containing this global only, or
+  //  - the union of {N} and a previously-discovered set, containing some
+  //    combination of the previous N-1 globals.
+  // Using that knowledge, when looking at the Nth global, we can keep:
+  //  - a reference to the singleton set {N} (CurGVOnlySetIdx)
+  //  - a list mapping each previous set to its union with {N} (EncounteredUGS),
+  //    if it actually occurs.
+
+  // We keep track of the sets of globals used together "close enough".
+  struct UsedGlobalSet {
+    UsedGlobalSet(size_t Size) : Globals(Size), UsageCount(1) {}
+    BitVector Globals;
+    unsigned UsageCount;
+  };
+
+  // Each set is unique in UsedGlobalSets.
+  std::vector<UsedGlobalSet> UsedGlobalSets;
+
+  // Avoid repeating the create-global-set pattern.
+  auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
+    UsedGlobalSets.emplace_back(Globals.size());
+    return UsedGlobalSets.back();
+  };
+
+  // The first set is the empty set.
+  CreateGlobalSet().UsageCount = 0;
+
+  // We define "close enough" to be "in the same function".
+  // FIXME: Grouping uses by function is way too aggressive, so we should have
+  // a better metric for distance between uses.
+  // The obvious alternative would be to group by BasicBlock, but that's in
+  // turn too conservative.
+  // Anything in between wouldn't be trivial to compute, so just stick with
+  // per-function grouping.
+
+  // The value type is an index into UsedGlobalSets.
+  // The default (0) conveniently points to the empty set.
+  DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
+
+  // Now, look at each merge-eligible global in turn.
+
+  // Keep track of the sets we already encountered to which we added the
+  // current global.
+  // Each element matches the same-index element in UsedGlobalSets.
+  // This lets us efficiently tell whether a set has already been expanded to
+  // include the current global.
+  std::vector<size_t> EncounteredUGS;
+
+  for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
+    GlobalVariable *GV = Globals[GI];
+
+    // Reset the encountered sets for this global...
+    std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
+    // ...and grow it in case we created new sets for the previous global.
+    EncounteredUGS.resize(UsedGlobalSets.size());
+
+    // We might need to create a set that only consists of the current global.
+    // Keep track of its index into UsedGlobalSets.
+    size_t CurGVOnlySetIdx = 0;
+
+    // For each global, look at all its Uses.
+    for (auto &U : GV->uses()) {
+      // This Use might be a ConstantExpr. We're interested in Instruction
+      // users, so look through ConstantExpr...
+      Use *UI, *UE;
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
+        UI = &*CE->use_begin();
+        UE = nullptr;
+      } else if (isa<Instruction>(U.getUser())) {
+        UI = &U;
+        UE = UI->getNext();
+      } else {
+        continue;
+      }
+
+      // ...to iterate on all the instruction users of the global.
+      // Note that we iterate on Uses and not on Users to be able to getNext().
+      for (; UI != UE; UI = UI->getNext()) {
+        Instruction *I = dyn_cast<Instruction>(UI->getUser());
+        if (!I)
+          continue;
+
+        Function *ParentFn = I->getParent()->getParent();
+        size_t UGSIdx = GlobalUsesByFunction[ParentFn];
+
+        // If this is the first global the function uses, map it to the set
+        // consisting of this global only.
+        if (!UGSIdx) {
+          // If that set doesn't exist yet, create it.
+          if (!CurGVOnlySetIdx) {
+            CurGVOnlySetIdx = UsedGlobalSets.size();
+            CreateGlobalSet().Globals.set(GI);
+          } else {
+            ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
+          }
+
+          GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
+          continue;
+        }
+
+        // If we already encountered this function, just increment the counter.
+        if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
+          ++UsedGlobalSets[UGSIdx].UsageCount;
+          continue;
+        }
+
+        // If not, the previous set wasn't actually used in this function.
+        --UsedGlobalSets[UGSIdx].UsageCount;
+
+        // If we already expanded the previous set to include this global, just
+        // reuse that expanded set.
+        if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
+          ++UsedGlobalSets[ExpandedIdx].UsageCount;
+          GlobalUsesByFunction[ParentFn] = ExpandedIdx;
+          continue;
+        }
+
+        // If not, create a new set consisting of the union of the previous set
+        // and this global. Mark it as encountered, so we can reuse it later.
+        GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
+            UsedGlobalSets.size();
+
+        UsedGlobalSet &NewUGS = CreateGlobalSet();
+        NewUGS.Globals.set(GI);
+        NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
+      }
+    }
+  }
+
+  // We have now found a number of sets of globals used together, and
+  // accumulated the number of times we encountered each set (i.e., the number
+  // of functions that use that exact set of globals).
+  //
+  // Multiply that by the size of the set to give us a crude profitability
+  // metric.
+  std::sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
+            [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
+              return UGS1.Globals.count() * UGS1.UsageCount <
+                     UGS2.Globals.count() * UGS2.UsageCount;
+            });
+
+  // We can choose to merge all globals together, but ignore globals never used
+  // with another global. This catches the obviously non-profitable cases of
+  // having a single global, but is aggressive enough for any other case.
+  if (GlobalMergeIgnoreSingleUse) {
+    BitVector AllGlobals(Globals.size());
+    for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
+      const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+      if (UGS.UsageCount == 0)
+        continue;
+      if (UGS.Globals.count() > 1)
+        AllGlobals |= UGS.Globals;
+    }
+    return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+  }
+
+  // Starting from the sets with the best (=biggest) profitability, find a
+  // good combination.
+  // The ideal (and expensive) solution can only be found by trying all
+  // combinations, looking for the one with the best profitability.
+  // Don't be smart about it, and just pick the first compatible combination,
+  // starting with the sets with the best profitability.
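To make the crude profitability ordering concrete before the picking loop below, here is a self-contained toy (not LLVM code; std::bitset stands in for BitVector, and the counts are invented):

#include <algorithm>
#include <bitset>
#include <cstdio>
#include <vector>

struct ToyUsedGlobalSet {
  std::bitset<8> Globals; // which globals the set contains
  unsigned UsageCount;    // how many functions use exactly this set
};

int main() {
  std::vector<ToyUsedGlobalSet> Sets = {
      {std::bitset<8>("00000011"), 5},  // {g0,g1} used together in 5 functions
      {std::bitset<8>("00001110"), 2}}; // {g1,g2,g3} used together in 2
  // Same comparison as the std::sort above: ascending size * usage count.
  std::sort(Sets.begin(), Sets.end(),
            [](const ToyUsedGlobalSet &A, const ToyUsedGlobalSet &B) {
              return A.Globals.count() * A.UsageCount <
                     B.Globals.count() * B.UsageCount;
            });
  // {g0,g1} ranks best: 2 globals * 5 uses = 10 beats 3 * 2 = 6.
  std::printf("best score: %zu\n",
              Sets.back().Globals.count() * Sets.back().UsageCount);
}

The greedy loop that follows walks this order from the best set down, skipping any set that overlaps globals already picked.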
+ BitVector PickedGlobals(Globals.size()); + bool Changed = false; + + for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) { + const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1]; + if (UGS.UsageCount == 0) + continue; + if (PickedGlobals.anyCommon(UGS.Globals)) + continue; + PickedGlobals |= UGS.Globals; + // If the set only contains one global, there's no point in merging. + // Ignore the global for inclusion in other sets though, so keep it in + // PickedGlobals. + if (UGS.Globals.count() < 2) + continue; + Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace); + } + + return Changed; +} + +bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals, + const BitVector &GlobalSet, Module &M, bool isConst, + unsigned AddrSpace) const { + Type *Int32Ty = Type::getInt32Ty(M.getContext()); assert(Globals.size() > 1); - // FIXME: This simple solution merges globals all together as maximum as - // possible. However, with this solution it would be hard to remove dead - // global symbols at link-time. An alternative solution could be checking - // global symbols references function by function, and make the symbols - // being referred in the same function merged and we would probably need - // to introduce heuristic algorithm to solve the merge conflict from - // different functions. - for (size_t i = 0, e = Globals.size(); i != e; ) { - size_t j = 0; + DEBUG(dbgs() << " Trying to merge set, starts with #" + << GlobalSet.find_first() << "\n"); + + ssize_t i = GlobalSet.find_first(); + while (i != -1) { + ssize_t j = 0; uint64_t MergedSize = 0; std::vector<Type*> Tys; std::vector<Constant*> Inits; bool HasExternal = false; GlobalVariable *TheFirstExternal = 0; - for (j = i; j != e; ++j) { + for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getType()->getElementType(); MergedSize += DL->getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { @@ -212,23 +443,23 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, : "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace); - for (size_t k = i; k < j; ++k) { + for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k)) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, k-i) + ConstantInt::get(Int32Ty, idx++) }; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); + Constant *GEP = + ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); if (Linkage != GlobalValue::InternalLinkage) { // Generate a new alias... 
        auto *PTy = cast<PointerType>(GEP->getType());
-        GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
-                            Linkage, Name, GEP, &M);
+        GlobalAlias::create(PTy, Linkage, Name, GEP, &M);
       }
 
       NumMerged++;
@@ -282,9 +513,6 @@ bool GlobalMerge::doInitialization(Module &M) {
   DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
                                                         BSSGlobals;
-  const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
-  const DataLayout *DL = TLI->getDataLayout();
-  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
   bool Changed = false;
   setMustKeepGlobalVariables(M);
@@ -357,6 +585,6 @@ bool GlobalMerge::doFinalization(Module &M) {
   return false;
 }
 
-Pass *llvm::createGlobalMergePass(const TargetMachine *TM) {
-  return new GlobalMerge(TM);
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) {
+  return new GlobalMerge(TM, Offset);
 }
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index e84d25d9..0d59c72 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -24,7 +24,6 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -247,7 +246,7 @@ namespace {
         return true;
       else if (Incr1 == Incr2) {
         // Favors subsumption.
-        if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+        if (!C1->NeedSubsumption && C2->NeedSubsumption)
           return true;
         else if (C1->NeedSubsumption == C2->NeedSubsumption) {
           // Favors diamond over triangle, etc.
@@ -271,15 +270,13 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
 
 bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
-  TLI = MF.getSubtarget().getTargetLowering();
-  TII = MF.getSubtarget().getInstrInfo();
-  TRI = MF.getSubtarget().getRegisterInfo();
+  const TargetSubtargetInfo &ST = MF.getSubtarget();
+  TLI = ST.getTargetLowering();
+  TII = ST.getInstrInfo();
+  TRI = ST.getRegisterInfo();
   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   MRI = &MF.getRegInfo();
-
-  const TargetSubtargetInfo &ST =
-      MF.getTarget().getSubtarget<TargetSubtargetInfo>();
   SchedModel.init(ST.getSchedModel(), &ST, TII);
 
   if (!TII) return false;
@@ -290,7 +287,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   if (!PreRegAlloc) {
     // Tail merging tends to expose more if-conversion opportunities.
     BranchFolder BF(true, false, *MBFI, *MBPI);
-    BFChange = BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
+    BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
                                    getAnalysisIfAvailable<MachineModuleInfo>());
   }
 
@@ -728,6 +725,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
   if (BBI.IsDone || BBI.IsUnpredicable)
     return false;
 
+  // If it is already predicated but we couldn't analyze its terminator, the
+  // latter might fall through, but we can't determine where to.
+  // Conservatively avoid if-converting again.
+  if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
+    return false;
+
   // If it is already predicated, check if the new predicate subsumes
   // its predicate.
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) @@ -971,26 +974,37 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all /// values defined in MI which are not live/used by MI. static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) { - for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { - if (!Ops->isReg() || !Ops->isKill()) - continue; - unsigned Reg = Ops->getReg(); - if (Reg == 0) - continue; - Redefs.removeReg(Reg); - } - for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { - if (!Ops->isReg() || !Ops->isDef()) - continue; - unsigned Reg = Ops->getReg(); - if (Reg == 0 || Redefs.contains(Reg)) + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers; + Redefs.stepForward(*MI, Clobbers); + + // Now add the implicit uses for each of the clobbered values. + for (auto Reg : Clobbers) { + // FIXME: Const cast here is nasty, but better than making StepForward + // take a mutable instruction instead of const. + MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second); + MachineInstr *OpMI = Op.getParent(); + MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI); + if (Op.isRegMask()) { + // First handle regmasks. They clobber any entries in the mask which + // means that we need a def for those registers. + MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); + + // We also need to add an implicit def of this register for the later + // use to read from. + // For the register allocator to have allocated a register clobbered + // by the call which is used later, it must be the case that + // the call doesn't return. + MIB.addReg(Reg.first, RegState::Implicit | RegState::Define); continue; - Redefs.addReg(Reg); - - MachineOperand &Op = *Ops; - MachineInstr *MI = Op.getParent(); - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); - MIB.addReg(Reg, RegState::Implicit | RegState::Undef); + } + assert(Op.isReg() && "Register operand required"); + if (Op.isDead()) { + // If we found a dead def, but it needs to be live, then remove the dead + // flag. + if (Redefs.contains(Op.getReg())) + Op.setIsDead(false); + } + MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); } } @@ -1370,7 +1384,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; ++I) { - Redefs.stepForward(*I); + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers; + Redefs.stepForward(*I, IgnoredClobbers); } BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1504,10 +1519,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } static bool MaySpeculate(const MachineInstr *MI, - SmallSet<unsigned, 4> &LaterRedefs, - const TargetInstrInfo *TII) { + SmallSet<unsigned, 4> &LaterRedefs) { bool SawStore = true; - if (!MI->isSafeToMove(TII, nullptr, SawStore)) + if (!MI->isSafeToMove(nullptr, SawStore)) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -1538,7 +1552,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, // It may be possible not to predicate an instruction if it's the 'true' // side of a diamond and the 'false' side may re-define the instruction's // defs. 
- if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) { + if (MaySpec && MaySpeculate(I, *LaterRedefs)) { AnyUnpred = true; continue; } @@ -1557,7 +1571,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, UpdatePredRedefs(I, Redefs); } - std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); + BBI.Predicate.append(Cond.begin(), Cond.end()); BBI.IsAnalyzed = false; BBI.NonPredSize = 0; @@ -1622,9 +1636,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, } } - std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), - std::back_inserter(ToBBI.Predicate)); - std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate)); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); + ToBBI.Predicate.append(Cond.begin(), Cond.end()); ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.IsAnalyzed = false; @@ -1663,8 +1676,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { if (NBB && !FromBBI.BB->isSuccessor(NBB)) FromBBI.BB->addSuccessor(NBB); - std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), - std::back_inserter(ToBBI.Predicate)); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); FromBBI.Predicate.clear(); ToBBI.NonPredSize += FromBBI.NonPredSize; diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index df889f7..9989f23 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -135,7 +135,7 @@ private: // Dead defs generated during spilling. SmallVector<MachineInstr*, 8> DeadDefs; - ~InlineSpiller() {} + ~InlineSpiller() override {} public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) @@ -576,8 +576,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, std::tie(SVI, Inserted) = SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); // Add all the PHIs as dependents of NonPHI. - for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi) - SVI->second.Deps.push_back(PHIs[pi]); + SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(), + PHIs.end()); // This is the first time we see NonPHI, add it to the worklist. if (Inserted) WorkList.push_back(std::make_pair(Reg, NonPHI)); @@ -921,7 +921,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i].second); + MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewVReg); MO.setIsKill(); @@ -1100,6 +1100,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, SmallVector<unsigned, 8> FoldOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { unsigned Idx = Ops[i].second; + assert(MI == Ops[i].first && "Instruction conflict during operand folding"); MachineOperand &MO = MI->getOperand(Idx); if (MO.isImplicit()) { ImpReg = MO.getReg(); @@ -1142,13 +1143,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, continue; // FoldMI does not define this physreg. Remove the LI segment. 
assert(MO->isDead() && "Cannot fold physreg def"); - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { - if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - if (VNInfo *VNI = LR->getVNInfoAt(Idx)) - LR->removeValNo(VNI); - } - } + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + LIS.removePhysRegDefAt(Reg, Idx); } LIS.ReplaceMachineInstrInMaps(MI, FoldMI); @@ -1237,6 +1233,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { DebugLoc DL = MI->getDebugLoc(); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) .addFrameIndex(StackSlot) .addImm(Offset) diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index 187e015..fd5749b 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -21,7 +21,8 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" // Static member used for null interference cursors. -InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; +const InterferenceCache::BlockInterference + InterferenceCache::Cursor::NoInterference; // Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a // buffer of size NumPhysRegs to speed up alloc/clear for targets with large diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h index 1791afb..6519a80 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -170,8 +170,8 @@ public: /// Cursor - The primary query interface for the block interference cache. class Cursor { Entry *CacheEntry; - BlockInterference *Current; - static BlockInterference NoInterference; + const BlockInterference *Current; + static const BlockInterference NoInterference; void setEntry(Entry *E) { Current = nullptr; diff --git a/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp b/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp deleted file mode 100644 index 75fa261..0000000 --- a/contrib/llvm/lib/CodeGen/JumpInstrTables.cpp +++ /dev/null @@ -1,296 +0,0 @@ -//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief An implementation of jump-instruction tables. 
-/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "jt" - -#include "llvm/CodeGen/JumpInstrTables.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <vector> - -using namespace llvm; - -char JumpInstrTables::ID = 0; - -INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables", - "Jump-Instruction Tables", true, true) -INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); -INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables", - "Jump-Instruction Tables", true, true) - -STATISTIC(NumJumpTables, "Number of indirect call tables generated"); -STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables"); - -ModulePass *llvm::createJumpInstrTablesPass() { - // The default implementation uses a single table for all functions. - return new JumpInstrTables(JumpTable::Single); -} - -ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) { - return new JumpInstrTables(JTT); -} - -namespace { -static const char jump_func_prefix[] = "__llvm_jump_instr_table_"; -static const char jump_section_prefix[] = ".jump.instr.table.text."; - -// Checks to see if a given CallSite is making an indirect call, including -// cases where the indirect call is made through a bitcast. -bool isIndirectCall(CallSite &CS) { - if (CS.getCalledFunction()) - return false; - - // Check the value to see if it is merely a bitcast of a function. In - // this case, it will translate to a direct function call in the resulting - // assembly, so we won't treat it as an indirect call here. - const Value *V = CS.getCalledValue(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - return !(CE->isCast() && isa<Function>(CE->getOperand(0))); - } - - // Otherwise, since we know it's a call, it must be an indirect call - return true; -} - -// Replaces Functions and GlobalAliases with a different Value. -bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) { - User *Us = U->getUser(); - if (!Us) - return false; - if (Instruction *I = dyn_cast<Instruction>(Us)) { - CallSite CS(I); - - // Don't do the replacement if this use is a direct call to this function. - // If the use is not the called value, then replace it. - if (CS && (isIndirectCall(CS) || CS.isCallee(U))) { - return false; - } - - U->set(V); - } else if (Constant *C = dyn_cast<Constant>(Us)) { - // Don't replace calls to bitcasts of function symbols, since they get - // translated to direct calls. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) { - if (CE->getOpcode() == Instruction::BitCast) { - // This bitcast must have exactly one user. - if (CE->user_begin() != CE->user_end()) { - User *ParentUs = *CE->user_begin(); - if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) { - CallSite CS(CI); - Use &CEU = *CE->use_begin(); - if (CS.isCallee(&CEU)) { - return false; - } - } - } - } - } - - // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier - // requires alias to point to a defined function. 
So, GlobalAlias is handled - // as a separate case in runOnModule. - if (!isa<GlobalAlias>(C)) - C->replaceUsesOfWithOnConstant(GV, V, U); - } else { - llvm_unreachable("The Use of a Function symbol is neither an instruction " - "nor a constant"); - } - - return true; -} - -// Replaces all replaceable address-taken uses of GV with a pointer to a -// jump-instruction table entry. -void replaceValueWithFunction(GlobalValue *GV, Function *F) { - // Go through all uses of this function and replace the uses of GV with the - // jump-table version of the function. Get the uses as a vector before - // replacing them, since replacing them changes the use list and invalidates - // the iterator otherwise. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) { - Use &U = *I++; - - // Replacement of constants replaces all instances in the constant. So, some - // uses might have already been handled by the time we reach them here. - if (U.get() == GV) - replaceGlobalValueIndirectUse(GV, F, &U); - } - - return; -} -} // end anonymous namespace - -JumpInstrTables::JumpInstrTables() - : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), - JTType(JumpTable::Single) { - initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); -} - -JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT) - : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) { - initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); -} - -JumpInstrTables::~JumpInstrTables() {} - -void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<JumpInstrTableInfo>(); -} - -Function *JumpInstrTables::insertEntry(Module &M, Function *Target) { - FunctionType *OrigFunTy = Target->getFunctionType(); - FunctionType *FunTy = transformType(JTType, OrigFunTy); - - JumpMap::iterator it = Metadata.find(FunTy); - if (Metadata.end() == it) { - struct TableMeta Meta; - Meta.TableNum = TableCount; - Meta.Count = 0; - Metadata[FunTy] = Meta; - it = Metadata.find(FunTy); - ++NumJumpTables; - ++TableCount; - } - - it->second.Count++; - - std::string NewName(jump_func_prefix); - NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str(); - Function *JumpFun = - Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M); - // The section for this table - JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str()); - JITI->insertEntry(FunTy, Target, JumpFun); - - ++NumFuncsInJumpTables; - return JumpFun; -} - -bool JumpInstrTables::hasTable(FunctionType *FunTy) { - FunctionType *TransTy = transformType(JTType, FunTy); - return Metadata.end() != Metadata.find(TransTy); -} - -FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT, - FunctionType *FunTy) { - // Returning nullptr forces all types into the same table, since all types map - // to the same type - Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext()); - - // Ignore the return type. 
- Type *RetTy = VoidPtrTy; - bool IsVarArg = FunTy->isVarArg(); - std::vector<Type *> ParamTys(FunTy->getNumParams()); - FunctionType::param_iterator PI, PE; - int i = 0; - - std::vector<Type *> EmptyParams; - Type *Int32Ty = Type::getInt32Ty(FunTy->getContext()); - FunctionType *VoidFnTy = FunctionType::get( - Type::getVoidTy(FunTy->getContext()), EmptyParams, false); - switch (JTT) { - case JumpTable::Single: - - return FunctionType::get(RetTy, EmptyParams, false); - case JumpTable::Arity: - // Transform all types to void* so that all functions with the same arity - // end up in the same table. - for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; - PI++, i++) { - ParamTys[i] = VoidPtrTy; - } - - return FunctionType::get(RetTy, ParamTys, IsVarArg); - case JumpTable::Simplified: - // Project all parameters types to one of 3 types: composite, integer, and - // function, matching the three subclasses of Type. - for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; - ++PI, ++i) { - assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) || - isa<CompositeType>(*PI)) && - "This type is not an Integer or a Composite or a Function"); - if (isa<CompositeType>(*PI)) { - ParamTys[i] = VoidPtrTy; - } else if (isa<FunctionType>(*PI)) { - ParamTys[i] = VoidFnTy; - } else if (isa<IntegerType>(*PI)) { - ParamTys[i] = Int32Ty; - } - } - - return FunctionType::get(RetTy, ParamTys, IsVarArg); - case JumpTable::Full: - // Don't transform this type at all. - return FunTy; - } - - return nullptr; -} - -bool JumpInstrTables::runOnModule(Module &M) { - JITI = &getAnalysis<JumpInstrTableInfo>(); - - // Get the set of jumptable-annotated functions that have their address taken. - DenseMap<Function *, Function *> Functions; - for (Function &F : M) { - if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) { - assert(F.hasUnnamedAddr() && - "Attribute 'jumptable' requires 'unnamed_addr'"); - Functions[&F] = nullptr; - } - } - - // Create the jump-table functions. - for (auto &KV : Functions) { - Function *F = KV.first; - KV.second = insertEntry(M, F); - } - - // GlobalAlias is a special case, because the target of an alias statement - // must be a defined function. So, instead of replacing a given function in - // the alias, we replace all uses of aliases that target jumptable functions. - // Note that there's no need to create these functions, since only aliases - // that target known jumptable functions are replaced, and there's no way to - // put the jumptable annotation on a global alias. - DenseMap<GlobalAlias *, Function *> Aliases; - for (GlobalAlias &GA : M.aliases()) { - Constant *Aliasee = GA.getAliasee(); - if (Function *F = dyn_cast<Function>(Aliasee)) { - auto it = Functions.find(F); - if (it != Functions.end()) { - Aliases[&GA] = it->second; - } - } - } - - // Replace each address taken function with its jump-instruction table entry. 
- for (auto &KV : Functions) - replaceValueWithFunction(KV.first, KV.second); - - for (auto &KV : Aliases) - replaceValueWithFunction(KV.first, KV.second); - - return !Functions.empty(); -} diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 9018314..610c9f4 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -12,32 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" -#include "llvm/CodeGen/JumpInstrTables.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -49,8 +43,16 @@ EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); void LLVMTargetMachine::initAsmInfo() { - MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo( - *getSubtargetImpl()->getRegisterInfo(), getTargetTriple()); + MRI = TheTarget.createMCRegInfo(getTargetTriple()); + MII = TheTarget.createMCInstrInfo(); + // FIXME: Having an MCSubtargetInfo on the target machine is a hack due + // to some backends having subtarget feature dependent module level + // code generation. This is similar to the hack in the AsmPrinter for + // module level assembly etc. + STI = TheTarget.createMCSubtargetInfo(getTargetTriple(), getTargetCPU(), + getTargetFeatureString()); + + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*MRI, getTargetTriple()); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. 
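The LLVMTargetMachine hunks that follow switch the emission entry points from formatted_raw_ostream to raw_pwrite_stream, so the object writer can go back and patch earlier bytes. For orientation, a hedged sketch of a client of the revised addPassesToEmitFile (emitObject is an illustrative name; raw_fd_ostream is a raw_pwrite_stream at this revision):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Configure the codegen pipeline and write an object file for M.
static bool emitObject(TargetMachine &TM, Module &M, StringRef Path) {
  std::error_code EC;
  raw_fd_ostream Out(Path, EC, sys::fs::F_None); // a raw_pwrite_stream
  if (EC)
    return true;
  legacy::PassManager PM;
  // Returns true if the target does not support object file emission.
  if (TM.addPassesToEmitFile(PM, Out, TargetMachine::CGFT_ObjectFile))
    return true;
  PM.run(M);
  return false;
}

addPassesToEmitMC, revised further below, follows the same pattern but hands back the MCContext so callers can build a custom MCStreamer.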
@@ -68,17 +70,20 @@ void LLVMTargetMachine::initAsmInfo() { AsmInfo = TmpAsmInfo; } -LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, - StringRef CPU, StringRef FS, - TargetOptions Options, +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + StringRef DataLayoutString, + StringRef Triple, StringRef CPU, + StringRef FS, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : TargetMachine(T, Triple, CPU, FS, Options) { + : TargetMachine(T, DataLayoutString, Triple, CPU, FS, Options) { CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); } -void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - PM.add(createBasicTargetTransformInfoPass(this)); +TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(BasicTTIImpl(this, F)); + }); } /// addPassesToX helper drives creation and initialization of TargetPassConfig. @@ -89,7 +94,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, AnalysisID StopAfter) { // Add internal analysis passes from the target machine. - TM->addAnalysisPasses(PM); + PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); // Targets may override createPassConfig to provide a target-specific // subclass. @@ -112,8 +117,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. MachineModuleInfo *MMI = new MachineModuleInfo( - *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(), - &TM->getSubtargetImpl()->getTargetLowering()->getObjFileLowering()); + *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering()); PM.add(MMI); // Set up a MachineFunction for the rest of CodeGen to work on. @@ -136,22 +140,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, return &MMI->getContext(); } -bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - bool DisableVerify, - AnalysisID StartAfter, - AnalysisID StopAfter) { - // Passes to handle jumptable function annotations. These can't be handled at - // JIT time, so we don't add them directly to addPassesToGenerateCode. - PM.add(createJumpInstrTableInfoPass( - getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound())); - PM.add(createJumpInstrTablesPass(Options.JTType)); - if (Options.FCFI) - PM.add(createForwardControlFlowIntegrityPass( - Options.JTType, Options.CFIType, Options.CFIEnforcing, - Options.getCFIFuncName())); - +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { // Add common CodeGen passes. 
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartAfter, StopAfter); @@ -171,27 +162,28 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (Options.MCOptions.MCSaveTempLabels) Context->setAllowTemporaryLabels(false); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCAsmInfo &MAI = *getMCAsmInfo(); - const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); - const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo(); + const MCRegisterInfo &MRI = *getMCRegisterInfo(); + const MCInstrInfo &MII = *getMCInstrInfo(); + std::unique_ptr<MCStreamer> AsmStreamer; switch (FileType) { case CGFT_AssemblyFile: { - MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, - MII, MRI, STI); + MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter( + Triple(getTargetTriple()), MAI.getAssemblerDialect(), MAI, MII, MRI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = nullptr; if (Options.MCOptions.ShowMCEncoding) - MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); + MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); + auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( - *Context, Out, Options.MCOptions.AsmVerbose, + *Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); @@ -200,17 +192,19 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, - *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; - AsmStreamer.reset( - getTarget() - .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE, - STI, Options.MCOptions.MCRelaxAll)); + // Don't waste memory on names of temp labels. + Context->setUseNamesOnTempLabels(false); + + Triple T(getTargetTriple()); + AsmStreamer.reset(getTarget().createMCObjectStreamer( + T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + /*DWARFMustBeAtTheEnd*/ true)); break; } case CGFT_Null: @@ -221,13 +215,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, } // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. - FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + FunctionPass *Printer = + getTarget().createAsmPrinter(*this, std::move(AsmStreamer)); if (!Printer) return true; - // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.release(); - PM.add(Printer); return false; @@ -238,9 +230,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// code is not supported. It fills the MCContext Ctx pointer which can be /// used to build custom MCStreamer. /// -bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, - MCContext *&Ctx, - raw_ostream &Out, +bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, + raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. 
Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr); @@ -252,29 +243,26 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. - const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter( - *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx); + const MCRegisterInfo &MRI = *getMCRegisterInfo(); + MCCodeEmitter *MCE = + getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; - std::unique_ptr<MCStreamer> AsmStreamer; - AsmStreamer.reset(getTarget() - .createMCObjectStreamer(getTargetTriple(), *Ctx, *MAB, - Out, MCE, STI, - Options.MCOptions.MCRelaxAll)); + Triple T(getTargetTriple()); + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); + std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( + T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + /*DWARFMustBeAtTheEnd*/ true)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. - FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + FunctionPass *Printer = + getTarget().createAsmPrinter(*this, std::move(AsmStreamer)); if (!Printer) return true; - // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.release(); - PM.add(Printer); return false; // success! diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index cdf505e..4321849 100644 --- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -138,16 +138,3 @@ void LatencyPriorityQueue::remove(SUnit *SU) { std::swap(*I, Queue.back()); Queue.pop_back(); } - -#ifdef NDEBUG -void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {} -#else -void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { - LatencyPriorityQueue q = *this; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(DAG); - } -} -#endif diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index b621e3b..be61a20 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -59,11 +59,11 @@ void LexicalScopes::extractLexicalScopes( for (const auto &MBB : *MF) { const MachineInstr *RangeBeginMI = nullptr; const MachineInstr *PrevMI = nullptr; - DebugLoc PrevDL; + const DILocation *PrevDL = nullptr; for (const auto &MInsn : MBB) { // Check if instruction has valid location information. - const DebugLoc MIDL = MInsn.getDebugLoc(); - if (MIDL.isUnknown()) { + const DILocation *MIDL = MInsn.getDebugLoc(); + if (!MIDL) { PrevMI = &MInsn; continue; } @@ -96,7 +96,7 @@ void LexicalScopes::extractLexicalScopes( } // Create last instruction range. 
- if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) { + if (RangeBeginMI && PrevMI && PrevDL) { InsnRange R(RangeBeginMI, PrevMI); MIRanges.push_back(R); MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); @@ -104,30 +104,19 @@ void LexicalScopes::extractLexicalScopes( } } -LexicalScope *LexicalScopes::findInlinedScope(DebugLoc DL) { - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); - auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); - return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; -} - /// findLexicalScope - Find lexical scope, either regular or inlined, for the /// given DebugLoc. Return NULL if not found. -LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); +LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) { + DILocalScope *Scope = DL->getScope(); if (!Scope) return nullptr; // The scope that we were created with could have an extra file - which // isn't what we care about in this case. - DIDescriptor D = DIDescriptor(Scope); - if (D.isLexicalBlockFile()) - Scope = DILexicalBlockFile(Scope).getScope(); + if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) + Scope = File->getScope(); - if (IA) { + if (auto *IA = DL->getInlinedAt()) { auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; } @@ -136,47 +125,39 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. -LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { - if (DL.isUnknown()) - return nullptr; - MDNode *Scope = nullptr; - MDNode *InlinedAt = nullptr; - DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); - - if (InlinedAt) { +LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope, + const DILocation *IA) { + if (IA) { // Create an abstract scope for inlined function. getOrCreateAbstractScope(Scope); // Create an inlined scope for inlined function. - return getOrCreateInlinedScope(Scope, InlinedAt); + return getOrCreateInlinedScope(Scope, IA); } return getOrCreateRegularScope(Scope); } /// getOrCreateRegularScope - Find or create a regular lexical scope. -LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { - DIDescriptor D = DIDescriptor(Scope); - if (D.isLexicalBlockFile()) { - Scope = DILexicalBlockFile(Scope).getScope(); - D = DIDescriptor(Scope); - } +LexicalScope * +LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) { + if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) + Scope = File->getScope(); auto I = LexicalScopeMap.find(Scope); if (I != LexicalScopeMap.end()) return &I->second; + // FIXME: Should the following dyn_cast be DILexicalBlock? LexicalScope *Parent = nullptr; - if (D.isLexicalBlock()) - Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); - // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 - // compatibility is no longer required. 
- I = LexicalScopeMap.emplace(std::piecewise_construct, std::make_tuple(Scope), - std::make_tuple(Parent, DIDescriptor(Scope), - nullptr, false)).first; + if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope)) + Parent = getOrCreateLexicalScope(Block->getScope()); + I = LexicalScopeMap.emplace(std::piecewise_construct, + std::forward_as_tuple(Scope), + std::forward_as_tuple(Parent, Scope, nullptr, + false)).first; if (!Parent) { - assert(DIDescriptor(Scope).isSubprogram()); - assert(DISubprogram(Scope).describes(MF->getFunction())); + assert(cast<DISubprogram>(Scope)->describes(MF->getFunction())); assert(!CurrentFnLexicalScope); CurrentFnLexicalScope = &I->second; } @@ -185,51 +166,49 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { } /// getOrCreateInlinedScope - Find or create an inlined lexical scope. -LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode, - MDNode *InlinedAt) { - std::pair<const MDNode*, const MDNode*> P(ScopeNode, InlinedAt); +LexicalScope * +LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope, + const DILocation *InlinedAt) { + std::pair<const DILocalScope *, const DILocation *> P(Scope, InlinedAt); auto I = InlinedLexicalScopeMap.find(P); if (I != InlinedLexicalScopeMap.end()) return &I->second; LexicalScope *Parent; - DILexicalBlock Scope(ScopeNode); - if (Scope.isSubprogram()) - Parent = getOrCreateLexicalScope(DebugLoc::getFromDILocation(InlinedAt)); + if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope)) + Parent = getOrCreateInlinedScope(Block->getScope(), InlinedAt); else - Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt); + Parent = getOrCreateLexicalScope(InlinedAt); - // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 - // compatibility is no longer required. I = InlinedLexicalScopeMap.emplace(std::piecewise_construct, - std::make_tuple(P), - std::make_tuple(Parent, Scope, InlinedAt, - false)).first; + std::forward_as_tuple(P), + std::forward_as_tuple(Parent, Scope, + InlinedAt, false)) + .first; return &I->second; } /// getOrCreateAbstractScope - Find or create an abstract lexical scope. -LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { - assert(N && "Invalid Scope encoding!"); +LexicalScope * +LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) { + assert(Scope && "Invalid Scope encoding!"); - DIDescriptor Scope(N); - if (Scope.isLexicalBlockFile()) - Scope = DILexicalBlockFile(Scope).getScope(); + if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) + Scope = File->getScope(); auto I = AbstractScopeMap.find(Scope); if (I != AbstractScopeMap.end()) return &I->second; + // FIXME: Should the following isa be DILexicalBlock? LexicalScope *Parent = nullptr; - if (Scope.isLexicalBlock()) { - DILexicalBlock DB(Scope); - DIDescriptor ParentDesc = DB.getContext(); - Parent = getOrCreateAbstractScope(ParentDesc); - } + if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope)) + Parent = getOrCreateAbstractScope(Block->getScope()); + I = AbstractScopeMap.emplace(std::piecewise_construct, std::forward_as_tuple(Scope), std::forward_as_tuple(Parent, Scope, nullptr, true)).first; - if (Scope.isSubprogram()) + if (isa<DISubprogram>(Scope)) AbstractScopesList.push_back(&I->second); return &I->second; } @@ -290,7 +269,7 @@ void LexicalScopes::assignInstructionRanges( /// have machine instructions that belong to lexical scope identified by /// DebugLoc. 
void LexicalScopes::getMachineBasicBlocks( - DebugLoc DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { + const DILocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { MBBs.clear(); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) @@ -313,7 +292,7 @@ void LexicalScopes::getMachineBasicBlocks( /// dominates - Return true if DebugLoc's lexical scope dominates at least one /// machine instruction's lexical scope in a given machine basic block. -bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { +bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) { LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) return false; @@ -325,12 +304,10 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { bool Result = false; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - DebugLoc IDL = I->getDebugLoc(); - if (IDL.isUnknown()) - continue; - if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) - if (Scope->dominates(IScope)) - return true; + if (const DILocation *IDL = I->getDebugLoc()) + if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) + if (Scope->dominates(IScope)) + return true; } return Result; } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index dc936a3..1571551 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -157,10 +158,10 @@ public: UserValue *getNext() const { return next; } /// match - Does this UserValue match the parameters? - bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset, - bool indirect) const { - return Var == Variable && Expr == Expression && Offset == offset && - indirect == IsIndirect; + bool match(const MDNode *Var, const MDNode *Expr, const DILocation *IA, + unsigned Offset, bool indirect) const { + return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA && + Offset == offset && indirect == IsIndirect; } /// merge - Merge equivalence classes. @@ -268,15 +269,9 @@ public: void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TRI); - /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A - /// variable may have more than one corresponding DBG_VALUE instructions. - /// Only first one needs DebugLoc to identify variable's lexical scope - /// in source file. - DebugLoc findDebugLoc(); - /// getDebugLoc - Return DebugLoc of this UserValue. DebugLoc getDebugLoc() { return dl;} - void print(raw_ostream&, const TargetMachine*); + void print(raw_ostream &, const TargetRegisterInfo *); }; } // namespace @@ -362,10 +357,47 @@ public: }; } // namespace -void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { - DIVariable DV(Variable); +static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, + const LLVMContext &Ctx) { + if (!DL) + return; + + auto *Scope = cast<DIScope>(DL.getScope()); + // Omit the directory, because it's likely to be long and uninteresting. 
+ CommentOS << Scope->getFilename(); + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + + DebugLoc InlinedAtDL = DL.getInlinedAt(); + if (!InlinedAtDL) + return; + + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, CommentOS, Ctx); + CommentOS << " ]"; +} + +static void printExtendedName(raw_ostream &OS, const DILocalVariable *V, + const DILocation *DL) { + const LLVMContext &Ctx = V->getContext(); + StringRef Res = V->getName(); + if (!Res.empty()) + OS << Res << "," << V->getLine(); + if (auto *InlinedAt = DL->getInlinedAt()) { + if (DebugLoc InlinedAtDL = InlinedAt) { + OS << " @["; + printDebugLoc(InlinedAtDL, OS, Ctx); + OS << "]"; + } + } +} + +void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { + auto *DV = cast<DILocalVariable>(Variable); OS << "!\""; - DV.printExtendedName(OS); + printExtendedName(OS, DV, dl); + OS << "\"\t"; if (offset) OS << '+' << offset; @@ -378,7 +410,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { } for (unsigned i = 0, e = locations.size(); i != e; ++i) { OS << " Loc" << i << '='; - locations[i].print(OS, TM); + locations[i].print(OS, TRI); } OS << '\n'; } @@ -386,7 +418,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { void LDVImpl::print(raw_ostream &OS) { OS << "********** DEBUG VARIABLES **********\n"; for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->print(OS, &MF->getTarget()); + userValues[i]->print(OS, TRI); } void UserValue::coalesceLocation(unsigned LocNo) { @@ -432,7 +464,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr, UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Expr, Offset, IsIndirect)) + if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect)) return UV; } @@ -941,11 +973,6 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, std::next(MachineBasicBlock::iterator(MI)); } -DebugLoc UserValue::findDebugLoc() { - DebugLoc D = dl; - dl = DebugLoc(); - return D; -} void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, LiveIntervals &LIS, @@ -954,11 +981,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, MachineOperand &Loc = locations[LocNo]; ++NumInsertedDebugValues; + assert(cast<DILocalVariable>(Variable) + ->isValidLocationForIntrinsic(getDebugLoc()) && + "Expected inlined-at fields to agree"); if (Loc.isReg()) - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), IsIndirect, Loc.getReg(), offset, Variable, Expression); else - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) .addOperand(Loc) .addImm(offset) .addMetadata(Variable) @@ -1004,7 +1034,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { return; const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); + DEBUG(userValues[i]->print(dbgs(), TRI)); userValues[i]->rewriteLocations(*VRM, *TRI); userValues[i]->emitDebugValues(VRM, *LIS, *TII); } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h index 9748329..ac2d1a1 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -33,12 +33,13 @@ class 
VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { void *pImpl; - DenseMap<const Function*, DISubprogram> FunctionDIs; + DenseMap<const Function *, DISubprogram *> FunctionDIs; + public: static char ID; // Pass identification, replacement for typeid LiveDebugVariables(); - ~LiveDebugVariables(); + ~LiveDebugVariables() override; /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx. /// @param OldReg Old virtual register that is going away. diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index 9423edc..d75e441 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -32,6 +32,274 @@ #include <algorithm> using namespace llvm; +namespace { +//===----------------------------------------------------------------------===// +// Implementation of various methods necessary for calculation of live ranges. +// The implementation of the methods abstracts from the concrete type of the +// segment collection. +// +// Implementation of the class follows the Template design pattern. The base +// class contains generic algorithms that call collection-specific methods, +// which are provided in concrete subclasses. In order to avoid virtual calls +// these methods are provided by means of C++ template instantiation. +// The base class calls the methods of the subclass through method impl(), +// which casts 'this' pointer to the type of the subclass. +// +//===----------------------------------------------------------------------===// + +template <typename ImplT, typename IteratorT, typename CollectionT> +class CalcLiveRangeUtilBase { +protected: + LiveRange *LR; + +protected: + CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {} + +public: + typedef LiveRange::Segment Segment; + typedef IteratorT iterator; + + VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) { + assert(!Def.isDead() && "Cannot define a value at the dead slot"); + + iterator I = impl().find(Def); + if (I == segments().end()) { + VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + + Segment *S = segmentAt(I); + if (SlotIndex::isSameInstr(Def, S->start)) { + assert(S->valno->def == S->start && "Inconsistent existing value def"); + + // It is possible to have both normal and early-clobber defs of the same + // register on an instruction. It doesn't make a lot of sense, but it is + // possible to specify in inline assembly. + // + // Just convert everything to early-clobber. + Def = std::min(Def, S->start); + if (Def != S->start) + S->start = S->valno->def = Def; + return S->valno; + } + assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def"); + VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + + VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Use) { + if (segments().empty()) + return nullptr; + iterator I = + impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr)); + if (I == segments().begin()) + return nullptr; + --I; + if (I->end <= StartIdx) + return nullptr; + if (I->end < Use) + extendSegmentEndTo(I, Use); + return I->valno; + } + + /// This method is used when we want to extend the segment specified + /// by I to end at the specified endpoint. To do this, we should + /// merge and eliminate all segments that this will overlap + /// with. The iterator is not invalidated. 
+ void extendSegmentEndTo(iterator I, SlotIndex NewEnd) { + assert(I != segments().end() && "Not a valid segment!"); + Segment *S = segmentAt(I); + VNInfo *ValNo = I->valno; + + // Search for the first segment that we can't merge with. + iterator MergeTo = std::next(I); + for (; MergeTo != segments().end() && NewEnd >= MergeTo->end; ++MergeTo) + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + + // If NewEnd was in the middle of a segment, make sure to get its endpoint. + S->end = std::max(NewEnd, std::prev(MergeTo)->end); + + // If the newly formed segment now touches the segment after it and if they + // have the same value number, merge the two segments into one segment. + if (MergeTo != segments().end() && MergeTo->start <= I->end && + MergeTo->valno == ValNo) { + S->end = MergeTo->end; + ++MergeTo; + } + + // Erase any dead segments. + segments().erase(std::next(I), MergeTo); + } + + /// This method is used when we want to extend the segment specified + /// by I to start at the specified endpoint. To do this, we should + /// merge and eliminate all segments that this will overlap with. + iterator extendSegmentStartTo(iterator I, SlotIndex NewStart) { + assert(I != segments().end() && "Not a valid segment!"); + Segment *S = segmentAt(I); + VNInfo *ValNo = I->valno; + + // Search for the first segment that we can't merge with. + iterator MergeTo = I; + do { + if (MergeTo == segments().begin()) { + S->start = NewStart; + segments().erase(MergeTo, I); + return I; + } + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + --MergeTo; + } while (NewStart <= MergeTo->start); + + // If we start in the middle of another segment, just delete a range and + // extend that segment. + if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { + segmentAt(MergeTo)->end = S->end; + } else { + // Otherwise, extend the segment right after. + ++MergeTo; + Segment *MergeToSeg = segmentAt(MergeTo); + MergeToSeg->start = NewStart; + MergeToSeg->end = S->end; + } + + segments().erase(std::next(MergeTo), std::next(I)); + return MergeTo; + } + + iterator addSegment(Segment S) { + SlotIndex Start = S.start, End = S.end; + iterator I = impl().findInsertPos(S); + + // If the inserted segment starts in the middle or right at the end of + // another segment, just extend that segment to contain the segment of S. + if (I != segments().begin()) { + iterator B = std::prev(I); + if (S.valno == B->valno) { + if (B->start <= Start && B->end >= Start) { + extendSegmentEndTo(B, End); + return B; + } + } else { + // Check to make sure that we are not overlapping two live segments with + // different valno's. + assert(B->end <= Start && + "Cannot overlap two segments with differing ValID's" + " (did you def the same reg twice in a MachineInstr?)"); + } + } + + // Otherwise, if this segment ends in the middle of, or right next + // to, another segment, merge it into that segment. + if (I != segments().end()) { + if (S.valno == I->valno) { + if (I->start <= End) { + I = extendSegmentStartTo(I, Start); + + // If S is a complete superset of a segment, we may need to grow its + // endpoint as well. + if (End > I->end) + extendSegmentEndTo(I, End); + return I; + } + } else { + // Check to make sure that we are not overlapping two live segments with + // different valno's. + assert(I->start >= End && + "Cannot overlap two segments with differing ValID's"); + } + } + + // Otherwise, this is just a new segment that doesn't interact with + // anything. + // Insert it. 
+ return segments().insert(I, S); + } + +private: + ImplT &impl() { return *static_cast<ImplT *>(this); } + + CollectionT &segments() { return impl().segmentsColl(); } + + Segment *segmentAt(iterator I) { return const_cast<Segment *>(&(*I)); } +}; + +//===----------------------------------------------------------------------===// +// Instantiation of the methods for calculation of live ranges +// based on a segment vector. +//===----------------------------------------------------------------------===// + +class CalcLiveRangeUtilVector; +typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator, + LiveRange::Segments> CalcLiveRangeUtilVectorBase; + +class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase { +public: + CalcLiveRangeUtilVector(LiveRange *LR) : CalcLiveRangeUtilVectorBase(LR) {} + +private: + friend CalcLiveRangeUtilVectorBase; + + LiveRange::Segments &segmentsColl() { return LR->segments; } + + void insertAtEnd(const Segment &S) { LR->segments.push_back(S); } + + iterator find(SlotIndex Pos) { return LR->find(Pos); } + + iterator findInsertPos(Segment S) { + return std::upper_bound(LR->begin(), LR->end(), S.start); + } +}; + +//===----------------------------------------------------------------------===// +// Instantiation of the methods for calculation of live ranges +// based on a segment set. +//===----------------------------------------------------------------------===// + +class CalcLiveRangeUtilSet; +typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet, + LiveRange::SegmentSet::iterator, + LiveRange::SegmentSet> CalcLiveRangeUtilSetBase; + +class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase { +public: + CalcLiveRangeUtilSet(LiveRange *LR) : CalcLiveRangeUtilSetBase(LR) {} + +private: + friend CalcLiveRangeUtilSetBase; + + LiveRange::SegmentSet &segmentsColl() { return *LR->segmentSet; } + + void insertAtEnd(const Segment &S) { + LR->segmentSet->insert(LR->segmentSet->end(), S); + } + + iterator find(SlotIndex Pos) { + iterator I = + LR->segmentSet->upper_bound(Segment(Pos, Pos.getNextSlot(), nullptr)); + if (I == LR->segmentSet->begin()) + return I; + iterator PrevI = std::prev(I); + if (Pos < (*PrevI).end) + return PrevI; + return I; + } + + iterator findInsertPos(Segment S) { + iterator I = LR->segmentSet->upper_bound(S); + if (I != LR->segmentSet->end() && !(S.start < *I)) + ++I; + return I; + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// LiveRange methods +//===----------------------------------------------------------------------===// + LiveRange::iterator LiveRange::find(SlotIndex Pos) { // This algorithm is basically std::upper_bound. // Unfortunately, std::upper_bound cannot be used with mixed types until we @@ -52,30 +320,11 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) { VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) { - assert(!Def.isDead() && "Cannot define a value at the dead slot"); - iterator I = find(Def); - if (I == end()) { - VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - segments.push_back(Segment(Def, Def.getDeadSlot(), VNI)); - return VNI; - } - if (SlotIndex::isSameInstr(Def, I->start)) { - assert(I->valno->def == I->start && "Inconsistent existing value def"); - - // It is possible to have both normal and early-clobber defs of the same - // register on an instruction. It doesn't make a lot of sense, but it is - // possible to specify in inline assembly. 
- // - // Just convert everything to early-clobber. - Def = std::min(Def, I->start); - if (Def != I->start) - I->start = I->valno->def = Def; - return I->valno; - } - assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); - VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI)); - return VNI; + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator); } // overlaps - Return true if the intersection of the two live ranges is @@ -236,68 +485,18 @@ void LiveRange::RenumberValues() { } } -/// This method is used when we want to extend the segment specified by I to end -/// at the specified endpoint. To do this, we should merge and eliminate all -/// segments that this will overlap with. The iterator is not invalidated. -void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { - assert(I != end() && "Not a valid segment!"); - VNInfo *ValNo = I->valno; - - // Search for the first segment that we can't merge with. - iterator MergeTo = std::next(I); - for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) { - assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); - } - - // If NewEnd was in the middle of a segment, make sure to get its endpoint. - I->end = std::max(NewEnd, std::prev(MergeTo)->end); - - // If the newly formed segment now touches the segment after it and if they - // have the same value number, merge the two segments into one segment. - if (MergeTo != end() && MergeTo->start <= I->end && - MergeTo->valno == ValNo) { - I->end = MergeTo->end; - ++MergeTo; - } - - // Erase any dead segments. - segments.erase(std::next(I), MergeTo); +void LiveRange::addSegmentToSet(Segment S) { + CalcLiveRangeUtilSet(this).addSegment(S); } - -/// This method is used when we want to extend the segment specified by I to -/// start at the specified endpoint. To do this, we should merge and eliminate -/// all segments that this will overlap with. -LiveRange::iterator -LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) { - assert(I != end() && "Not a valid segment!"); - VNInfo *ValNo = I->valno; - - // Search for the first segment that we can't merge with. - iterator MergeTo = I; - do { - if (MergeTo == begin()) { - I->start = NewStart; - segments.erase(MergeTo, I); - return I; - } - assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); - --MergeTo; - } while (NewStart <= MergeTo->start); - - // If we start in the middle of another segment, just delete a range and - // extend that segment. - if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { - MergeTo->end = I->end; - } else { - // Otherwise, extend the segment right after. - ++MergeTo; - MergeTo->start = NewStart; - MergeTo->end = I->end; +LiveRange::iterator LiveRange::addSegment(Segment S) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) { + addSegmentToSet(S); + return end(); } - - segments.erase(std::next(MergeTo), std::next(I)); - return MergeTo; + // Otherwise use the segment vector. 
+ return CalcLiveRangeUtilVector(this).addSegment(S); } void LiveRange::append(const Segment S) { @@ -306,69 +505,15 @@ void LiveRange::append(const Segment S) { segments.push_back(S); } -LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { - SlotIndex Start = S.start, End = S.end; - iterator it = std::upper_bound(From, end(), Start); - - // If the inserted segment starts in the middle or right at the end of - // another segment, just extend that segment to contain the segment of S. - if (it != begin()) { - iterator B = std::prev(it); - if (S.valno == B->valno) { - if (B->start <= Start && B->end >= Start) { - extendSegmentEndTo(B, End); - return B; - } - } else { - // Check to make sure that we are not overlapping two live segments with - // different valno's. - assert(B->end <= Start && - "Cannot overlap two segments with differing ValID's" - " (did you def the same reg twice in a MachineInstr?)"); - } - } - - // Otherwise, if this segment ends in the middle of, or right next to, another - // segment, merge it into that segment. - if (it != end()) { - if (S.valno == it->valno) { - if (it->start <= End) { - it = extendSegmentStartTo(it, Start); - - // If S is a complete superset of a segment, we may need to grow its - // endpoint as well. - if (End > it->end) - extendSegmentEndTo(it, End); - return it; - } - } else { - // Check to make sure that we are not overlapping two live segments with - // different valno's. - assert(it->start >= End && - "Cannot overlap two segments with differing ValID's"); - } - } - - // Otherwise, this is just a new segment that doesn't interact with anything. - // Insert it. - return segments.insert(it, S); -} - /// extendInBlock - If this range is live before Kill in the basic /// block that starts at StartIdx, extend it to be live up to Kill and return /// the value. If there is no live range before Kill, return NULL. VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { - if (empty()) - return nullptr; - iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); - if (I == begin()) - return nullptr; - --I; - if (I->end <= StartIdx) - return nullptr; - if (I->end < Kill) - extendSegmentEndTo(I, Kill); - return I->valno; + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).extendInBlock(StartIdx, Kill); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).extendInBlock(StartIdx, Kill); } /// Remove the specified segment from this range. Note that the segment must @@ -424,13 +569,9 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, /// Also remove the value# from value# list. void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - iterator I = end(); - iterator E = begin(); - do { - --I; - if (I->valno == ValNo) - segments.erase(I); - } while (I != E); + segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) { + return S.valno == ValNo; + }), end()); // Now that ValNo is dead, remove it. 
 markValNoForDeletion(ValNo);
 }
@@ -598,6 +739,21 @@ VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
   return V2;
 }
 
+void LiveRange::flushSegmentSet() {
+  assert(segmentSet != nullptr && "segment set must have been created");
+  assert(
+      segments.empty() &&
+      "segment set can be used only initially before switching to the array");
+  segments.append(segmentSet->begin(), segmentSet->end());
+  segmentSet = nullptr;
+  verify();
+}
+
+void LiveInterval::freeSubRange(SubRange *S) {
+  S->~SubRange();
+  // Memory was allocated with BumpPtr allocator and is not freed here.
+}
+
 void LiveInterval::removeEmptySubRanges() {
   SubRange **NextPtr = &SubRanges;
   SubRange *I = *NextPtr;
@@ -609,12 +765,22 @@ void LiveInterval::removeEmptySubRanges() {
     }
     // Skip empty subranges until we find the first nonempty one.
     do {
-      I = I->Next;
+      SubRange *Next = I->Next;
+      freeSubRange(I);
+      I = Next;
     } while (I != nullptr && I->empty());
     *NextPtr = I;
   }
 }
 
+void LiveInterval::clearSubRanges() {
+  for (SubRange *I = SubRanges, *Next; I != nullptr; I = Next) {
+    Next = I->Next;
+    freeSubRange(I);
+  }
+  SubRanges = nullptr;
+}
+
 /// Helper function for constructMainRangeFromSubranges(): Search the CFG
 /// backwards until we find a place covered by a LiveRange segment that actually
 /// has a valno set.
@@ -650,23 +816,45 @@ static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR,
 static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
   SmallPtrSet<const MachineBasicBlock*, 5> Visited;
-  for (LiveRange::Segment &S : LI.segments) {
-    if (S.valno != nullptr)
-      continue;
-    // This can only happen at the begin of a basic block.
-    assert(S.start.isBlock() && "valno should only be missing at block begin");
-
-    Visited.clear();
-    const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
-    for (const MachineBasicBlock *Pred : MBB->predecessors()) {
-      VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
-      if (VNI != nullptr) {
-        S.valno = VNI;
-        break;
+
+  LiveRange::iterator OutIt;
+  VNInfo *PrevValNo = nullptr;
+  for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+    LiveRange::Segment &S = *I;
+    // Determine final VNI if necessary.
+    if (S.valno == nullptr) {
+      // This can only happen at the begin of a basic block.
+      assert(S.start.isBlock() && "valno should only be missing at block begin");
+
+      Visited.clear();
+      const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
+      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+        VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
+        if (VNI != nullptr) {
+          S.valno = VNI;
+          break;
+        }
       }
+      assert(S.valno != nullptr && "could not determine valno");
+    }
+    // Merge with previous segment if it has the same VNI.
+    if (PrevValNo == S.valno && OutIt->end == S.start) {
+      OutIt->end = S.end;
+    } else {
+      // Didn't merge. Move OutIt to next segment.
+      if (PrevValNo == nullptr)
+        OutIt = LI.begin();
+      else
+        ++OutIt;
+
+      if (OutIt != I)
+        *OutIt = *I;
+      PrevValNo = S.valno;
     }
-    assert(S.valno != nullptr && "could not determine valno");
   }
+  // If we merged some segments, chop off the end.
+  ++OutIt;
+  LI.segments.erase(OutIt, LI.end());
 }
 
 void LiveInterval::constructMainRangeFromSubranges(
@@ -789,6 +977,12 @@ void LiveInterval::constructMainRangeFromSubranges(
         NeedVNIFixup = true;
       }
 
+      // In rare cases we can produce adjacent segments with the same value
+      // number (if they come from different subranges, but happen to have
+      // the same defining instruction). VNIFixup will fix those cases.
+      if (!empty() && segments.back().end == Pos &&
+          segments.back().valno == VNI)
+        NeedVNIFixup = true;
       CurrentSegment.start = Pos;
       CurrentSegment.valno = VNI;
       ConstructingSegment = true;
@@ -997,6 +1191,13 @@ static inline bool coalescable(const LiveRange::Segment &A,
 
 void LiveRangeUpdater::add(LiveRange::Segment Seg) {
   assert(LR && "Cannot add to a null destination");
 
+  // Fall back to the regular add method if the live range
+  // is using the segment set instead of the segment vector.
+  if (LR->segmentSet != nullptr) {
+    LR->addSegmentToSet(Seg);
+    return;
+  }
+
   // Flush the state if Start moves backwards.
   if (!LastStart.isValid() || LastStart > Seg.start) {
     if (isDirty())
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index 56f38b6..adca4cc 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -67,6 +67,13 @@ static cl::opt<bool> EnableSubRegLiveness(
     "enable-subreg-liveness", cl::Hidden, cl::init(true),
     cl::desc("Enable subregister liveness tracking."));
 
+namespace llvm {
+cl::opt<bool> UseSegmentSetForPhysRegs(
+    "use-segment-set-for-physregs", cl::Hidden, cl::init(true),
+    cl::desc(
+        "Use segment set for the computation of the live ranges of physregs."));
+}
+
 void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addRequired<AliasAnalysis>();
@@ -192,7 +199,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
   assert(LRCalc && "LRCalc not initialized.");
   assert(LI.empty() && "Should only compute empty intervals.");
   LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
-  LRCalc->calculate(LI);
+  LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
   computeDeadValues(LI, nullptr);
 }
 
@@ -268,6 +275,10 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
       LRCalc->extendToUses(LR, Reg);
     }
   }
+
+  // Flush the segment set to the segment vector.
+  if (UseSegmentSetForPhysRegs)
+    LR.flushSegmentSet();
 }
 
 
@@ -300,7 +311,8 @@ void LiveIntervals::computeLiveInRegUnits() {
       unsigned Unit = *Units;
       LiveRange *LR = RegUnitRanges[Unit];
       if (!LR) {
-        LR = RegUnitRanges[Unit] = new LiveRange();
+        // Use segment set to speed up initial computation of the live range.
+        LR = RegUnitRanges[Unit] = new LiveRange(UseSegmentSetForPhysRegs);
         NewRanges.push_back(Unit);
       }
       VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
@@ -448,23 +460,34 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
   for (auto VNI : LI.valnos) {
     if (VNI->isUnused())
       continue;
-    LiveRange::iterator I = LI.FindSegmentContaining(VNI->def);
+    SlotIndex Def = VNI->def;
+    LiveRange::iterator I = LI.FindSegmentContaining(Def);
     assert(I != LI.end() && "Missing segment for VNI");
-    if (I->end != VNI->def.getDeadSlot())
+
+    // Is the register live before? Otherwise we may have to add a read-undef
+    // flag for subregister defs.
+    if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
+      if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
+        MachineInstr *MI = getInstructionFromIndex(Def);
+        MI->addRegisterDefReadUndef(LI.reg);
+      }
+    }
+
+    if (I->end != Def.getDeadSlot())
       continue;
     if (VNI->isPHIDef()) {
       // This is a dead PHI. Remove it.
       VNI->markUnused();
       LI.removeSegment(I);
-      DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+      DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
       PHIRemoved = true;
     } else {
       // This is a dead def.
Make sure the instruction knows. - MachineInstr *MI = getInstructionFromIndex(VNI->def); + MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); MI->addRegisterDead(LI.reg, TRI); if (dead && MI->allDefsAreDead()) { - DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); + DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); dead->push_back(MI); } } @@ -609,15 +632,6 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, } } -void LiveIntervals::pruneValue(LiveInterval &LI, SlotIndex Kill, - SmallVectorImpl<SlotIndex> *EndPoints) { - pruneValue((LiveRange&)LI, Kill, EndPoints); - - for (LiveInterval::SubRange &SR : LI.subranges()) { - pruneValue(SR, Kill, nullptr); - } -} - //===----------------------------------------------------------------------===// // Register allocator hooks. // @@ -648,7 +662,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end))); } - if (MRI->tracksSubRegLiveness()) { + if (MRI->subRegLivenessEnabled()) { SRs.clear(); for (const LiveInterval::SubRange &SR : LI.subranges()) { SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end))); @@ -686,7 +700,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { goto CancelKill; } - if (MRI->tracksSubRegLiveness()) { + if (MRI->subRegLivenessEnabled()) { // When reading a partial undefined value we must not add a kill flag. // The regalloc might have used the undef lane for something else. // Example: @@ -1376,3 +1390,25 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, repairOldRegInRange(Begin, End, endIdx, LI, Reg); } } + +void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (LiveRange *LR = getCachedRegUnit(*Units)) + if (VNInfo *VNI = LR->getVNInfoAt(Pos)) + LR->removeValNo(VNI); + } +} + +void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { + VNInfo *VNI = LI.getVNInfoAt(Pos); + if (VNI == nullptr) + return; + LI.removeValNo(VNI); + + // Also remove the value in subranges. + for (LiveInterval::SubRange &S : LI.subranges()) { + if (VNInfo *SVNI = S.getVNInfoAt(Pos)) + S.removeValNo(SVNI); + } + LI.removeEmptySubRanges(); +} diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp index 7efd941..eef7643 100644 --- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -16,17 +16,23 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// \brief Remove all registers from the set that get clobbered by the register /// mask. -void LivePhysRegs::removeRegsInMask(const MachineOperand &MO) { +/// The clobbers set will be the list of live registers clobbered +/// by the regmask. 
+void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
+        SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers) {
   SparseSet<unsigned>::iterator LRI = LiveRegs.begin();
   while (LRI != LiveRegs.end()) {
-    if (MO.clobbersPhysReg(*LRI))
+    if (MO.clobbersPhysReg(*LRI)) {
+      if (Clobbers)
+        Clobbers->push_back(std::make_pair(*LRI, &MO));
       LRI = LiveRegs.erase(LRI);
-    else
+    } else
       ++LRI;
   }
 }
@@ -44,7 +50,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
       continue;
       removeReg(Reg);
     } else if (O->isRegMask())
-      removeRegsInMask(*O);
+      removeRegsInMask(*O, nullptr);
   }
 
   // Add uses to the set.
@@ -62,8 +68,8 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
 /// killed-uses, add defs. This is not the recommended way, because it depends
 /// on accurate kill flags. If possible, use stepBackward() instead of this
 /// function.
-void LivePhysRegs::stepForward(const MachineInstr &MI) {
-  SmallVector<unsigned, 4> Defs;
+void LivePhysRegs::stepForward(const MachineInstr &MI,
+    SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
   // Remove killed registers from the set.
   for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
     if (O->isReg()) {
@@ -71,8 +77,9 @@
       if (Reg == 0)
         continue;
       if (O->isDef()) {
-        if (!O->isDead())
-          Defs.push_back(Reg);
+        // Note, dead defs are still recorded. The caller should decide how to
+        // handle them.
+        Clobbers.push_back(std::make_pair(Reg, &*O));
       } else {
         if (!O->isKill())
           continue;
@@ -80,12 +87,16 @@
         removeReg(Reg);
       }
     } else if (O->isRegMask())
-      removeRegsInMask(*O);
+      removeRegsInMask(*O, &Clobbers);
   }
 
   // Add defs to the set.
-  for (unsigned i = 0, e = Defs.size(); i != e; ++i)
-    addReg(Defs[i]);
+  for (auto Reg : Clobbers) {
+    // Skip dead defs. They shouldn't be added to the set.
+    if (Reg.second->isReg() && Reg.second->isDead())
+      continue;
+    addReg(Reg.first);
+  }
 }
 
 /// Print the currently live registers to OS.
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 1d46161..bb2877a 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -43,17 +43,14 @@ void LiveRangeCalc::reset(const MachineFunction *mf,
 static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
                           LiveRange &LR, const MachineOperand &MO) {
   const MachineInstr *MI = MO.getParent();
-  SlotIndex DefIdx;
-  if (MI->isPHI())
-    DefIdx = Indexes.getMBBStartIdx(MI->getParent());
-  else
-    DefIdx = Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
+  SlotIndex DefIdx =
+      Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
 
   // Create the def in LR. This may find an existing def.
   LR.createDeadDef(DefIdx, Alloc);
 }
 
-void LiveRangeCalc::calculate(LiveInterval &LI) {
+void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
   assert(MRI && Indexes && "call reset() first");
 
   // Step 1: Create minimal live segments for every definition of Reg.
@@ -66,7 +63,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI) {
       continue;
 
     unsigned SubReg = MO.getSubReg();
-    if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) {
+    if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
       unsigned Mask = SubReg != 0 ?
TRI.getSubRegIndexLaneMask(SubReg) : MRI->getMaxLaneMaskForVReg(Reg); @@ -222,23 +219,23 @@ void LiveRangeCalc::updateFromLiveIns() { } -void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) { - assert(Kill.isValid() && "Invalid SlotIndex"); +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) { + assert(Use.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); - MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot()); - assert(KillMBB && "No MBB at Kill"); + MachineBasicBlock *UseMBB = Indexes->getMBBFromIndex(Use.getPrevSlot()); + assert(UseMBB && "No MBB at Use"); // Is there a def in the same MBB we can extend? - if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) + if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use)) return; - // Find the single reaching def, or determine if Kill is jointly dominated by + // Find the single reaching def, or determine if Use is jointly dominated by // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - if (findReachingDefs(LR, *KillMBB, Kill, PhysReg)) + if (findReachingDefs(LR, *UseMBB, Use, PhysReg)) return; // When there were multiple different values, we may need new PHIs. @@ -257,12 +254,12 @@ void LiveRangeCalc::calculateValues() { } -bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, - SlotIndex Kill, unsigned PhysReg) { - unsigned KillMBBNum = KillMBB.getNumber(); +bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, + SlotIndex Use, unsigned PhysReg) { + unsigned UseMBBNum = UseMBB.getNumber(); // Block numbers where LR should be live-in. - SmallVector<unsigned, 16> WorkList(1, KillMBBNum); + SmallVector<unsigned, 16> WorkList(1, UseMBBNum); // Remember if we have seen more than one value. bool UniqueVNI = true; @@ -275,13 +272,19 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, #ifndef NDEBUG if (MBB->pred_empty()) { MBB->getParent()->verify(); + errs() << "Use of " << PrintReg(PhysReg) + << " does not have a corresponding definition on every path:\n"; + const MachineInstr *MI = Indexes->getInstructionFromIndex(Use); + if (MI != nullptr) + errs() << Use << " " << *MI; llvm_unreachable("Use not jointly dominated by defs."); } if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) { MBB->getParent()->verify(); - errs() << "The register needs to be live in to BB#" << MBB->getNumber() + errs() << "The register " << PrintReg(PhysReg) + << " needs to be live in to BB#" << MBB->getNumber() << ", but is missing from the live-in list.\n"; llvm_unreachable("Invalid global physical register"); } @@ -316,11 +319,11 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, } // No, we need a live-in value for Pred as well - if (Pred != &KillMBB) + if (Pred != &UseMBB) WorkList.push_back(Pred->getNumber()); else - // Loopback to KillMBB, so value is really live through. - Kill = SlotIndex(); + // Loopback to UseMBB, so value is really live through. + Use = SlotIndex(); } } @@ -338,9 +341,9 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, E = WorkList.end(); I != E; ++I) { SlotIndex Start, End; std::tie(Start, End) = Indexes->getMBBRange(*I); - // Trim the live range in KillMBB. 
- if (*I == KillMBBNum && Kill.isValid()) - End = Kill; + // Trim the live range in UseMBB. + if (*I == UseMBBNum && Use.isValid()) + End = Use; else Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr); Updater.add(Start, End, TheVNI); @@ -355,8 +358,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { MachineBasicBlock *MBB = MF->getBlockNumbered(*I); addLiveInBlock(LR, DomTree->getNode(MBB)); - if (MBB == &KillMBB) - LiveIn.back().Kill = Kill; + if (MBB == &UseMBB) + LiveIn.back().Kill = Use; } return false; diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index 1b9099b..34d9953 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -101,17 +101,17 @@ class LiveRangeCalc { /// used to add entries directly. SmallVector<LiveInBlock, 16> LiveIn; - /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set - /// of defs that can reach it. + /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can + /// reach it. /// - /// If only one def can reach Kill, all paths from the def to kill are added - /// to LI, and the function returns true. + /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB + /// are added to @p LR, and the function returns true. /// - /// If multiple values can reach Kill, the blocks that need LI to be live in - /// are added to the LiveIn array, and the function returns false. + /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be + /// live in are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, SlotIndex Kill, unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested @@ -162,15 +162,14 @@ public: // Modify existing live ranges. // - /// extend - Extend the live range of LI to reach Kill. + /// Extend the live range of @p LR to reach @p Use. /// - /// The existing values in LI must be live so they jointly dominate Kill. If - /// Kill is not dominated by a single existing value, PHI-defs are inserted - /// as required to preserve SSA form. If Kill is known to be dominated by a - /// single existing value, Alloc may be null. + /// The existing values in @p LR must be live so they jointly dominate @p Use. + /// If @p Use is not dominated by a single existing value, PHI-defs are + /// inserted as required to preserve SSA form. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding @@ -188,7 +187,7 @@ public: /// Calculates liveness for the register specified in live interval @p LI. /// Creates subregister live ranges as needed if subreg liveness tracking is /// enabled. - void calculate(LiveInterval &LI); + void calculate(LiveInterval &LI, bool TrackSubRegs); //===--------------------------------------------------------------------===// // Low-level interface. 
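The LiveInterval.cpp hunk earlier in this merge also rewrote LiveRange::removeValNo from a manual backwards erase loop into the standard erase/remove_if idiom. A minimal standalone sketch of that idiom, with a plain int vector standing in for the segment list and a hypothetical DeadValNo in place of the VNInfo pointer (illustrative only, not code from the patch):

    #include <algorithm>
    #include <vector>

    int main() {
      std::vector<int> Segments = {1, 2, 1, 3, 1};
      const int DeadValNo = 1; // stands in for the VNInfo being removed
      // remove_if compacts the surviving elements to the front and returns the
      // new logical end; erase then drops the dead tail in a single call.
      Segments.erase(std::remove_if(Segments.begin(), Segments.end(),
                                    [DeadValNo](int S) { return S == DeadValNo; }),
                     Segments.end());
      // Segments now holds {2, 3}.
      return 0;
    }

Unlike the old loop, this never erases from the middle of the vector one element at a time, so the rewrite is a single O(n) pass instead of a potentially quadratic sequence of mid-vector erases.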
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 47da205..27c57d5 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -196,7 +196,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
   // We also need to make sure it is safe to move the load.
   // Assume there are stores between DefMI and UseMI.
   bool SawStore = true;
-  if (!DefMI->isSafeToMove(&TII, nullptr, SawStore))
+  if (!DefMI->isSafeToMove(nullptr, SawStore))
     return false;
 
   DEBUG(dbgs() << "Try to fold single def: " << *DefMI
@@ -235,7 +235,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
   // Use the same criteria as DeadMachineInstructionElim.
   bool SawStore = false;
-  if (!MI->isSafeToMove(&TII, nullptr, SawStore)) {
+  if (!MI->isSafeToMove(nullptr, SawStore)) {
     DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
     return;
   }
@@ -256,15 +256,8 @@
       // Check if MI reads any unreserved physregs.
       if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
         ReadsPhysRegs = true;
-      else if (MOI->isDef()) {
-        for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo());
-             Units.isValid(); ++Units) {
-          if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) {
-            if (VNInfo *VNI = LR->getVNInfoAt(Idx))
-              LR->removeValNo(VNI);
-          }
-        }
-      }
+      else if (MOI->isDef())
+        LIS.removePhysRegDefAt(Reg, Idx);
       continue;
     }
     LiveInterval &LI = LIS.getInterval(Reg);
@@ -280,21 +273,11 @@
 
     // Remove defined value.
     if (MOI->isDef()) {
-      if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
-        if (TheDelegate)
-          TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
-        LI.removeValNo(VNI);
-        if (LI.empty()) {
-          RegsToErase.push_back(Reg);
-        } else {
-          // Also remove the value in subranges.
-          for (LiveInterval::SubRange &S : LI.subranges()) {
-            if (VNInfo *SVNI = S.getVNInfoAt(Idx))
-              S.removeValNo(SVNI);
-          }
-          LI.removeEmptySubRanges();
-        }
-      }
+      if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
+        TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+      LIS.removeVRegDefAt(LI, Idx);
+      if (LI.empty())
+        RegsToErase.push_back(Reg);
     }
   }
 
@@ -416,7 +399,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
   VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI);
   for (unsigned I = 0, Size = size(); I < Size; ++I) {
     LiveInterval &LI = LIS.getInterval(get(I));
-    if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
+    if (MRI.recomputeRegClass(LI.reg))
       DEBUG({
         const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
         dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
index 8a6ac25..5c9c679 100644
--- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -61,8 +61,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
   assert(Slot >= 0 && "Spill slot index must be >= 0");
   SS2IntervalMap::iterator I = S2IMap.find(Slot);
   if (I == S2IMap.end()) {
-    I = S2IMap.insert(I, std::make_pair(Slot,
-        LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+    I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
+                       std::forward_as_tuple(
+                           TargetRegisterInfo::index2StackSlot(Slot), 0.0F))
+            .first;
     S2RCMap.insert(std::make_pair(Slot, RC));
   } else {
     // Use the largest common subclass register class.
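The LiveStacks::getOrCreateInterval change above swaps insert(std::make_pair(...)) for map::emplace with std::piecewise_construct, so the mapped value is constructed in place inside the map node rather than built as a temporary pair and copied in. A minimal sketch of the idiom, using a hypothetical Interval type rather than anything from the patch:

    #include <map>
    #include <tuple>

    struct Interval {
      Interval(unsigned Reg, float Weight) : Reg(Reg), Weight(Weight) {}
      unsigned Reg;
      float Weight;
    };

    int main() {
      std::map<int, Interval> S2IMap;
      int Slot = 4;
      // Forward (Slot) to the key's constructor and (Reg, Weight) to the
      // Interval constructor; the Interval is built directly in the node.
      auto I = S2IMap.emplace(std::piecewise_construct,
                              std::forward_as_tuple(Slot),
                              std::forward_as_tuple(16u, 0.0F))
                   .first;
      return I->second.Reg == 16u ? 0 : 1;
    }

This form also works for mapped types that are expensive to copy or not copyable at all, which the make_pair-based insert cannot handle.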
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index c4bca5f..11deb81 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include <algorithm> using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index e8bf687..8378429 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -252,7 +252,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { } static inline bool -lookupCandidateBaseReg(int64_t BaseOffset, +lookupCandidateBaseReg(unsigned BaseReg, + int64_t BaseOffset, int64_t FrameSizeAdjust, int64_t LocalFrameOffset, const MachineInstr *MI, @@ -260,7 +261,7 @@ lookupCandidateBaseReg(int64_t BaseOffset, // Check if the relative offset from the where the base register references // to the target address is in range for the instruction. int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; - return TRI->isFrameOffsetLegal(MI, Offset); + return TRI->isFrameOffsetLegal(MI, BaseReg, Offset); } bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { @@ -362,8 +363,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // instruction itself will be taken into account by the target, // so we don't have to adjust for it here when reusing a base // register. - if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, - LocalOffset, MI, TRI)) { + if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset, + FrameSizeAdjust, LocalOffset, MI, + TRI)) { DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); // We found a register to reuse. Offset = FrameSizeAdjust + LocalOffset - BaseOffset; @@ -382,7 +384,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // then don't bother creating it. 
if (ref + 1 >= e || !lookupCandidateBaseReg( - BaseOffset, FrameSizeAdjust, + BaseReg, BaseOffset, FrameSizeAdjust, FrameReferenceInsns[ref + 1].getLocalOffset(), FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) { BaseOffset = PrevBaseOffset; diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 3c73905..d5fdf8e 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -53,7 +53,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); - CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + + CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); } @@ -250,7 +250,7 @@ std::string MachineBasicBlock::getFullName() const { if (getBasicBlock()) Name += getBasicBlock()->getName(); else - Name += (Twine("BB") + Twine(getNumber())).str(); + Name += ("BB" + Twine(getNumber())).str(); return Name; } @@ -307,7 +307,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\t'; if (I->isInsideBundle()) OS << " * "; - I->print(OS, &getParent()->getTarget()); + I->print(OS); } // Print the successors of this block according to the CFG. @@ -1129,21 +1129,19 @@ getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { /// instructions after (searching just for defs) MI. MachineBasicBlock::LivenessQueryResult MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, - unsigned Reg, MachineInstr *MI, - unsigned Neighborhood) { + unsigned Reg, const_iterator Before, + unsigned Neighborhood) const { unsigned N = Neighborhood; - MachineBasicBlock *MBB = MI->getParent(); - // Start by searching backwards from MI, looking for kills, reads or defs. - - MachineBasicBlock::iterator I(MI); + // Start by searching backwards from Before, looking for kills, reads or defs. + const_iterator I(Before); // If this is the first insn in the block, don't search backwards. - if (I != MBB->begin()) { + if (I != begin()) { do { --I; MachineOperandIteratorBase::PhysRegInfo Analysis = - MIOperands(I).analyzePhysReg(Reg, TRI); + ConstMIOperands(I).analyzePhysReg(Reg, TRI); if (Analysis.Defines) // Outputs happen after inputs so they take precedence if both are @@ -1158,15 +1156,15 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // Defined or read without a previous kill - live. return Analysis.Reads ? LQR_Live : LQR_OverlappingLive; - } while (I != MBB->begin() && --N > 0); + } while (I != begin() && --N > 0); } // Did we get to the start of the block? - if (I == MBB->begin()) { + if (I == begin()) { // If so, the register's state is definitely defined by the live-in state. for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid(); ++RAI) { - if (MBB->isLiveIn(*RAI)) + if (isLiveIn(*RAI)) return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive; } @@ -1175,13 +1173,13 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, N = Neighborhood; - // Try searching forwards from MI, looking for reads or defs. - I = MachineBasicBlock::iterator(MI); + // Try searching forwards from Before, looking for reads or defs. + I = const_iterator(Before); // If this is the last insn in the block, don't search forwards. 
- if (I != MBB->end()) { - for (++I; I != MBB->end() && N > 0; ++I, --N) { + if (I != end()) { + for (++I; I != end() && N > 0; ++I, --N) { MachineOperandIteratorBase::PhysRegInfo Analysis = - MIOperands(I).analyzePhysReg(Reg, TRI); + ConstMIOperands(I).analyzePhysReg(Reg, TRI); if (Analysis.ReadsOverlap) // Used, therefore must have been live. diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index aaa7d91..2969bad 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -40,13 +41,14 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; -#define DEBUG_TYPE "block-placement2" +#define DEBUG_TYPE "block-placement" STATISTIC(NumCondBranches, "Number of conditional branches"); STATISTIC(NumUncondBranches, "Number of uncondittional branches"); @@ -61,11 +63,23 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks", cl::init(0), cl::Hidden); // FIXME: Find a good default for this flag and remove the flag. -static cl::opt<unsigned> -ExitBlockBias("block-placement-exit-block-bias", - cl::desc("Block frequency percentage a loop exit block needs " - "over the original exit to be considered the new exit."), - cl::init(0), cl::Hidden); +static cl::opt<unsigned> ExitBlockBias( + "block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + +static cl::opt<bool> OutlineOptionalBranches( + "outline-optional-branches", + cl::desc("Put completely optional branches, i.e. branches with a common " + "post dominator, out of line."), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> OutlineOptionalThreshold( + "outline-optional-threshold", + cl::desc("Don't outline optional branches that are a single block with an " + "instruction count below this threshold"), + cl::init(4), cl::Hidden); namespace { class BlockChain; @@ -107,7 +121,7 @@ public: /// function. It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } @@ -144,19 +158,18 @@ public: // Update the incoming blocks to point to this chain, and add them to the // chain structure. 
- for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); - BI != BE; ++BI) { - Blocks.push_back(*BI); - assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); - BlockToChain[*BI] = this; + for (MachineBasicBlock *ChainBB : *Chain) { + Blocks.push_back(ChainBB); + assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain"); + BlockToChain[ChainBB] = this; } } #ifndef NDEBUG /// \brief Dump the blocks in this chain. LLVM_DUMP_METHOD void dump() { - for (iterator I = begin(), E = end(); I != E; ++I) - (*I)->dump(); + for (MachineBasicBlock *MBB : *this) + MBB->dump(); } #endif // NDEBUG @@ -188,6 +201,13 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the target's lowering info. const TargetLoweringBase *TLI; + /// \brief A handle to the post dominator tree. + MachineDominatorTree *MDT; + + /// \brief A set of blocks that are unavoidably execute, i.e. they dominate + /// all terminators of the MachineFunction. + SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks; + /// \brief Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily while processing the loop structure of @@ -205,28 +225,26 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; - void markChainSuccessors(BlockChain &Chain, - MachineBasicBlock *LoopHeaderBB, + void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); - MachineBasicBlock *selectBestCandidateBlock( - BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, - const BlockFilterSet *BlockFilter); - MachineBasicBlock *getFirstUnplacedBlock( - MachineFunction &F, - const BlockChain &PlacedChain, - MachineFunction::iterator &PrevUnplacedBlockIt, - const BlockFilterSet *BlockFilter); + MachineBasicBlock * + selectBestCandidateBlock(BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock * + getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopExit(MachineFunction &F, - MachineLoop &L, + MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet); void buildLoopChains(MachineFunction &F, MachineLoop &L); void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, @@ -244,6 +262,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -252,12 +271,13 @@ public: char MachineBlockPlacement::ID = 0; char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; -INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement", "Branch Probability 
Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement", "Branch Probability Basic Block Placement", false, false) #ifndef NDEBUG @@ -267,8 +287,8 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", static std::string getBlockName(MachineBasicBlock *BB) { std::string Result; raw_string_ostream OS(Result); - OS << "BB#" << BB->getNumber() - << " (derived from LLVM BB '" << BB->getName() << "')"; + OS << "BB#" << BB->getNumber(); + OS << " (derived from LLVM BB '" << BB->getName() << "')"; OS.flush(); return Result; } @@ -292,26 +312,22 @@ static std::string getBlockNum(MachineBasicBlock *BB) { /// having one fewer active predecessor. It also adds any successors of this /// chain which reach the zero-predecessor state to the worklist passed in. void MachineBlockPlacement::markChainSuccessors( - BlockChain &Chain, - MachineBasicBlock *LoopHeaderBB, + BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. - for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); - CBI != CBE; ++CBI) { + for (MachineBasicBlock *MBB : Chain) { // Add any successors for which this is the only un-placed in-loop // predecessor to the worklist as a viable candidate for CFG-neutral // placement. No subsequent placement of this block will violate the CFG // shape, so we get to use heuristics to choose a favorable placement. - for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), - SE = (*CBI)->succ_end(); - SI != SE; ++SI) { - if (BlockFilter && !BlockFilter->count(*SI)) + for (MachineBasicBlock *Succ : MBB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; // Disregard edges within a fixed chain, or edges to the loop header. - if (&Chain == &SuccChain || *SI == LoopHeaderBB) + if (&Chain == &SuccChain || Succ == LoopHeaderBB) continue; // This is a cross-chain edge that is within the loop, so decrement the @@ -331,9 +347,10 @@ void MachineBlockPlacement::markChainSuccessors( /// very hot successor edges. /// /// \returns The best successor block found, or null if none are viable. 
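The selectBestSuccessor changes in the hunks that follow keep the pass's existing heuristic: a successor must either be at least 80% likely (HotProb) or violate no CFG constraint, and even a hot successor is rejected when some other predecessor edge into it is globally more important. A toy recomputation of that frequency comparison; Prob, scale and all the numbers are simplified stand-ins, not LLVM's BranchProbability/BlockFrequency API.

    #include <cstdint>
    #include <cstdio>

    // Simplified stand-ins for BranchProbability (N/D) and BlockFrequency.
    struct Prob {
      uint32_t N, D;
    };
    static Prob complement(Prob P) { return {P.D - P.N, P.D}; }
    static uint64_t scale(uint64_t Freq, Prob P) { return Freq * P.N / P.D; }

    int main() {
      const Prob HotProb = {4, 5}; // the 80% threshold used by the pass
      // BB -> Succ is very hot, but Pred also branches into Succ.
      uint64_t FreqBB = 1000, FreqPred = 300;
      Prob SuccProb = {9, 10}, PredProb = {1, 2};
      // A hot successor is only taken when no other predecessor edge is
      // globally more important: compare freq(BB) * prob * (1 - HotProb)
      // against freq(Pred) * prob(Pred -> Succ).
      uint64_t CandidateEdgeFreq =
          scale(scale(FreqBB, SuccProb), complement(HotProb));
      uint64_t PredEdgeFreq = scale(FreqPred, PredProb);
      std::puts(PredEdgeFreq >= CandidateEdgeFreq ? "CFG conflict, skip"
                                                  : "take hot successor");
    }
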
-MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( - MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter) { +MachineBasicBlock * +MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(4, 5); // 80% MachineBasicBlock *BestSucc = nullptr; @@ -347,65 +364,85 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( uint32_t WeightScale = 0; uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); - SI != SE; ++SI) { - if (BlockFilter && !BlockFilter->count(*SI)) + for (MachineBasicBlock *Succ : BB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n"); continue; } - if (*SI != *SuccChain.begin()) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n"); + if (Succ != *SuccChain.begin()) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + // If we outline optional branches, look whether Succ is unavoidable, i.e. + // dominates all terminators of the MachineFunction. If it does, other + // successors must be optional. Don't do this for cold branches. + if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() && + UnavoidableBlocks.count(Succ) > 0) { + auto HasShortOptionalBranch = [&]() { + for (MachineBasicBlock *Pred : Succ->predecessors()) { + // Check whether there is an unplaced optional branch. + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) + continue; + // Check whether the optional branch has exactly one BB. + if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB) + continue; + // Check whether the optional branch is small. + if (Pred->size() < OutlineOptionalThreshold) + return true; + } + return false; + }; + if (!HasShortOptionalBranch()) + return Succ; + } + // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb << " (prob) (CFG conflict)\n"); continue; } - // Make sure that a hot successor doesn't have a globally more important - // predecessor. - BlockFrequency CandidateEdgeFreq - = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + // Make sure that a hot successor doesn't have a globally more + // important predecessor. 
+ BlockFrequency CandidateEdgeFreq = + MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); bool BadCFGConflict = false; - for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), - PE = (*SI)->pred_end(); - PI != PE; ++PI) { - if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || - BlockToChain[*PI] == &Chain) + for (MachineBasicBlock *Pred : Succ->predecessors()) { + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) continue; - BlockFrequency PredEdgeFreq - = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); + BlockFrequency PredEdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ); if (PredEdgeFreq >= CandidateEdgeFreq) { BadCFGConflict = true; break; } } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb << " (prob) (non-cold CFG conflict)\n"); continue; } } - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb << " (prob)" << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") << "\n"); if (BestSucc && BestWeight >= SuccWeight) continue; - BestSucc = *SI; + BestSucc = Succ; BestWeight = SuccWeight; } return BestSucc; @@ -430,29 +467,26 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // some code complexity) into the loop below. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; - for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), - WBE = WorkList.end(); - WBI != WBE; ++WBI) { - BlockChain &SuccChain = *BlockToChain[*WBI]; + for (MachineBasicBlock *MBB : WorkList) { + BlockChain &SuccChain = *BlockToChain[MBB]; if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(*WBI) - << " -> Already merged!\n"); + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n"); continue; } assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); - BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; - MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); + BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; - BestBlock = *WBI; + BestBlock = MBB; BestFreq = CandidateFreq; } return BestBlock; @@ -485,8 +519,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( } void MachineBlockPlacement::buildChain( - MachineBasicBlock *BB, - BlockChain &Chain, + MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { assert(BB); @@ -513,8 +546,8 @@ void MachineBlockPlacement::buildChain( BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); if (!BestSucc) { - BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, - BlockFilter); + BestSucc = + getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter); if (!BestSucc) break; @@ -527,8 +560,8 @@ void MachineBlockPlacement::buildChain( // Zero out LoopPredecessors for the successor we're about to merge in case // we selected a 
successor that didn't fit naturally into the CFG. SuccChain.LoopPredecessors = 0; - DEBUG(dbgs() << "Merging from " << getBlockNum(BB) - << " to " << getBlockNum(BestSucc) << "\n"); + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " + << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *std::prev(Chain.end()); @@ -558,20 +591,17 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, if (!LoopBlockSet.count(*HeaderChain.begin())) return L.getHeader(); - DEBUG(dbgs() << "Finding best loop top for: " - << getBlockName(L.getHeader()) << "\n"); + DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader()) + << "\n"); BlockFrequency BestPredFreq; MachineBasicBlock *BestPred = nullptr; - for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), - PE = L.getHeader()->pred_end(); - PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; + for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { if (!LoopBlockSet.count(Pred)) continue; DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " << Pred->succ_size() << " successors, "; - MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -598,15 +628,13 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, return BestPred; } - /// \brief Find the best loop exiting block for layout. /// /// This routine implements the logic to analyze the loop looking for the best /// block to layout at the top of the loop. Typically this is done to maximize /// fallthrough opportunities. MachineBasicBlock * -MachineBlockPlacement::findBestLoopExit(MachineFunction &F, - MachineLoop &L, +MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { // We don't want to layout the loop linearly in all cases. If the loop header // is just a normal basic block in the loop, we want to look for what block @@ -628,15 +656,13 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // blocks where rotating to exit with that block will reach an outer loop. SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; - DEBUG(dbgs() << "Finding best loop exit for: " - << getBlockName(L.getHeader()) << "\n"); - for (MachineLoop::block_iterator I = L.block_begin(), - E = L.block_end(); - I != E; ++I) { - BlockChain &Chain = *BlockToChain[*I]; + DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader()) + << "\n"); + for (MachineBasicBlock *MBB : L.getBlocks()) { + BlockChain &Chain = *BlockToChain[MBB]; // Ensure that this block is at the end of a chain; otherwise it could be - // mid-way through an inner loop or a successor of an analyzable branch. - if (*I != *std::prev(Chain.end())) + // mid-way through an inner loop or a successor of an unanalyzable branch. + if (MBB != *std::prev(Chain.end())) continue; // Now walk the successors. We need to establish whether this has a viable @@ -650,59 +676,56 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // the MBPI analysis, we use the internal weights and manually compute the // probabilities to avoid quadratic behavior. 
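The comment above notes that probabilities are computed manually from the raw edge weights to avoid quadratic behavior: the weight sum is computed once per block, and each edge probability becomes a single division instead of a per-edge re-walk of the successor list. A guess at what that looks like in isolation; the power-of-two down-scaling that keeps the numerator/denominator in 32 bits is an assumption here, not LLVM's exact getSumForBlock logic.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<uint64_t> SuccWeights = {1ull << 31, 3ull << 30, 1ull << 29};
      uint64_t Sum = 0;
      for (uint64_t W : SuccWeights) // one pass over the successors
        Sum += W;
      // Down-scale so each probability fits a 32-bit numerator/denominator.
      uint64_t WeightScale = 1;
      while (Sum / WeightScale > UINT32_MAX)
        WeightScale *= 2;
      for (uint64_t W : SuccWeights)
        std::printf("prob = %llu/%llu\n",
                    (unsigned long long)(W / WeightScale),
                    (unsigned long long)(Sum / WeightScale));
    }
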
uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); - for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), - SE = (*I)->succ_end(); - SI != SE; ++SI) { - if ((*SI)->isLandingPad()) + uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale); + for (MachineBasicBlock *Succ : MBB->successors()) { + if (Succ->isLandingPad()) continue; - if (*SI == *I) + if (Succ == MBB) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; // Don't split chains, either this chain or the successor's chain. if (&Chain == &SuccChain) { - DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (chain conflict)\n"); + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (chain conflict)\n"); continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI); - if (LoopBlockSet.count(*SI)) { - DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (" << SuccWeight << ")\n"); + uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ); + if (LoopBlockSet.count(Succ)) { + DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (" << SuccWeight << ")\n"); HasLoopingSucc = true; continue; } unsigned SuccLoopDepth = 0; - if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) { + if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) { SuccLoopDepth = ExitLoop->getLoopDepth(); if (ExitLoop->contains(&L)) - BlocksExitingToOuterLoop.insert(*I); + BlocksExitingToOuterLoop.insert(MBB); } BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); - BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; - DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] ("; - MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); // Note that we bias this toward an existing layout successor to retain // incoming order in the absence of better information. The exit must have // a frequency higher than the current exit before we consider breaking // the layout. BranchProbability Bias(100 - ExitBlockBias, 100); - if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || + if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || - ((*I)->isLayoutSuccessor(*SI) && + (MBB->isLayoutSuccessor(Succ) && !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; - ExitingBB = *I; + ExitingBB = MBB; } } - // Restore the old exiting state, no viable looping successor was found. if (!HasLoopingSucc) { + // Restore the old exiting state, no viable looping successor was found. 
ExitingBB = OldExitingBB; BestExitEdgeFreq = OldBestExitEdgeFreq; continue; @@ -738,12 +761,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, MachineBasicBlock *Top = *LoopChain.begin(); bool ViableTopFallthrough = false; - for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), - PE = Top->pred_end(); - PI != PE; ++PI) { - BlockChain *PredChain = BlockToChain[*PI]; - if (!LoopBlockSet.count(*PI) && - (!PredChain || *PI == *std::prev(PredChain->end()))) { + for (MachineBasicBlock *Pred : Top->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } @@ -754,18 +775,16 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // introduce an unnecessary branch. if (ViableTopFallthrough) { MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); - for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), - SE = Bottom->succ_end(); - SI != SE; ++SI) { - BlockChain *SuccChain = BlockToChain[*SI]; - if (!LoopBlockSet.count(*SI) && - (!SuccChain || *SI == *SuccChain->begin())) + for (MachineBasicBlock *Succ : Bottom->successors()) { + BlockChain *SuccChain = BlockToChain[Succ]; + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) return; } } - BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), - ExitingBB); + BlockChain::iterator ExitIt = + std::find(LoopChain.begin(), LoopChain.end(), ExitingBB); if (ExitIt == LoopChain.end()) return; @@ -782,8 +801,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, MachineLoop &L) { // First recurse through any nested loops, building chains for those inner // loops. - for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) - buildLoopChains(F, **LI); + for (MachineLoop *InnerLoop : L) + buildLoopChains(F, *InnerLoop); SmallVector<MachineBasicBlock *, 16> BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); @@ -809,21 +828,16 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallPtrSet<BlockChain *, 4> UpdatedPreds; assert(LoopChain.LoopPredecessors == 0); UpdatedPreds.insert(&LoopChain); - for (MachineLoop::block_iterator BI = L.block_begin(), - BE = L.block_end(); - BI != BE; ++BI) { - BlockChain &Chain = *BlockToChain[*BI]; + for (MachineBasicBlock *LoopBB : L.getBlocks()) { + BlockChain &Chain = *BlockToChain[LoopBB]; if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); - for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); - BCI != BCE; ++BCI) { - assert(BlockToChain[*BCI] == &Chain); - for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), - PE = (*BCI)->pred_end(); - PI != PE; ++PI) { - if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred)) continue; ++Chain.LoopPredecessors; } @@ -845,29 +859,26 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; } - for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); - BCI != BCE; ++BCI) { - dbgs() << " ... 
" << getBlockName(*BCI) << "\n"; - if (!LoopBlockSet.erase(*BCI)) { + for (MachineBasicBlock *ChainBB : LoopChain) { + dbgs() << " ... " << getBlockName(ChainBB) << "\n"; + if (!LoopBlockSet.erase(ChainBB)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough // from a loop block to a non-loop block or vice versa. dbgs() << "Loop chain contains a block not contained by the loop!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" - << " Bad block: " << getBlockName(*BCI) << "\n"; + << " Bad block: " << getBlockName(ChainBB) << "\n"; } } if (!LoopBlockSet.empty()) { BadLoop = true; - for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), - LBE = LoopBlockSet.end(); - LBI != LBE; ++LBI) + for (MachineBasicBlock *LoopBB : LoopBlockSet) dbgs() << "Loop contains blocks never placed into a chain!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" - << " Bad block: " << getBlockName(*LBI) << "\n"; + << " Bad block: " << getBlockName(LoopBB) << "\n"; } assert(!BadLoop && "Detected problems with the placement of this loop."); }); @@ -879,8 +890,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = FI; - BlockChain *Chain - = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + BlockChain *Chain = + new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); // Also, merge any blocks which we cannot reason about and must preserve // the exact fallthrough behavior for. for (;;) { @@ -903,28 +914,44 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } } + if (OutlineOptionalBranches) { + // Find the nearest common dominator of all of F's terminators. + MachineBasicBlock *Terminator = nullptr; + for (MachineBasicBlock &MBB : F) { + if (MBB.succ_size() == 0) { + if (Terminator == nullptr) + Terminator = &MBB; + else + Terminator = MDT->findNearestCommonDominator(Terminator, &MBB); + } + } + + // MBBs dominating this common dominator are unavoidable. + UnavoidableBlocks.clear(); + for (MachineBasicBlock &MBB : F) { + if (MDT->dominates(&MBB, Terminator)) { + UnavoidableBlocks.insert(&MBB); + } + } + } + // Build any loop-based chains. 
- for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; - ++LI) - buildLoopChains(F, **LI); + for (MachineLoop *L : *MLI) + buildLoopChains(F, *L); SmallVector<MachineBasicBlock *, 16> BlockWorkList; SmallPtrSet<BlockChain *, 4> UpdatedPreds; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - MachineBasicBlock *BB = &*FI; - BlockChain &Chain = *BlockToChain[BB]; + for (MachineBasicBlock &MBB : F) { + BlockChain &Chain = *BlockToChain[&MBB]; if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); - for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); - BCI != BCE; ++BCI) { - assert(BlockToChain[*BCI] == &Chain); - for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), - PE = (*BCI)->pred_end(); - PI != PE; ++PI) { - if (BlockToChain[*PI] == &Chain) + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain) continue; ++Chain.LoopPredecessors; } @@ -944,46 +971,40 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Crash at the end so we get all of the debugging output first. bool BadFunc = false; FunctionBlockSetType FunctionBlockSet; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - FunctionBlockSet.insert(FI); + for (MachineBasicBlock &MBB : F) + FunctionBlockSet.insert(&MBB); - for (BlockChain::iterator BCI = FunctionChain.begin(), - BCE = FunctionChain.end(); - BCI != BCE; ++BCI) - if (!FunctionBlockSet.erase(*BCI)) { + for (MachineBasicBlock *ChainBB : FunctionChain) + if (!FunctionBlockSet.erase(ChainBB)) { BadFunc = true; dbgs() << "Function chain contains a block not in the function!\n" - << " Bad block: " << getBlockName(*BCI) << "\n"; + << " Bad block: " << getBlockName(ChainBB) << "\n"; } if (!FunctionBlockSet.empty()) { BadFunc = true; - for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), - FBE = FunctionBlockSet.end(); - FBI != FBE; ++FBI) + for (MachineBasicBlock *RemainingBB : FunctionBlockSet) dbgs() << "Function contains blocks never placed into a chain!\n" - << " Bad block: " << getBlockName(*FBI) << "\n"; + << " Bad block: " << getBlockName(RemainingBB) << "\n"; } assert(!BadFunc && "Detected problems with the block placement."); }); // Splice the blocks into place. MachineFunction::iterator InsertPos = F.begin(); - for (BlockChain::iterator BI = FunctionChain.begin(), - BE = FunctionChain.end(); - BI != BE; ++BI) { - DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " - : " ... ") - << getBlockName(*BI) << "\n"); - if (InsertPos != MachineFunction::iterator(*BI)) - F.splice(InsertPos, *BI); + for (MachineBasicBlock *ChainBB : FunctionChain) { + DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(ChainBB) << "\n"); + if (InsertPos != MachineFunction::iterator(ChainBB)) + F.splice(InsertPos, ChainBB); else ++InsertPos; // Update the terminator of the previous block. 
- if (BI == FunctionChain.begin()) + if (ChainBB == *FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI)); + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB)); // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this @@ -993,16 +1014,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { // The "PrevBB" is not yet updated to reflect current code layout, so, // o. it may fall-through to a block without explict "goto" instruction - // before layout, and no longer fall-through it after layout; or + // before layout, and no longer fall-through it after layout; or // o. just opposite. - // + // // AnalyzeBranch() may return erroneous value for FBB when these two // situations take place. For the first scenario FBB is mistakenly set // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, // is mistakenly pointing to "*BI". // bool needUpdateBr = true; - if (!Cond.empty() && (!FBB || FBB == *BI)) { + if (!Cond.empty() && (!FBB || FBB == ChainBB)) { PrevBB->updateTerminator(); needUpdateBr = false; Cond.clear(); @@ -1022,7 +1043,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { << getBlockName(PrevBB) << "\n"); DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); - DebugLoc dl; // FIXME: this is nowhere + DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); needUpdateBr = true; @@ -1043,33 +1064,33 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) + if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) return; if (FunctionChain.begin() == FunctionChain.end()) - return; // Empty chain. + return; // Empty chain. const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; - for (BlockChain::iterator BI = std::next(FunctionChain.begin()), - BE = FunctionChain.end(); - BI != BE; ++BI) { + for (MachineBasicBlock *ChainBB : FunctionChain) { + if (ChainBB == *FunctionChain.begin()) + continue; + // Don't align non-looping basic blocks. These are unlikely to execute // enough times to matter in practice. Note that we'll still handle // unnatural CFGs inside of a natural outer loop (the common case) and // rotated loops. - MachineLoop *L = MLI->getLoopFor(*BI); + MachineLoop *L = MLI->getLoopFor(ChainBB); if (!L) continue; unsigned Align = TLI->getPrefLoopAlignment(L); if (!Align) - continue; // Don't care about loop alignment. + continue; // Don't care about loop alignment. // If the block is cold relative to the function entry don't waste space // aligning it. 
- BlockFrequency Freq = MBFI->getBlockFreq(*BI); + BlockFrequency Freq = MBFI->getBlockFreq(ChainBB); if (Freq < WeightedEntryFreq) continue; @@ -1082,12 +1103,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. - MachineBasicBlock *LayoutPred = *std::prev(BI); + MachineBasicBlock *LayoutPred = + &*std::prev(MachineFunction::iterator(ChainBB)); // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above. - if (!LayoutPred->isSuccessor(*BI)) { - (*BI)->setAlignment(Align); + if (!LayoutPred->isSuccessor(ChainBB)) { + ChainBB->setAlignment(Align); continue; } @@ -1095,10 +1117,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // cold relative to the block. When this is true, other predecessors make up // all of the hot entries into the block and thus alignment is likely to be // important. - BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BranchProbability LayoutProb = + MBPI->getEdgeProbability(LayoutPred, ChainBB); BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; if (LayoutEdgeFreq <= (Freq * ColdProb)) - (*BI)->setAlignment(Align); + ChainBB->setAlignment(Align); } } @@ -1115,6 +1138,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { MLI = &getAnalysis<MachineLoopInfo>(); TII = F.getSubtarget().getInstrInfo(); TLI = F.getSubtarget().getTargetLowering(); + MDT = &getAnalysis<MachineDominatorTree>(); assert(BlockToChain.empty()); buildCFGChains(F); @@ -1124,9 +1148,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { if (AlignAllBlock) // Align all of the blocks in the function to a specific alignment. - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - FI->setAlignment(AlignAllBlock); + for (MachineBasicBlock &MBB : F) + MBB.setAlignment(AlignAllBlock); // We always return true as we have no way to track whether the final order // differs from the original order. @@ -1181,20 +1204,19 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); - for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BlockFrequency BlockFreq = MBFI->getBlockFreq(I); - Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches - : NumUncondBranches; - Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq - : UncondBranchTakenFreq; - for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), - SE = I->succ_end(); - SI != SE; ++SI) { + for (MachineBasicBlock &MBB : F) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); + Statistic &NumBranches = + (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches; + Statistic &BranchTakenFreq = + (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq; + for (MachineBasicBlock *Succ : MBB.successors()) { // Skip if this successor is a fallthrough. 
- if (I->isLayoutSuccessor(*SI)) + if (MBB.isLayoutSuccessor(Succ)) continue; - BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); + BlockFrequency EdgeFreq = + BlockFreq * MBPI->getEdgeProbability(&MBB, Succ); ++NumBranches; BranchTakenFreq += EdgeFreq.getFrequency(); } diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 2960408..87aaaa0 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -47,7 +48,7 @@ namespace { MachineRegisterInfo *MRI; public: static char ID; // Pass identification - MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) { + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) { initializeMachineCSEPass(*PassRegistry::getPassRegistry()); } @@ -68,7 +69,7 @@ namespace { } private: - const unsigned LookAheadLimit; + unsigned LookAheadLimit; typedef RecyclingAllocator<BumpPtrAllocator, ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy; typedef ScopedHashTable<MachineInstr*, unsigned, @@ -580,8 +581,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Actually perform the elimination. if (DoCSE) { for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { - MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); - MRI->clearKillFlags(CSEPairs[i].second); + unsigned OldReg = CSEPairs[i].first; + unsigned NewReg = CSEPairs[i].second; + // OldReg may have been unused but is used now, clear the Dead flag + MachineInstr *Def = MRI->getUniqueVRegDef(NewReg); + assert(Def != nullptr && "CSEd register has no unique definition?"); + Def->clearRegisterDeads(NewReg); + // Replace with NewReg and clear kill flags which may be wrong now. + MRI->replaceRegWith(OldReg, NewReg); + MRI->clearKillFlags(NewReg); } // Go through implicit defs of CSMI and MI, if a def is not dead at MI, @@ -708,5 +716,6 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); + LookAheadLimit = TII->getMachineCSELookAheadLimit(); return PerformCSE(DT->getRootNode()); } diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index 2931258..a4bc77e 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -45,7 +45,7 @@ class MachineCombiner : public MachineFunctionPass { TargetSchedModel TSchedModel; - /// OptSize - True if optimizing for code size. + /// True if optimizing for code size. bool OptSize; public: @@ -109,7 +109,7 @@ MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { return DefInstr; } -/// getDepth - Computes depth of instructions in vector \InsInstr. +/// Computes depth of instructions in vector \InsInstr. 
///
/// \param InsInstrs is a vector of machine instructions
/// \param InstrIdxForVirtReg is a dense map of virtual register to index
@@ -125,14 +125,13 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
   SmallVector<unsigned, 16> InstrDepth;
   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
 
-  // Foreach instruction in in the new sequence compute the depth based on the
+  // For each instruction in the new sequence compute the depth based on the
   // operands. Use the trace information when possible. For new operands which
   // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
   for (auto *InstrPtr : InsInstrs) { // for each Use
     unsigned IDepth = 0;
     DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
-    for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
-      const MachineOperand &MO = InstrPtr->getOperand(i);
+    for (const MachineOperand &MO : InstrPtr->operands()) {
       // Check for virtual register operand.
       if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
         continue;
@@ -169,8 +168,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
   return InstrDepth[NewRootIdx];
 }
 
-/// getLatency - Computes instruction latency as max of latency of defined
-/// operands
+/// Computes instruction latency as max of latency of defined operands.
 ///
 /// \param Root is a machine instruction that could be replaced by NewRoot.
 /// It is used to compute a more accurate latency information for NewRoot in
@@ -187,8 +185,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
   // Check each definition in NewRoot and compute the latency
   unsigned NewRootLatency = 0;
 
-  for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = NewRoot->getOperand(i);
+  for (const MachineOperand &MO : NewRoot->operands()) {
     // Check for virtual register operand.
     if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
       continue;
@@ -211,12 +208,12 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
   return NewRootLatency;
 }
 
-/// preservesCriticalPathlen - True when the new instruction sequence does not
+/// True when the new instruction sequence does not
 /// lengthen the critical path. The DAGCombine code sequence ends in MI
 /// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
 /// necessary condition for the new sequence to replace the old sequence is that
-/// is cannot lengthen the critical path. This is decided by the formula
-/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
+/// it cannot lengthen the critical path. This is decided by the formula
+/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack).
 /// The slack is the number of cycles Root can be delayed before the critical
 /// path becomes longer.
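The inequality documented above is the whole acceptance test: the rewritten sequence may only replace the original one if the new root's depth plus latency does not exceed the old root's depth plus latency plus slack. Restated literally as a toy predicate, with made-up cycle counts for a MUL+ADD to MADD rewrite:

    #include <cstdio>

    // Toy restatement of the acceptance test above; all cycle counts invented.
    static bool fitsCriticalPath(unsigned NewRootDepth, unsigned NewRootLatency,
                                 unsigned RootDepth, unsigned RootLatency,
                                 unsigned RootSlack) {
      return NewRootDepth + NewRootLatency <=
             RootDepth + RootLatency + RootSlack;
    }

    int main() {
      // A MUL feeding an ADD (root depth 3, latency 4, slack 1) rewritten
      // into one fused MADD whose root sits at depth 3 with latency 5:
      // 3 + 5 <= 3 + 4 + 1, so the combined sequence is acceptable.
      std::puts(fitsCriticalPath(3, 5, 3, 4, 1) ? "combine" : "keep original");
    }
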
bool MachineCombiner::preservesCriticalPathLen( @@ -264,8 +261,7 @@ void MachineCombiner::instr2instrSC( InstrsSC.push_back(SC); } } -/// preservesResourceLen - True when the new instructions do not increase -/// resource length +/// True when the new instructions do not increase resource length bool MachineCombiner::preservesResourceLen( MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -300,7 +296,7 @@ bool MachineCombiner::preservesResourceLen( } /// \returns true when new instruction sequence should be generated -/// independent if it lenghtens critical path or not +/// independent if it lengthens critical path or not bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { if (OptSize && (NewSize < OldSize)) return true; @@ -309,7 +305,7 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { return false; } -/// combineInstructions - substitute a slow code sequence with a faster one by +/// Substitute a slow code sequence with a faster one by /// evaluating instruction combining pattern. /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction /// combining based on machine trace metrics. Only combine a sequence of @@ -370,7 +366,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { continue; // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR - // the new sequence neither lenghten the critical path nor increases + // the new sequence neither lengthens the critical path nor increases // resource pressure. if (doSubstitute(InsInstrs.size(), DelInstrs.size()) || (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, @@ -406,8 +402,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { } bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { - const TargetSubtargetInfo &STI = - MF.getTarget().getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); TII = STI.getInstrInfo(); TRI = STI.getRegisterInfo(); SchedModel = STI.getSchedModel(); @@ -416,8 +411,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = 0; - OptSize = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 9611122..43c80b7 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -252,7 +252,11 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { report_fatal_error("MachineCopyPropagation should be run after" " register allocation!"); - if (MO.isDef()) { + // Treat undef use like defs. + // The backends are allowed to do whatever they want with undef value + // and we cannot be sure this register will not be rewritten to break + // some false dependencies for the hardware for instance. 
+    if (MO.isDef() || MO.isUndef()) {
       Defs.push_back(Reg);
       continue;
     }
   }
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
index df60cf3..467a2e4 100644
--- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -14,6 +14,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallBitVector.h"
 
 using namespace llvm;
 
@@ -59,3 +60,68 @@ void MachineDominatorTree::releaseMemory() {
 void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
   DT->print(OS);
 }
+
+void MachineDominatorTree::applySplitCriticalEdges() const {
+  // Bail out early if there is nothing to do.
+  if (CriticalEdgesToSplit.empty())
+    return;
+
+  // For each element in CriticalEdgesToSplit, remember whether or not the
+  // element is the new immediate dominator of its successor. The mapping is
+  // done by index, i.e., the information for the ith element of
+  // CriticalEdgesToSplit is the ith element of IsNewIDom.
+  SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true);
+  size_t Idx = 0;
+
+  // Collect all the dominance properties info, before invalidating
+  // the underlying DT.
+  for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+    // Update dominator information.
+    MachineBasicBlock *Succ = Edge.ToBB;
+    MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);
+
+    for (MachineBasicBlock *PredBB : Succ->predecessors()) {
+      if (PredBB == Edge.NewBB)
+        continue;
+      // If we are in this situation:
+      //   FromBB1        FromBB2
+      //    +              +
+      //   + +            + +
+      //  +   +          +   +
+      // ...  Split1  Split2 ...
+      //        +       +
+      //         +     +
+      //          +   +
+      //           Succ
+      // Instead of checking the dominance property with Split2, we check it
+      // with FromBB2 since Split2 is still unknown to the underlying DT
+      // structure.
+      if (NewBBs.count(PredBB)) {
+        assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
+                                           "critical edge split has more "
+                                           "than one predecessor!");
+        PredBB = *PredBB->pred_begin();
+      }
+      if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
+        IsNewIDom[Idx] = false;
+        break;
+      }
+    }
+    ++Idx;
+  }
+
+  // Now, update DT with the collected dominance properties info.
+  Idx = 0;
+  for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+    // We know FromBB dominates NewBB.
+    MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+
+    // If all the other predecessors of "Succ" are dominated by "Succ" itself
+    // then the new block is the new immediate dominator of "Succ". Otherwise,
+    // the new block doesn't dominate anything.
+ if (IsNewIDom[Idx]) + DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode); + ++Idx; + } + NewBBs.clear(); + CriticalEdgesToSplit.clear(); +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index 6b4cba6..8ec63f8 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -54,7 +54,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) - : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()), + : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), MMI(mmi) { if (STI->getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(this); @@ -67,17 +67,14 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, STI->getFrameLowering()->isStackRealignable(), !F->hasFnAttribute("no-realign-stack")); - if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackAlignment)) - FrameInfo->ensureMaxAlignment(Fn->getAttributes(). - getStackAlignment(AttributeSet::FunctionIndex)); + if (Fn->hasFnAttribute(Attribute::StackAlignment)) + FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); ConstantPool = new (Allocator) MachineConstantPool(TM); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. - if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize)) + if (!Fn->hasFnAttribute(Attribute::OptimizeForSize)) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); @@ -383,7 +380,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getName().str() + "' function"; + return ("CFG for '" + F->getName() + "' function").str(); } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -462,7 +459,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// normal 'L' label is returned. MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { - const DataLayout *DL = getSubtarget().getDataLayout(); + const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); @@ -471,14 +468,14 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.getOrCreateSymbol(Name); } /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. MCSymbol *MachineFunction::getPICBaseSymbol() const { - const DataLayout *DL = getSubtarget().getDataLayout(); - return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ + const DataLayout *DL = getTarget().getDataLayout(); + return Ctx.getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Twine(getFunctionNumber())+"$pb"); } @@ -587,21 +584,12 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, return -++NumFixedObjects; } -int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) { - // Force the use of a frame pointer. 
The intention is that this intrinsic be - // used in conjunction with unwind mechanisms that leak the frame pointer. - setFrameAddressIsTaken(true); - Size = RoundUpToAlignment(Size, StackAlignment); - return CreateStackObject(Size, StackAlignment, false); -} - BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { assert(MBB && "MBB must be valid"); const MachineFunction *MF = MBB->getParent(); assert(MF && "MBB must be part of a MachineFunction"); - const TargetMachine &TM = MF->getTarget(); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); BitVector BV(TRI->getNumRegs()); // Before CSI is calculated, no registers are considered pristine. They can be @@ -612,8 +600,8 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) BV.set(*CSR); - // The entry MBB always has all CSRs pristine. - if (MBB == &MF->front()) + // Each MBB before the save point has all CSRs pristine. + if (isBeforeSavePoint(*MF, *MBB)) return BV; // On other MBBs the saved CSRs are not pristine. @@ -625,6 +613,40 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { return BV; } +// Note: We could use some sort of caching mechanism, but we lack the ability +// to know when the cache is invalid, i.e., the CFG changed. +// Assuming we had that, we could simply compute the set of all MBBs +// that are before the save point. +bool MachineFrameInfo::isBeforeSavePoint(const MachineFunction &MF, + const MachineBasicBlock &MBB) const { + // Early exit if shrink-wrapping did not kick in. + if (!Save) + return &MBB == &MF.front(); + + // Starting from MBB, check if there is a path leading to Save that does + // not cross Restore. + SmallPtrSet<const MachineBasicBlock *, 8> Visited; + SmallVector<const MachineBasicBlock *, 8> WorkList; + WorkList.push_back(&MBB); + Visited.insert(&MBB); + do { + const MachineBasicBlock *CurBB = WorkList.pop_back_val(); + // By construction, the region that is after the save point is + // dominated by the Save and post-dominated by the Restore. + // If we do not reach Restore and still reach Save, this + // means MBB is before Save. + if (CurBB == Save) + return true; + if (CurBB == Restore) + continue; + // Enqueue all the successors not already visited.
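The reachability test this function performs is a plain worklist walk that treats Restore as a barrier. A self-contained sketch, with a hypothetical BB type in place of MachineBasicBlock:

    #include <unordered_set>
    #include <vector>

    struct BB { std::vector<BB *> Succs; }; // stand-in for MachineBasicBlock

    // Returns true if some path from Start reaches Save without first
    // passing through Restore, i.e. Start is "before" the save point.
    bool reachesSaveBeforeRestore(BB *Start, BB *Save, BB *Restore) {
      std::unordered_set<BB *> Visited{Start};
      std::vector<BB *> WorkList{Start};
      while (!WorkList.empty()) {
        BB *Cur = WorkList.back();
        WorkList.pop_back();
        if (Cur == Save)
          return true;
        if (Cur == Restore)
          continue; // Paths through Restore are cut off.
        for (BB *Succ : Cur->Succs)
          if (Visited.insert(Succ).second)
            WorkList.push_back(Succ);
      }
      return false;
    }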
+ for (MachineBasicBlock *SuccBB : CurBB->successors()) + if (Visited.insert(SuccBB).second) + WorkList.push_back(SuccBB); + } while (!WorkList.empty()); + return false; +} + unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); @@ -821,7 +843,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } const DataLayout *MachineConstantPool::getDataLayout() const { - return TM.getSubtargetImpl()->getDataLayout(); + return TM.getDataLayout(); } Type *MachineConstantPoolEntry::getType() const { @@ -843,13 +865,13 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { switch (getRelocationInfo()) { default: llvm_unreachable("Unknown section kind"); - case 2: + case Constant::GlobalRelocations: Kind = SectionKind::getReadOnlyWithRel(); break; - case 1: + case Constant::LocalRelocation: Kind = SectionKind::getReadOnlyWithRelLocal(); break; - case 0: + case Constant::NoRelocation: switch (DL->getTypeAllocSize(getType())) { case 4: Kind = SectionKind::getMergeableConst4(); @@ -861,7 +883,7 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { Kind = SectionKind::getMergeableConst16(); break; default: - Kind = SectionKind::getMergeableConst(); + Kind = SectionKind::getReadOnly(); break; } } @@ -907,16 +929,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // DataLayout. if (isa<PointerType>(A->getType())) A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant*>(A), TD); + const_cast<Constant *>(A), *TD); else if (A->getType() != IntTy) A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant*>(A), TD); + const_cast<Constant *>(A), *TD); if (isa<PointerType>(B->getType())) B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant*>(B), TD); + const_cast<Constant *>(B), *TD); else if (B->getType() != IntTy) B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant*>(B), TD); + const_cast<Constant *>(B), *TD); return A == B; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp index 2f076b6..aaf06a7 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -54,7 +53,7 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominanceFrontier>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<IVUsers>(); - AU.addPreserved<LoopInfo>(); + AU.addPreserved<LoopInfoWrapperPass>(); AU.addPreserved<MemoryDependenceAnalysis>(); AU.addPreserved<ScalarEvolution>(); AU.addPreserved<StackProtector>(); diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 968ec2c..205032f 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -141,6 +141,28 @@ void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { Contents.CFP = FPImm; } +void 
MachineOperand::ChangeToES(const char *SymName, unsigned char TargetFlags) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an external symbol"); + + removeRegFromUses(); + + OpKind = MO_ExternalSymbol; + Contents.OffsetedInfo.Val.SymbolName = SymName; + setOffset(0); // Offset is always 0. + setTargetFlags(TargetFlags); +} + +void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an MCSymbol"); + + removeRegFromUses(); + + OpKind = MO_MCSymbol; + Contents.Sym = Sym; +} + /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be a register already, /// the setReg method should be used. @@ -276,17 +298,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) { /// print - Print the specified machine operand. /// -void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { - // If the instruction is embedded into a basic block, we can find the - // target info for the instruction. - if (!TM) - if (const MachineInstr *MI = getParent()) - if (const MachineBasicBlock *MBB = MI->getParent()) - if (const MachineFunction *MF = MBB->getParent()) - TM = &MF->getTarget(); - const TargetRegisterInfo *TRI = - TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; - +void MachineOperand::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) const { switch (getType()) { case MachineOperand::MO_Register: OS << PrintReg(getReg(), TRI, getSubReg()); @@ -308,8 +321,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (isUndef() && getSubReg()) OS << ",read-undef"; } else if (isImplicit()) { - OS << "imp-use"; - NeedComma = true; + OS << "imp-use"; + NeedComma = true; } if (isKill()) { @@ -595,10 +608,10 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// implicit operands. It reserves space for the number of operands specified by /// the MCInstrDesc. MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, - const DebugLoc dl, bool NoImp) - : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), - Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(nullptr), debugLoc(dl) { + DebugLoc dl, bool NoImp) + : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), + AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), + debugLoc(std::move(dl)) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -625,8 +638,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) Operands = MF.allocateOperandArray(CapOperands); // Copy operands. - for (unsigned i = 0; i != MI.getNumOperands(); ++i) - addOperand(MF, MI.getOperand(i)); + for (const MachineOperand &MO : MI.operands()) + addOperand(MF, MO); // Copy all the sensible flags. setFlags(MI.Flags); @@ -645,18 +658,18 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists.
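ChangeToES and ChangeToMCSymbol above follow one pattern for retagging a union-like operand: assert the operand is not a tied register, detach any register bookkeeping, then overwrite the tag and payload. A toy model with a hypothetical Operand type (the real MachineOperand must also unlink itself from register use lists, which removeRegFromUses() handles):

    #include <cassert>
    #include <cstdint>

    // Hypothetical miniature of a tagged-union operand.
    struct Operand {
      enum Kind : std::uint8_t { Register, ExternalSymbol } OpKind = Register;
      bool Tied = false;
      union {
        unsigned Reg;
        const char *SymbolName;
      };

      void changeToES(const char *Name) {
        // Retagging a tied register operand would corrupt the tie
        // bookkeeping, so it is forbidden, as in the assertion above.
        assert((OpKind != Register || !Tied) &&
               "Cannot change a tied operand into an external symbol");
        OpKind = ExternalSymbol;
        SymbolName = Name;
      }
    };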
void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (Operands[i].isReg()) - MRI.removeRegOperandFromUseList(&Operands[i]); + for (MachineOperand &MO : operands()) + if (MO.isReg()) + MRI.removeRegOperandFromUseList(&MO); } /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction to their respective use lists. This requires that the /// operands not be on their use lists yet. void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (Operands[i].isReg()) - MRI.addRegOperandToUseList(&Operands[i]); + for (MachineOperand &MO : operands()) + if (MO.isReg()) + MRI.addRegOperandToUseList(&MO); } void MachineInstr::addOperand(const MachineOperand &Op) { @@ -674,14 +687,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src, if (MRI) return MRI->moveOperands(Dst, Src, NumOps); - // Here it would be convenient to call memmove, so that isn't allowed because - // MachineOperand has a constructor and so isn't a POD type. - if (Dst < Src) - for (unsigned i = 0; i != NumOps; ++i) - new (Dst + i) MachineOperand(Src[i]); - else - for (unsigned i = NumOps; i ; --i) - new (Dst + i - 1) MachineOperand(Src[i - 1]); + // MachineOperand is a trivially copyable type so we can just use memmove. + std::memmove(Dst, Src, NumOps * sizeof(MachineOperand)); } /// addOperand - Add the specified operand to the instruction. If it is an @@ -896,8 +903,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, } // If DebugLoc does not match then two dbg.values are not identical. if (isDebugValue()) - if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown() - && getDebugLoc() != Other->getDebugLoc()) + if (getDebugLoc() && Other->getDebugLoc() && + getDebugLoc() != Other->getDebugLoc()) return false; return true; } @@ -926,8 +933,7 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { MachineInstr *MI = (MachineInstr *)this; MachineRegisterInfo &MRI = MF->getRegInfo(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); @@ -1330,8 +1336,7 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { /// clearKillInfo - Clears kill flags on all operands. /// void MachineInstr::clearKillInfo() { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (MO.isReg() && MO.isUse()) MO.setIsKill(false); } @@ -1344,15 +1349,13 @@ void MachineInstr::substituteRegister(unsigned FromReg, if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { if (SubIdx) ToReg = RegInfo.getSubReg(ToReg, SubIdx); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (!MO.isReg() || MO.getReg() != FromReg) continue; MO.substPhysReg(ToReg, RegInfo); } } else { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (!MO.isReg() || MO.getReg() != FromReg) continue; MO.substVirtReg(ToReg, SubIdx, RegInfo); @@ -1363,9 +1366,7 @@ /// isSafeToMove - Return true if it is safe to move this instruction.
If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. -bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, - AliasAnalysis *AA, - bool &SawStore) const { +bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. // // Treat volatile loads as stores. This is not strictly necessary for @@ -1495,8 +1496,7 @@ bool MachineInstr::hasUnmodeledSideEffects() const { /// allDefsAreDead - Return true if all the defs of this instruction are dead. /// bool MachineInstr::allDefsAreDead() const { - for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { - const MachineOperand &MO = getOperand(i); + for (const MachineOperand &MO : operands()) { if (!MO.isReg() || MO.isUse()) continue; if (!MO.isDead()) @@ -1523,23 +1523,19 @@ void MachineInstr::dump() const { #endif } -static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, - raw_ostream &CommentOS) { - const LLVMContext &Ctx = MF->getFunction()->getContext(); - DL.print(Ctx, CommentOS); -} - -void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, - bool SkipOpers) const { - // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. +void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { + // We can be a bit tidier if we know the MachineFunction. const MachineFunction *MF = nullptr; + const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; + const TargetInstrInfo *TII = nullptr; if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); - if (!TM && MF) - TM = &MF->getTarget(); - if (MF) + if (MF) { MRI = &MF->getRegInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + } } // Save a list of virtual registers. @@ -1552,7 +1548,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, !getOperand(StartOp).isImplicit(); ++StartOp) { if (StartOp != 0) OS << ", "; - getOperand(StartOp).print(OS, TM); + getOperand(StartOp).print(OS, TRI); unsigned Reg = getOperand(StartOp).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) VirtRegs.push_back(Reg); @@ -1562,8 +1558,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, OS << " = "; // Print the opcode name. - if (TM && TM->getSubtargetImpl()->getInstrInfo()) - OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode()); + if (TII) + OS << TII->getName(getOpcode()); else OS << "UNKNOWN"; @@ -1579,7 +1575,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) { // Print asm string. 
OS << " "; - getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); + getOperand(InlineAsm::MIOp_AsmString).print(OS, TRI); // Print HasSideEffects, MayLoad, MayStore, IsAlignStack unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1617,9 +1613,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (MRI->use_empty(Reg)) { bool HasAliasLive = false; - for (MCRegAliasIterator AI( - Reg, TM->getSubtargetImpl()->getRegisterInfo(), true); - AI.isValid(); ++AI) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; if (!MRI->use_empty(AliasReg)) { HasAliasLive = true; @@ -1645,17 +1639,13 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, } if (isDebugValue() && MO.isMetadata()) { // Pretty print DBG_VALUE instructions. - const MDNode *MD = MO.getMetadata(); - DIDescriptor DI(MD); - DIVariable DIV(MD); - - if (DI.isVariable() && !DIV.getName().empty()) - OS << "!\"" << DIV.getName() << '\"'; + auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata()); + if (DIV && !DIV->getName().empty()) + OS << "!\"" << DIV->getName() << '\"'; else - MO.print(OS, TM); - } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { - OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName( - MO.getImm()); + MO.print(OS, TRI); + } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TRI->getSubRegIndexName(MO.getImm()); } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; @@ -1672,11 +1662,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, unsigned RCID = 0; if (InlineAsm::hasRegClassConstraint(Flag, RCID)) { - if (TM) { - const TargetRegisterInfo *TRI = - TM->getSubtargetImpl()->getRegisterInfo(); - OS << ':' - << TRI->getRegClassName(TRI->getRegClass(RCID)); + if (TRI) { + OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID)); } else OS << ":RC" << RCID; } @@ -1690,7 +1677,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, // Compute the index of the next operand descriptor. AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); } else - MO.print(OS, TM); + MO.print(OS, TRI); } // Briefly indicate whether any call clobbers were omitted. @@ -1726,7 +1713,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (!HaveSemi) OS << ";"; HaveSemi = true; for (unsigned i = 0; i != VirtRegs.size(); ++i) { const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); - OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC) + OS << " " << TRI->getRegClassName(RC) << ':' << PrintReg(VirtRegs[i]); for (unsigned j = i+1; j != VirtRegs.size();) { if (MRI->getRegClass(VirtRegs[j]) != RC) { @@ -1741,24 +1728,24 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, } // Print debug location information. 
- if (isDebugValue() && getOperand(e - 1).isMetadata()) { + if (isDebugValue() && getOperand(e - 2).isMetadata()) { if (!HaveSemi) OS << ";"; - DIVariable DV(getOperand(e - 1).getMetadata()); - OS << " line no:" << DV.getLineNumber(); - if (MDNode *InlinedAt = DV.getInlinedAt()) { - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown() && MF) { + auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata()); + OS << " line no:" << DV->getLine(); + if (auto *InlinedAt = debugLoc->getInlinedAt()) { + DebugLoc InlinedAtDL(InlinedAt); + if (InlinedAtDL && MF) { OS << " inlined @[ "; - printDebugLoc(InlinedAtDL, MF, OS); + InlinedAtDL.print(OS); OS << " ]"; } } if (isIndirectDebugValue()) OS << " indirect"; - } else if (!debugLoc.isUnknown() && MF) { + } else if (debugLoc && MF) { if (!HaveSemi) OS << ";"; OS << " dbg:"; - printDebugLoc(debugLoc, MF, OS); + debugLoc.print(OS); } OS << '\n'; @@ -1827,8 +1814,7 @@ void MachineInstr::clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo) { if (!TargetRegisterInfo::isPhysicalRegister(Reg)) RegInfo = nullptr; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; unsigned OpReg = MO.getReg(); @@ -1889,6 +1875,22 @@ bool MachineInstr::addRegisterDead(unsigned Reg, return true; } +void MachineInstr::clearRegisterDeads(unsigned Reg) { + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) + continue; + MO.setIsDead(false); + } +} + +void MachineInstr::addRegisterDefReadUndef(unsigned Reg) { + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) + continue; + MO.setIsUndef(); + } +} + void MachineInstr::addRegisterDefined(unsigned Reg, const TargetRegisterInfo *RegInfo) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -1896,8 +1898,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg, if (MO) return; } else { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); + for (const MachineOperand &MO : operands()) { if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && MO.getSubReg() == 0) return; @@ -1911,8 +1912,7 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, const TargetRegisterInfo &TRI) { bool HasRegMask = false; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (MO.isRegMask()) { HasRegMask = true; continue; } if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - bool Dead = true; - for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); - I != E; ++I) - if (TRI.regsOverlap(*I, Reg)) { - Dead = false; - break; - } // If there are no uses, including partial uses, the def is dead. - if (Dead) MO.setIsDead(); + if (std::none_of(UsedRegs.begin(), UsedRegs.end(), + [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + MO.setIsDead(); } // This is a call with a register mask operand.
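The rewritten dead-def test in setPhysRegsDeadExcept reduces to a single std::none_of over the used registers. A standalone equivalent, with the overlap check supplied as a plain function pointer since the real predicate lives on TargetRegisterInfo:

    #include <algorithm>
    #include <vector>

    // A physical-register def is dead exactly when no used register
    // overlaps it; mirrors the std::none_of form used above.
    bool defIsDead(unsigned Reg, const std::vector<unsigned> &UsedRegs,
                   bool (*RegsOverlap)(unsigned, unsigned)) {
      return std::none_of(UsedRegs.begin(), UsedRegs.end(),
                          [&](unsigned Use) { return RegsOverlap(Use, Reg); });
    }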
@@ -1945,8 +1940,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { SmallVector<size_t, 8> HashComponents; HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index cb14a5c..3967a2f 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -10,10 +10,6 @@ // This pass performs loop invariant code motion on machine instructions. We // attempt to remove as much code from the body of a loop as possible. // -// This pass does not attempt to throttle itself to limit register pressure. -// The register allocation phases are expected to perform rematerialization -// to recover when register pressure is high. -// // This pass is not intended to be a replacement or a complete alternative // for the LLVM-IR-level LICM pass. It is only designed to hoist simple // constructs that are not exposed before lowering and instruction selection. @@ -54,6 +50,12 @@ HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden); +static cl::opt<bool> +SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", + cl::desc("MachineLICM should sink instructions into " + "loops to avoid register spills"), + cl::init(false), cl::Hidden); + STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -98,7 +100,7 @@ namespace { SmallSet<unsigned, 32> RegSeen; SmallVector<unsigned, 8> RegPressure; - // Register pressure "limit" per register class. If the pressure + // Register pressure "limit" per register pressure set. If the pressure // is higher than the limit, then it's considered high. SmallVector<unsigned, 8> RegLimit; @@ -208,7 +210,8 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. - bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); + bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost, + bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -243,21 +246,30 @@ namespace { void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); void HoistRegion(MachineDomTreeNode *N, bool IsHeader); - /// getRegisterClassIDAndCost - For a given MI, register, and the operand - /// index, return the ID and cost of its representative register class by - /// reference. - void getRegisterClassIDAndCost(const MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - unsigned &RCId, unsigned &RCCost) const; + /// SinkIntoLoop - Sink instructions into loops if profitable. This + /// especially tries to prevent register spills caused by register pressure + /// if there is little to no overhead moving instructions into loops. + void SinkIntoLoop(); /// InitRegPressure - Find all virtual register references that are liveout /// of the preheader to initialize the starting "register pressure". Note /// this does not count live through (livein but not used) registers. 
void InitRegPressure(MachineBasicBlock *BB); + /// calcRegisterCost - Calculate the additional register pressure that the + /// registers used in MI cause. + /// + /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to + /// figure out which usages are live-ins. + /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. + DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); + /// UpdateRegPressure - Update estimate of register pressure after the /// specified instruction. - void UpdateRegPressure(const MachineInstr *MI); + void UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef = false); /// ExtractHoistableLoad - Unfold a load from the given machineinstr if /// the load itself could be hoisted. Return the unfolded and hoistable @@ -343,13 +355,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. - unsigned NumRC = TRI->getNumRegClasses(); - RegPressure.resize(NumRC); + unsigned NumRPS = TRI->getNumRegPressureSets(); + RegPressure.resize(NumRPS); std::fill(RegPressure.begin(), RegPressure.end(), 0); - RegLimit.resize(NumRC); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); + RegLimit.resize(NumRPS); + for (unsigned i = 0, e = NumRPS; i != e; ++i) + RegLimit[i] = TRI->getRegPressureSetLimit(MF, i); } // Get our Loop information... @@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { FirstInLoop = true; HoistOutOfLoop(N); CSEMap.clear(); + + if (SinkInstsToAvoidSpills) + SinkIntoLoop(); } } @@ -693,6 +707,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, /// one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + SmallVector<MachineDomTreeNode*, 32> Scopes; SmallVector<MachineDomTreeNode*, 8> WorkList; DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; @@ -700,7 +718,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { // Perform a DFS walk to determine the order of visit. WorkList.push_back(HeaderN); - do { + while (!WorkList.empty()) { MachineDomTreeNode *Node = WorkList.pop_back_val(); assert(Node && "Null dominator tree node?"); MachineBasicBlock *BB = Node->getBlock(); @@ -734,28 +752,21 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { ParentMap[Child] = Node; WorkList.push_back(Child); } - } while (!WorkList.empty()); + } - if (Scopes.size() != 0) { - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) - return; + if (Scopes.size() == 0) + return; - // Compute registers which are livein into the loop headers. - RegSeen.clear(); - BackTrace.clear(); - InitRegPressure(Preheader); - } + // Compute registers which are livein into the loop headers. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); // Now perform LICM. 
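The scope collection in HoistOutOfLoop above is a plain preorder walk of the dominator tree with an explicit worklist (the real code additionally prunes blocks it will never hoist from). Stripped to its traversal shape, with a hypothetical Node type standing in for MachineDomTreeNode:

    #include <vector>

    struct Node { std::vector<Node *> Children; }; // stand-in for MachineDomTreeNode

    // Explicit-stack preorder DFS: visit a node, then queue its children.
    std::vector<Node *> preorderScopes(Node *Root) {
      std::vector<Node *> Scopes;
      std::vector<Node *> WorkList{Root};
      while (!WorkList.empty()) {
        Node *N = WorkList.back();
        WorkList.pop_back();
        Scopes.push_back(N);
        for (Node *Child : N->Children)
          WorkList.push_back(Child);
      }
      return Scopes;
    }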
for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { MachineDomTreeNode *Node = Scopes[i]; MachineBasicBlock *MBB = Node->getBlock(); - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) - continue; - EnterScope(MBB); // Process the block @@ -774,27 +785,57 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { } } -static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { - return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); -} +void MachineLICM::SinkIntoLoop() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + SmallVector<MachineInstr *, 8> Candidates; + for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); + I != Preheader->instr_end(); ++I) { + // We need to ensure that we can safely move this instruction into the loop. + // As such, it must not have side-effects, such as those a call would have. + if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I)) + Candidates.push_back(I); + } -/// getRegisterClassIDAndCost - For a given MI, register, and the operand -/// index, return the ID and cost of its representative register class. -void -MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - unsigned &RCId, unsigned &RCCost) const { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - MVT VT = *RC->vt_begin(); - if (VT == MVT::Untyped) { - RCId = RC->getID(); - RCCost = 1; - } else { - RCId = TLI->getRepRegClassFor(VT)->getID(); - RCCost = TLI->getRepRegClassCostFor(VT); + for (MachineInstr *I : Candidates) { + const MachineOperand &MO = I->getOperand(0); + if (!MO.isDef() || !MO.isReg() || !MO.getReg()) + continue; + if (!MRI->hasOneDef(MO.getReg())) + continue; + bool CanSink = true; + MachineBasicBlock *B = nullptr; + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + // FIXME: Come up with a proper cost model that estimates whether sinking + // the instruction (and thus possibly executing it on every loop + // iteration) is more expensive than a register. + // For now, assume that copies are cheap and thus almost always worth it. + if (!MI.isCopy()) { + CanSink = false; + break; + } + if (!B) { + B = MI.getParent(); + continue; + } + B = DT->findNearestCommonDominator(B, MI.getParent()); + if (!B) { + CanSink = false; + break; + } + } + if (!CanSink || !B || B == Preheader) + continue; + B->splice(B->getFirstNonPHI(), Preheader, I); + } } +static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { + return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); +} + /// InitRegPressure - Find all virtual register references that are liveout of /// the preheader to initialize the starting "register pressure". Note this /// does not count live through (livein but not used) registers.
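SinkIntoLoop above places each candidate at the nearest common dominator of all its uses. The classic idom-chain walk behind DT->findNearestCommonDominator can be sketched over a hypothetical Block carrying an immediate-dominator link and tree depth; this assumes both blocks sit in one dominator tree rooted at the function entry:

    struct Block {
      Block *IDom = nullptr; // immediate dominator; nullptr at the entry block
      unsigned Depth = 0;    // depth in the dominator tree
    };

    // Walk the deeper block up its idom chain until both pointers meet.
    Block *nearestCommonDominator(Block *A, Block *B) {
      while (A != B) {
        if (A->Depth >= B->Depth)
          A = A->IDom;
        else
          B = B->IDom;
      }
      return A;
    }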
@@ -812,41 +853,30 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { InitRegPressure(*BB->pred_begin()); } - for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); - MII != E; ++MII) { - MachineInstr *MI = &*MII; - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - bool isNew = RegSeen.insert(Reg).second; - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (MO.isDef()) - RegPressure[RCId] += RCCost; - else { - bool isKill = isOperandKill(MO, MRI); - if (isNew && !isKill) - // Haven't seen this, it must be a livein. - RegPressure[RCId] += RCCost; - else if (!isNew && isKill) - RegPressure[RCId] -= RCCost; - } - } - } + for (const MachineInstr &MI : *BB) + UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); } /// UpdateRegPressure - Update estimate of register pressure after the /// specified instruction. -void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { - if (MI->isImplicitDef()) - return; +void MachineLICM::UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef) { + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); + for (const auto &RPIdAndCost : Cost) { + unsigned Class = RPIdAndCost.first; + if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second) + RegPressure[Class] = 0; + else + RegPressure[Class] += RPIdAndCost.second; + } +} - SmallVector<unsigned, 4> Defs; +DenseMap<unsigned, int> +MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, + bool ConsiderUnseenAsDef) { + DenseMap<unsigned, int> Cost; + if (MI->isImplicitDef()) + return Cost; for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) @@ -855,27 +885,33 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - bool isNew = RegSeen.insert(Reg).second; + // FIXME: It seems bad to use RegSeen only for some of these calculations. + bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + + RegClassWeight W = TRI->getRegClassWeight(RC); + int RCCost = 0; if (MO.isDef()) - Defs.push_back(Reg); - else if (!isNew && isOperandKill(MO, MRI)) { - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (RCCost > RegPressure[RCId]) - RegPressure[RCId] = 0; + RCCost = W.RegWeight; + else { + bool isKill = isOperandKill(MO, MRI); + if (isNew && !isKill && ConsiderUnseenAsDef) + // Haven't seen this, it must be a livein. 
+ RCCost = W.RegWeight; + else if (!isNew && isKill) + RCCost = -W.RegWeight; + } + if (RCCost == 0) + continue; + const int *PS = TRI->getRegClassPressureSets(RC); + for (; *PS != -1; ++PS) { + if (Cost.find(*PS) == Cost.end()) + Cost[*PS] = RCCost; else - RegPressure[RCId] -= RCCost; + Cost[*PS] += RCCost; } } - - unsigned Idx = 0; - while (!Defs.empty()) { - unsigned Reg = Defs.pop_back_val(); - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); - RegPressure[RCId] += RCCost; - ++Idx; - } + return Cost; } /// isLoadFromGOTOrConstantPool - Return true if this machine instruction @@ -898,7 +934,7 @@ static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // Check if it's safe to move the instruction. bool DontMoveAcrossStore = true; - if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) + if (!I.isSafeToMove(AA, DontMoveAcrossStore)) return false; // If it is a load then check if it is guaranteed to execute by making sure that @@ -1067,27 +1103,23 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, +bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, bool CheapInstr) { - for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); - CI != CE; ++CI) { - if (CI->second <= 0) + for (const auto &RPIdAndCost : Cost) { + if (RPIdAndCost.second <= 0) continue; - unsigned RCId = CI->first; - unsigned Limit = RegLimit[RCId]; - int Cost = CI->second; + unsigned Class = RPIdAndCost.first; + int Limit = RegLimit[Class]; // Don't hoist cheap instructions if they would increase register pressure, // even if we're under the limit. if (CheapInstr && !HoistCheapInsts) return true; - for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; - if (RP[RCId] + Cost >= Limit) + for (const auto &RP : BackTrace) + if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit) return true; - } } return false; @@ -1097,46 +1129,15 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, /// current block and update their register pressures to reflect the effect /// of hoisting MI from the current block to the preheader. void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { - if (MI->isImplicitDef()) - return; - // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure.
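The per-instruction cost map built by calcRegisterCost folds each register-class weight into every pressure set that class belongs to. A condensed standalone version, with std::map standing in for DenseMap and a -1 sentinel terminating each pressure-set list as in the real TRI tables:

    #include <map>
    #include <utility>
    #include <vector>

    // Each (pressure-set list, signed weight) pair contributes its weight
    // to every pressure set in its list; lists end with a -1 sentinel.
    std::map<int, int>
    accumulateCost(const std::vector<std::pair<const int *, int>> &Contributions) {
      std::map<int, int> Cost;
      for (const auto &C : Contributions)
        for (const int *PS = C.first; *PS != -1; ++PS)
          Cost[*PS] += C.second;
      return Cost;
    }

Defs contribute a positive weight and killed uses a negative one, so the map nets out the instruction's pressure delta per pressure set.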
- DenseMap<unsigned, int> Cost; - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); - } - } + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); // Update register pressure of blocks from loop header to current block. - for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVectorImpl<unsigned> &RP = BackTrace[i]; - for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); - CI != CE; ++CI) { - unsigned RCId = CI->first; - RP[RCId] += CI->second; - } - } + for (auto &RP : BackTrace) + for (const auto &RPIdAndCost : Cost) + RP[RPIdAndCost.first] += RPIdAndCost.second; } /// IsProfitableToHoist - Return true if it is potentially profitable to hoist @@ -1171,15 +1172,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (TII->isTriviallyReMaterializable(&MI, AA)) return true; - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // Cheap instructions will only be hoisted if they don't increase register - // pressure at all. // FIXME: If there are long latency loop-invariant instructions inside the // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap<unsigned, int> Cost; for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) @@ -1187,24 +1181,22 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - DEBUG(dbgs() << "Hoist High Latency: " << MI); - ++NumHighLatency; - return true; - } - Cost[RCId] += RCCost; - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - Cost[RCId] -= RCCost; + if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; } } + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. 
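The CanCauseHighRegPressure check invoked next walks the per-block pressure snapshots recorded from the loop header down to the current block. A simplified standalone version, where pressure-set IDs index directly into each snapshot and into the limit table:

    #include <map>
    #include <vector>

    // Hoisting is deemed too costly if, in any block on the path from the
    // loop header, adding a positive cost would reach that set's limit.
    bool causesHighPressure(const std::map<int, int> &Cost,
                            const std::vector<std::vector<int>> &BackTrace,
                            const std::vector<int> &Limit) {
      for (const auto &IdAndCost : Cost) {
        if (IdAndCost.second <= 0)
          continue;
        for (const auto &RP : BackTrace)
          if (RP[IdAndCost.first] + IdAndCost.second >= Limit[IdAndCost.first])
            return true;
      }
      return false;
    }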
+ auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); + // Visit BBs from header to current BB, if hoisting this doesn't cause // high register pressure, then it's safe to proceed. if (!CanCauseHighRegPressure(Cost, CheapInstr)) { diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index 89054d4..ce6abdd 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index baad411..d9da7bc 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -9,10 +9,12 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -116,7 +118,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size()-1; Entry.Fn = BB->getParent(); - MCSymbol *Result = Context.CreateTempSymbol(); + MCSymbol *Result = Context.createTempSymbol(); Entry.Symbols = Result; return Result; } @@ -276,6 +278,7 @@ bool MachineModuleInfo::doInitialization(Module &M) { DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; // Always emit some info, by default "no personality" info. Personalities.push_back(nullptr); + PersonalityTypeCache = EHPersonality::Unknown; AddrLabelSymbols = nullptr; TheModule = nullptr; @@ -398,7 +401,7 @@ void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, /// addLandingPad - Provide the label of a try LandingPad block. /// MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { - MCSymbol *LandingPadLabel = Context.CreateTempSymbol(); + MCSymbol *LandingPadLabel = Context.createTempSymbol(); LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); LP.LandingPadLabel = LandingPadLabel; return LandingPadLabel; @@ -423,6 +426,12 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, Personalities.push_back(Personality); } +void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad, + int State) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.WinEHState = State; +} + /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. 
/// void MachineModuleInfo:: @@ -452,6 +461,25 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { LP.TypeIds.push_back(0); } +void MachineModuleInfo::addSEHCatchHandler(MachineBasicBlock *LandingPad, + const Function *Filter, + const BlockAddress *RecoverBA) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Filter; + Handler.RecoverBA = RecoverBA; + LP.SEHHandlers.push_back(Handler); +} + +void MachineModuleInfo::addSEHCleanupHandler(MachineBasicBlock *LandingPad, + const Function *Cleanup) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Cleanup; + Handler.RecoverBA = nullptr; + LP.SEHHandlers.push_back(Handler); +} + /// TidyLandingPads - Remap landing pad labels and remove any deleted landing /// pads. void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { @@ -546,9 +574,18 @@ try_next:; /// getPersonality - Return the personality function for the current function. const Function *MachineModuleInfo::getPersonality() const { - // FIXME: Until PR1414 will be fixed, we're using 1 personality function per - // function - return !LandingPads.empty() ? LandingPads[0].Personality : nullptr; + for (const LandingPadInfo &LPI : LandingPads) + if (LPI.Personality) + return LPI.Personality; + return nullptr; +} + +EHPersonality MachineModuleInfo::getPersonalityType() { + if (PersonalityTypeCache == EHPersonality::Unknown) { + if (const Function *F = getPersonality()) + PersonalityTypeCache = classifyEHPersonality(F); + } + return PersonalityTypeCache; } /// getPersonalityIndex - Return unique index for current personality @@ -572,3 +609,18 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { // in the zero index. return 0; } + +const Function *MachineModuleInfo::getWinEHParent(const Function *F) const { + StringRef WinEHParentName = + F->getFnAttribute("wineh-parent").getValueAsString(); + if (WinEHParentName.empty() || WinEHParentName == F->getName()) + return F; + return F->getParent()->getFunction(WinEHParentName); +} + +WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) { + auto &Ptr = FuncInfoMap[getWinEHParent(F)]; + if (!Ptr) + Ptr.reset(new WinEHFuncInfo); + return *Ptr; +} diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp index a1c7e9f..22d519e 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -31,15 +31,14 @@ static int SortSymbolPair(const void *LHS, const void *RHS) { return LHSS->getName().compare(RHSS->getName()); } -/// GetSortedStubs - Return the entries from a DenseMap in a deterministic -/// sorted orer. 
-MachineModuleInfoImpl::SymbolListTy -MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*, - MachineModuleInfoImpl::StubValueTy>&Map) { +MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs( + DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) { MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); if (!List.empty()) qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); + + Map.clear(); return List; } diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 86bb34b..278a8f2 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -61,11 +61,11 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, } bool -MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { - const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); +MachineRegisterInfo::recomputeRegClass(unsigned Reg) { + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); const TargetRegisterClass *NewRC = - getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF); // Stop early if there is no room to grow. if (NewRC == OldRC) diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index 9fe23c5..44107d6 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -209,6 +209,11 @@ static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched); +static cl::opt<bool> EnableMachineSched( + "enable-misched", + cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), + cl::Hidden); + /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); @@ -304,6 +309,12 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + if (EnableMachineSched.getNumOccurrences()) { + if (!EnableMachineSched) + return false; + } else if (!mf.getSubtarget().enableMachineScheduler()) + return false; + DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); // Initialize the context of the pass. @@ -336,9 +347,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (skipOptnoneFunction(*mf.getFunction())) return false; - const TargetSubtargetInfo &ST = - mf.getTarget().getSubtarget<TargetSubtargetInfo>(); - if (!ST.enablePostMachineScheduler()) { + if (!mf.getSubtarget().enablePostMachineScheduler()) { DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); return false; } @@ -934,8 +943,9 @@ updateScheduledPressure(const SUnit *SU, unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); if (NewMaxPressure[ID] >= Limit - 2) { DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " - << NewMaxPressure[ID] << " > " << Limit << "(+ " - << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + << NewMaxPressure[ID] + << ((NewMaxPressure[ID] > Limit) ? 
" > " : " <= ") << Limit + << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); } } } @@ -1027,8 +1037,6 @@ void ScheduleDAGMILive::schedule() { scheduleMI(SU, IsTopNode); - updateQueues(SU, IsTopNode); - if (DFSResult) { unsigned SubtreeID = DFSResult->getSubtreeID(SU); if (!ScheduledTrees.test(SubtreeID)) { @@ -1040,6 +1048,8 @@ void ScheduleDAGMILive::schedule() { // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -1434,12 +1444,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // Check if either the dest or source is local. If it's live across a back // edge, it's not local. Note that if both vregs are live across the back // edge, we cannot successfully contrain the copy without cyclic scheduling. - unsigned LocalReg = DstReg; - unsigned GlobalReg = SrcReg; + // If both the copy's source and dest are local live intervals, then we + // should treat the dest as the global for the purpose of adding + // constraints. This adds edges from source's other uses to the copy. + unsigned LocalReg = SrcReg; + unsigned GlobalReg = DstReg; LiveInterval *LocalLI = &LIS->getInterval(LocalReg); if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { - LocalReg = SrcReg; - GlobalReg = DstReg; + LocalReg = DstReg; + GlobalReg = SrcReg; LocalLI = &LIS->getInterval(LocalReg); if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) return; @@ -2599,8 +2612,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, TryCand, Cand, PhysRegCopy)) return; - // Avoid exceeding the target's limit. If signed PSetID is negative, it is - // invalid; convert it to INT_MAX to give it lowest priority. + // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess)) diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 8337793..5f03390 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -70,6 +71,8 @@ namespace { // will be split. SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit; + SparseBitVector<> RegsToClearKillFlags; + public: static char ID; // Pass identification MachineSinking() : MachineFunctionPass(ID) { @@ -287,6 +290,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (!MadeChange) break; EverMadeChange = true; } + + // Now clear any kill flags for recorded registers. + for (auto I : RegsToClearKillFlags) + MRI->clearKillFlags(I); + RegsToClearKillFlags.clear(); + return EverMadeChange; } @@ -643,7 +652,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { return false; // Check if it's safe to move the instruction. 
- if (!MI->isSafeToMove(TII, AA, SawStore)) + if (!MI->isSafeToMove(AA, SawStore)) return false; // FIXME: This should include support for sinking instructions within the @@ -656,7 +665,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { bool BreakPHIEdge = false; MachineBasicBlock *ParentBlock = MI->getParent(); - MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); + MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, + BreakPHIEdge); // If there are no outputs, it must have side-effects. if (!SuccToSinkTo) @@ -684,7 +694,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // other code paths. bool TryBreak = false; bool store = true; - if (!MI->isSafeToMove(TII, AA, store)) { + if (!MI->isSafeToMove(AA, store)) { DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); TryBreak = true; } @@ -755,7 +765,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. - MI->clearKillInfo(); + // Note that we have to clear the kill flags for any register this instruction + // uses as we may sink over another instruction which currently kills the + // used registers. + for (MachineOperand &MO : MI->operands()) { + if (MO.isReg() && MO.isUse()) + RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags. + } return true; } diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 2cf87eb..e07250b 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -52,12 +52,11 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF = &Func; - TII = MF->getSubtarget().getInstrInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); MRI = &MF->getRegInfo(); Loops = &getAnalysis<MachineLoopInfo>(); - const TargetSubtargetInfo &ST = - MF->getTarget().getSubtarget<TargetSubtargetInfo>(); SchedModel.init(ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); ProcResourceCycles.resize(MF->getNumBlockIDs() * @@ -321,9 +320,7 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { unsigned CurCount = MTM.getResources(MBB)->InstrCount; const MachineBasicBlock *Best = nullptr; unsigned BestDepth = 0; - for (MachineBasicBlock::const_pred_iterator - I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { - const MachineBasicBlock *Pred = *I; + for (const MachineBasicBlock *Pred : MBB->predecessors()) { const MachineTraceMetrics::TraceBlockInfo *PredTBI = getDepthResources(Pred); // Ignore cycles that aren't natural loops. @@ -345,9 +342,7 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { const MachineLoop *CurLoop = getLoopFor(MBB); const MachineBasicBlock *Best = nullptr; unsigned BestHeight = 0; - for (MachineBasicBlock::const_succ_iterator - I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - const MachineBasicBlock *Succ = *I; + for (const MachineBasicBlock *Succ : MBB->successors()) { // Don't consider back-edges. 
if (CurLoop && Succ == CurLoop->getHeader()) continue; @@ -464,13 +459,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run an upwards post-order search for the trace start. Bounds.Downward = false; Bounds.Visited.clear(); - typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; - for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); - I != E; ++I) { + for (auto I : inverse_post_order_ext(MBB, Bounds)) { DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the predecessors have been visited, pick the preferred one. - TBI.Pred = pickTracePred(*I); + TBI.Pred = pickTracePred(I); DEBUG({ if (TBI.Pred) dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; @@ -478,19 +471,17 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { dbgs() << "null\n"; }); // The trace leading to I is now known, compute the depth resources. - computeDepthResources(*I); + computeDepthResources(I); } // Run a downwards post-order search for the trace end. Bounds.Downward = true; Bounds.Visited.clear(); - typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; - for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); - I != E; ++I) { + for (auto I : post_order_ext(MBB, Bounds)) { DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the successors have been visited, pick the preferred one. - TBI.Succ = pickTraceSucc(*I); + TBI.Succ = pickTraceSucc(I); DEBUG({ if (TBI.Succ) dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; @@ -498,7 +489,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { dbgs() << "null\n"; }); // The trace leaving I is now known, compute the height resources. 
- computeHeightResources(*I); + computeHeightResources(I); } } diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 364e8e2..f5edcb7 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -55,16 +55,13 @@ namespace { MachineVerifier(Pass *pass, const char *b) : PASS(pass), - Banner(b), - OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) + Banner(b) {} bool runOnMachineFunction(MachineFunction &MF); Pass *const PASS; const char *Banner; - const char *const OutFileName; - raw_ostream *OS; const MachineFunction *MF; const TargetMachine *TM; const TargetInstrInfo *TII; @@ -277,22 +274,6 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { - raw_ostream *OutFile = nullptr; - if (OutFileName) { - std::error_code EC; - OutFile = new raw_fd_ostream(OutFileName, EC, - sys::fs::F_Append | sys::fs::F_Text); - if (EC) { - errs() << "Error opening '" << OutFileName << "': " << EC.message() - << '\n'; - exit(1); - } - - OS = OutFile; - } else { - OS = &errs(); - } - foundErrors = 0; this->MF = &MF; @@ -327,7 +308,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != MFI) { report("Bad instruction parent pointer", MFI); - *OS << "Instruction: " << *MBBI; + errs() << "Instruction: " << *MBBI; continue; } @@ -348,8 +329,18 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { } else if (!CurBundle) report("No bundle header", MBBI); visitMachineInstrBefore(MBBI); - for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) - visitMachineOperand(&MBBI->getOperand(I), I); + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + const MachineInstr &MI = *MBBI; + const MachineOperand &Op = MI.getOperand(I); + if (Op.getParent() != &MI) { + // Make sure to use correct addOperand / RemoveOperand / ChangeTo + // functions when replacing operands of a MachineInstr. + report("Instruction has operand with wrong parent set", &MI); + } + + visitMachineOperand(&Op, I); + } + visitMachineInstrAfter(MBBI); // Was this the last bundled instruction? @@ -363,9 +354,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { } visitMachineFunctionAfter(); - if (OutFile) - delete OutFile; - else if (foundErrors) + if (foundErrors) report_fatal_error("Found "+Twine(foundErrors)+" machine code errors."); // Clean up. 
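The operand check added in the hunk above enforces a back-pointer invariant: every MachineOperand must name its owning MachineInstr as its parent, which only holds when operands are modified through addOperand / RemoveOperand / ChangeTo*. As a minimal standalone sketch of that invariant — hypothetical stand-in types, not LLVM's real classes:

#include <cstdio>
#include <vector>

// Stand-ins for MachineInstr/MachineOperand: each operand records the
// instruction that owns it.
struct Instr;

struct Operand {
  Instr *Parent = nullptr;
};

struct Instr {
  std::vector<Operand> Ops;

  // The sanctioned way to add an operand: it fixes up the back-pointer,
  // as the real addOperand/ChangeTo* helpers do.
  void addOperand(Operand Op) {
    Op.Parent = this;
    Ops.push_back(Op);
  }
};

// Verifier-style walk: flag any operand whose parent pointer is stale.
static bool verifyOperandParents(const Instr &I) {
  for (const Operand &Op : I.Ops)
    if (Op.Parent != &I)
      return false;
  return true;
}

int main() {
  Instr A, B;
  A.addOperand(Operand());
  B.Ops.push_back(A.Ops[0]); // bypasses addOperand: B keeps a stale parent
  std::printf("A ok: %d  B ok: %d\n", (int)verifyOperandParents(A),
              (int)verifyOperandParents(B));
  return 0;
}

Because the verifier runs this per operand, a stale pointer is reported against the offending instruction instead of surfacing as a crash much later.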
@@ -382,76 +371,76 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); - *OS << '\n'; + errs() << '\n'; if (!foundErrors++) { if (Banner) - *OS << "# " << Banner << '\n'; - MF->print(*OS, Indexes); + errs() << "# " << Banner << '\n'; + MF->print(errs(), Indexes); } - *OS << "*** Bad machine code: " << msg << " ***\n" + errs() << "*** Bad machine code: " << msg << " ***\n" << "- function: " << MF->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: BB#" << MBB->getNumber() + errs() << "- basic block: BB#" << MBB->getNumber() << ' ' << MBB->getName() << " (" << (const void*)MBB << ')'; if (Indexes) - *OS << " [" << Indexes->getMBBStartIdx(MBB) + errs() << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; - *OS << '\n'; + errs() << '\n'; } void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); - *OS << "- instruction: "; + errs() << "- instruction: "; if (Indexes && Indexes->hasIndex(MI)) - *OS << Indexes->getInstructionIndex(MI) << '\t'; - MI->print(*OS, TM); + errs() << Indexes->getInstructionIndex(MI) << '\t'; + MI->print(errs(), TM); } void MachineVerifier::report(const char *msg, const MachineOperand *MO, unsigned MONum) { assert(MO); report(msg, MO->getParent()); - *OS << "- operand " << MONum << ": "; - MO->print(*OS, TM); - *OS << "\n"; + errs() << "- operand " << MONum << ": "; + MO->print(errs(), TRI); + errs() << "\n"; } void MachineVerifier::report(const char *msg, const MachineFunction *MF, const LiveInterval &LI) { report(msg, MF); - *OS << "- interval: " << LI << '\n'; + errs() << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI) { report(msg, MBB); - *OS << "- interval: " << LI << '\n'; + errs() << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, const LiveRange &LR, unsigned Reg, unsigned LaneMask) { report(msg, MBB); - *OS << "- liverange: " << LR << '\n'; - *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; + errs() << "- liverange: " << LR << '\n'; + errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; if (LaneMask != 0) - *OS << "- lanemask: " << format("%04X\n", LaneMask); + errs() << "- lanemask: " << format("%04X\n", LaneMask); } void MachineVerifier::report(const char *msg, const MachineFunction *MF, const LiveRange &LR, unsigned Reg, unsigned LaneMask) { report(msg, MF); - *OS << "- liverange: " << LR << '\n'; - *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; + errs() << "- liverange: " << LR << '\n'; + errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; if (LaneMask != 0) - *OS << "- lanemask: " << format("%04X\n", LaneMask); + errs() << "- lanemask: " << format("%04X\n", LaneMask); } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -539,7 +528,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has successor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Preds.count(MBB)) { report("Inconsistent CFG", MBB); - *OS << "MBB is not in the predecessor list of the successor BB#" + errs() << "MBB is not in the predecessor list of the successor BB#" << (*I)->getNumber() << ".\n"; } } @@ -551,7 +540,7 @@ 
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has predecessor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Succs.count(MBB)) { report("Inconsistent CFG", MBB); - *OS << "MBB is not in the successor list of the predecessor BB#" + errs() << "MBB is not in the successor list of the predecessor BB#" << (*I)->getNumber() << ".\n"; } } @@ -726,7 +715,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { SlotIndex idx = Indexes->getInstructionIndex(MI); if (!(idx > lastIndex)) { report("Instruction index out of order", MI); - *OS << "Last instruction was at " << lastIndex << '\n'; + errs() << "Last instruction was at " << lastIndex << '\n'; } lastIndex = idx; } @@ -739,7 +728,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { FirstTerminator = MI; } else if (FirstTerminator) { report("Non-terminator instruction after the first terminator", MI); - *OS << "First terminator was:\t" << *FirstTerminator; + errs() << "First terminator was:\t" << *FirstTerminator; } } @@ -760,7 +749,7 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { if (!isUInt<5>(MI->getOperand(1).getImm())) report("Unknown asm flags", &MI->getOperand(1), 1); - assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); + static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed"); unsigned OpNo = InlineAsm::MIOp_FirstOperand; unsigned NumOps; @@ -791,7 +780,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); - *OS << MCID.getNumOperands() << " operands expected, but " + errs() << MCID.getNumOperands() << " operands expected, but " << MI->getNumOperands() << " given.\n"; } @@ -921,7 +910,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); - *OS << TRI->getName(Reg) << " is not a " + errs() << TRI->getName(Reg) << " is not a " << TRI->getRegClassName(DRC) << " register.\n"; } } @@ -933,13 +922,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TRI->getSubClassWithSubReg(RC, SubIdx); if (!SRC) { report("Invalid subregister index for virtual register", MO, MONum); - *OS << "Register class " << TRI->getRegClassName(RC) + errs() << "Register class " << TRI->getRegClassName(RC) << " does not support subreg index " << SubIdx << "\n"; return; } if (RC != SRC) { report("Invalid register class for subregister index", MO, MONum); - *OS << "Register class " << TRI->getRegClassName(RC) + errs() << "Register class " << TRI->getRegClassName(RC) << " does not fully support subreg index " << SubIdx << "\n"; return; } @@ -948,7 +937,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (SubIdx) { const TargetRegisterClass *SuperRC = - TRI->getLargestLegalSuperClass(RC); + TRI->getLargestLegalSuperClass(RC, *MF); if (!SuperRC) { report("No largest legal super class exists.", MO, MONum); return; @@ -961,7 +950,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); - *OS << "Expected a " << TRI->getRegClassName(DRC) + errs() << "Expected a " << 
TRI->getRegClassName(DRC) << " register, but got a " << TRI->getRegClassName(RC) << " register\n"; } @@ -987,11 +976,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { SlotIndex Idx = LiveInts->getInstructionIndex(MI); if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); - *OS << "Live stack: " << LI << '\n'; + errs() << "Live stack: " << LI << '\n'; } if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); - *OS << "Live stack: " << LI << '\n'; + errs() << "Live stack: " << LI << '\n'; } } break; @@ -1030,12 +1019,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LiveQueryResult LRQ = LR->Query(UseIdx); if (!LRQ.valueIn()) { report("No live segment at use", MO, MONum); - *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + errs() << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; } if (MO->isKill() && !LRQ.isKill()) { report("Live range continues after kill flag", MO, MONum); - *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; + errs() << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; } } } @@ -1048,13 +1037,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LiveQueryResult LRQ = LI.Query(UseIdx); if (!LRQ.valueIn()) { report("No live segment at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; + errs() << UseIdx << " is not live in " << LI << '\n'; } // Check for extra kill flags. // Note that we allow missing kill flags for now. if (MO->isKill() && !LRQ.isKill()) { report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + errs() << "Live range: " << LI << '\n'; } } else { report("Virtual register has no live interval", MO, MONum); @@ -1077,6 +1066,25 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } } } + // If there is an additional implicit-use of a super register we stop + // here. By definition we are fine if the super register is not + // (completely) dead, if the complete super register is dead we will + // get a report for its operand. + if (Bad) { + for (const MachineOperand &MOP : MI->uses()) { + if (!MOP.isReg()) + continue; + if (!MOP.isImplicit()) + continue; + for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); + ++SubRegs) { + if (*SubRegs == Reg) { + Bad = false; + break; + } + } + } + } if (Bad) report("Using an undefined physical register", MO, MONum); } else if (MRI->def_empty(Reg)) { @@ -1118,19 +1126,19 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { assert(VNI && "NULL valno is not allowed"); if (VNI->def != DefIdx) { report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << VNI->id << " is not defined at " + errs() << "Valno " << VNI->id << " is not defined at " << DefIdx << " in " << LI << '\n'; } } else { report("No live segment at def", MO, MONum); - *OS << DefIdx << " is not live in " << LI << '\n'; + errs() << DefIdx << " is not live in " << LI << '\n'; } // Check that, if the dead def flag is present, LiveInts agree. 
if (MO->isDead()) { LiveQueryResult LRQ = LI.Query(DefIdx); if (!LRQ.isDeadDef()) { report("Live range continues after dead def flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + errs() << "Live range: " << LI << '\n'; } } } else { @@ -1172,7 +1180,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { SlotIndex stop = Indexes->getMBBEndIdx(MBB); if (!(stop > lastIndex)) { report("Block ends before last instruction index", MBB); - *OS << "Block ends at " << stop + errs() << "Block ends at " << stop << " last instruction was at " << lastIndex << '\n'; } lastIndex = stop; @@ -1274,7 +1282,7 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { PrE = MBB->pred_end(); PrI != PrE; ++PrI) { if (!seen.count(*PrI)) { report("Missing PHI operand", &BBI); - *OS << "BB#" << (*PrI)->getNumber() + errs() << "BB#" << (*PrI)->getNumber() << " is a predecessor according to the CFG.\n"; } } @@ -1305,7 +1313,7 @@ void MachineVerifier::visitMachineFunctionAfter() { ++I) if (MInfo.regsKilled.count(*I)) { report("Virtual register killed in block, but needed live out.", &MBB); - *OS << "Virtual register " << PrintReg(*I) + errs() << "Virtual register " << PrintReg(*I) << " is used after the block.\n"; } } @@ -1337,13 +1345,13 @@ void MachineVerifier::verifyLiveVariables() { if (MInfo.vregsRequired.count(Reg)) { if (!VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block missing from AliveBlocks", &MBB); - *OS << "Virtual register " << PrintReg(Reg) + errs() << "Virtual register " << PrintReg(Reg) << " must be live through the block.\n"; } } else { if (VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block should not be in AliveBlocks", &MBB); - *OS << "Virtual register " << PrintReg(Reg) + errs() << "Virtual register " << PrintReg(Reg) << " is not needed live through the block.\n"; } } @@ -1362,7 +1370,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!LiveInts->hasInterval(Reg)) { report("Missing live interval for virtual register", MF); - *OS << PrintReg(Reg, TRI) << " still has defs or uses\n"; + errs() << PrintReg(Reg, TRI) << " still has defs or uses\n"; continue; } @@ -1388,13 +1396,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!DefVNI) { report("Valno not live at def and not marked unused", MF, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << '\n'; + errs() << "Valno #" << VNI->id << '\n'; return; } if (DefVNI != VNI) { report("Live segment at def has different valno", MF, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << " where valno #" << DefVNI->id << " is live\n"; return; } @@ -1402,7 +1410,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { report("Invalid definition index", MF, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LR << '\n'; return; } @@ -1411,7 +1419,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { report("PHIDef value is not defined at MBB start", MBB, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; } return; @@ -1421,7 +1429,7 @@ 
void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { report("No instruction at def index", MBB, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; return; } @@ -1449,7 +1457,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!hasDef) { report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LR << '\n'; + errs() << "Valno #" << VNI->id << " in " << LR << '\n'; } // Early clobber defs begin at USE slots, but other defs must begin at @@ -1458,12 +1466,12 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!VNI->def.isEarlyClobber()) { report("Early clobber def must be at an early-clobber slot", MBB, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } else if (!VNI->def.isRegister()) { report("Non-PHI, non-early clobber def must be at a register slot", MBB, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } } @@ -1477,32 +1485,32 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { report("Foreign valno in live segment", MF, LR, Reg, LaneMask); - *OS << S << " has a bad valno\n"; + errs() << S << " has a bad valno\n"; } if (VNI->isUnused()) { report("Live segment valno is marked unused", MF, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); if (S.start != MBBStartIdx && S.start != VNI->def) { report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } const MachineBasicBlock *EndMBB = LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; return; } @@ -1521,7 +1529,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!MI) { report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; return; } @@ -1529,7 +1537,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (S.end.isBlock()) { report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } if (S.end.isDead()) { @@ -1538,7 +1546,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!SlotIndex::isSameInstr(S.start, S.end)) { report("Live segment ending at dead slot spans instructions", EndMBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } } @@ -1549,7 +1557,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, report("Live segment ending at early clobber slot must be " "redefined by an EC def in the same instruction", EndMBB, LR, Reg, LaneMask); - *OS << S << '\n'; + errs() << S << '\n'; } } @@ -1575,10 +1583,11 @@ void 
MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!hasRead) { // When tracking subregister liveness, the main range must start new // values on partial register writes, even if there is no read. - if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) { + if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 || + !hasSubRegDef) { report("Instruction ending live segment doesn't read the register", MI); - *OS << S << " in " << LR << '\n'; + errs() << S << " in " << LR << '\n'; } } } @@ -1619,7 +1628,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!PVNI) { report("Register not marked live out of predecessor", *PI, LR, Reg, LaneMask); - *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << '\n'; continue; @@ -1629,7 +1638,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!IsPHI && PVNI != VNI) { report("Different value live out of predecessor", *PI, LR, Reg, LaneMask); - *OS << "Valno #" << PVNI->id << " live out of BB#" + errs() << "Valno #" << PVNI->id << " live out of BB#" << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; @@ -1651,40 +1660,35 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, } void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - verifyLiveRange(LI, LI.reg); - unsigned Reg = LI.reg; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - unsigned Mask = 0; - unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); - for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((Mask & SR.LaneMask) != 0) - report("Lane masks of sub ranges overlap in live interval", MF, LI); - if ((SR.LaneMask & ~MaxMask) != 0) - report("Subrange lanemask is invalid", MF, LI); - Mask |= SR.LaneMask; - verifyLiveRange(SR, LI.reg, SR.LaneMask); - if (!LI.covers(SR)) - report("A Subrange is not covered by the main range", MF, LI); - } - } else if (LI.hasSubRanges()) { - report("subregister liveness only allowed for virtual registers", MF, LI); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + verifyLiveRange(LI, Reg); + + unsigned Mask = 0; + unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + if ((Mask & SR.LaneMask) != 0) + report("Lane masks of sub ranges overlap in live interval", MF, LI); + if ((SR.LaneMask & ~MaxMask) != 0) + report("Subrange lanemask is invalid", MF, LI); + Mask |= SR.LaneMask; + verifyLiveRange(SR, LI.reg, SR.LaneMask); + if (!LI.covers(SR)) + report("A Subrange is not covered by the main range", MF, LI); } // Check the LI only has one connected component. 
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); - if (NumComp > 1) { - report("Multiple connected components in live interval", MF, LI); - for (unsigned comp = 0; comp != NumComp; ++comp) { - *OS << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - *OS << ' ' << (*I)->id; - *OS << '\n'; - } + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF, LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + errs() << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + errs() << ' ' << (*I)->id; + errs() << '\n'; } } } @@ -1712,8 +1716,8 @@ namespace { /// by a FrameDestroy <n>, stack adjustments are identical on all /// CFG edges to a merge point, and frame is destroyed at end of a return block. void MachineVerifier::verifyStackFrame() { - int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); SmallVector<StackStateOfBB, 8> SPState; SPState.resize(MF->getNumBlockIDs()); @@ -1764,7 +1768,7 @@ void MachineVerifier::verifyStackFrame() { BBState.ExitValue; if (BBState.ExitIsSetup && AbsSPAdj != Size) { report("FrameDestroy <n> is after FrameSetup <m>", &I); - *OS << "FrameDestroy <" << Size << "> is after FrameSetup <" + errs() << "FrameDestroy <" << Size << "> is after FrameSetup <" << AbsSPAdj << ">.\n"; } BBState.ExitValue += Size; @@ -1781,7 +1785,7 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { report("The exit stack state of a predecessor is inconsistent.", MBB); - *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" + errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" << SPState[(*I)->getNumber()].ExitValue << ", " << SPState[(*I)->getNumber()].ExitIsSetup << "), while BB#" << MBB->getNumber() << " has entry state (" @@ -1797,7 +1801,7 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { report("The entry stack state of a successor is inconsistent.", MBB); - *OS << "Successor BB#" << (*I)->getNumber() << " has entry state (" + errs() << "Successor BB#" << (*I)->getNumber() << " has entry state (" << SPState[(*I)->getNumber()].EntryValue << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while BB#" << MBB->getNumber() << " has exit state (" diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp index 48db200..17654a6 100644 --- a/contrib/llvm/lib/CodeGen/OcamlGC.cpp +++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp @@ -20,16 +20,15 @@ using namespace llvm; namespace { - class OcamlGC : public GCStrategy { - public: - OcamlGC(); - }; +class OcamlGC : public GCStrategy { +public: + OcamlGC(); +}; } -static GCRegistry::Add<OcamlGC> -X("ocaml", "ocaml 3.10-compatible GC"); +static GCRegistry::Add<OcamlGC> X("ocaml", "ocaml 3.10-compatible GC"); -void llvm::linkOcamlGC() { } +void 
llvm::linkOcamlGC() {} OcamlGC::OcamlGC() { NeededSafePoints = 1 << GC::PostCall; diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index def2e3d..d514190 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> @@ -46,6 +47,10 @@ SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), cl::Hidden, cl::desc("Split all critical edges during " "PHI elimination")); +static cl::opt<bool> NoPhiElimLiveOutEarlyExit( + "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden, + cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true.")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -573,12 +578,14 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // there is a risk it may not be coalesced away. // // If the copy would be a kill, there is no need to split the edge. - if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges) + bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB); + if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit) continue; - - DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" - << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() - << ": " << *BBI); + if (ShouldSplit) { + DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" + << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() + << ": " << *BBI); + } // If Reg is not live-in to MBB, it means it must be live-in to some // other PreMBB successor, and we can avoid the interference by splitting @@ -588,7 +595,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. - bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges; + ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB); // Check for a loop exiting edge. if (!ShouldSplit && CurLoop != PreLoop) { @@ -603,7 +610,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // Split unless this edge is entering CurLoop from an outer loop. 
ShouldSplit = PreLoop && !PreLoop->contains(CurLoop); } - if (!ShouldSplit) + if (!ShouldSplit && !SplitAllCriticalEdges) continue; if (!PreMBB->SplitCriticalEdge(&MBB, this)) { DEBUG(dbgs() << "Failed to split critical edge.\n"); diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index e53e874..6902243 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -14,18 +14,16 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Analysis/Passes.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" @@ -54,11 +52,11 @@ static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, cl::desc("Disable Machine Common Subexpression Elimination")); static cl::opt<cl::boolOrDefault> -OptimizeRegAlloc("optimize-regalloc", cl::Hidden, + EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, + cl::desc("enable the shrink-wrapping pass")); +static cl::opt<cl::boolOrDefault> OptimizeRegAlloc( + "optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); -static cl::opt<cl::boolOrDefault> -EnableMachineSched("enable-misched", - cl::desc("Enable the machine instruction scheduling pass.")); static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); @@ -82,7 +80,9 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), - cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=nullptr)); + cl::init(false), + cl::ZeroOrMore); + static cl::opt<std::string> PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), @@ -115,28 +115,6 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, return PassID; } -/// Allow Pass selection to be overriden by command line options. This supports -/// flags with ternary conditions. TargetID is passed through by default. The -/// pass is suppressed when the option is false. When the option is true, the -/// StandardID is selected if the target provides no default. -static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID, - cl::boolOrDefault Override, - AnalysisID StandardID) { - switch (Override) { - case cl::BOU_UNSET: - return TargetID; - case cl::BOU_TRUE: - if (TargetID.isValid()) - return TargetID; - if (StandardID == nullptr) - report_fatal_error("Target cannot enable pass"); - return StandardID; - case cl::BOU_FALSE: - return IdentifyingPassPtr(); - } - llvm_unreachable("Invalid command line option state"); -} - /// Allow standard passes to be disabled by the command line, regardless of who /// is adding the pass. 
/// @@ -181,9 +159,6 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &MachineCSEID) return applyDisable(TargetID, DisableMachineCSE); - if (StandardID == &MachineSchedulerID) - return applyOverride(TargetID, EnableMachineSched, StandardID); - if (StandardID == &TargetPassConfig::PostRAMachineLICMID) return applyDisable(TargetID, DisablePostRAMachineLICM); @@ -234,10 +209,10 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), - Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), - Impl(nullptr), Initialized(false), DisableVerify(false), - EnableTailMerge(true) { + : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), + Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), + Impl(nullptr), Initialized(false), DisableVerify(false), + EnableTailMerge(true), EnableShrinkWrap(false) { Impl = new PassConfigImpl(); @@ -248,11 +223,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // Substitute Pseudo Pass IDs for real ones. substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); - - // Temporarily disable experimental passes. - const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); - if (!ST.useMachineScheduler()) - disablePass(&MachineSchedulerID); } /// Insert InsertedPassID pass after TargetPassID. @@ -408,10 +378,8 @@ void TargetPassConfig::addIRPasses() { // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) { + if (!DisableVerify) addPass(createVerifierPass()); - addPass(createDebugInfoVerifierPass()); - } // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { @@ -420,7 +388,10 @@ void TargetPassConfig::addIRPasses() { addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } + // Run GC lowering passes for builtin collectors + // TODO: add a pass insertion point here addPass(createGCLoweringPass()); + addPass(createShadowStackGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); @@ -448,10 +419,15 @@ void TargetPassConfig::addPassesToHandleExceptions() { // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - case ExceptionHandling::ItaniumWinEH: addPass(createDwarfEHPass(TM)); break; - case ExceptionHandling::MSVC: // FIXME: Add preparation. + case ExceptionHandling::WinEH: + // We support using both GCC-style and MSVC-style exceptions on Windows, so + // add both preparation passes. Each pass will only actually run if it + // recognizes the personality function. + addPass(createWinEHPass(TM)); + addPass(createDwarfEHPass(TM)); + break; case ExceptionHandling::None: addPass(createLowerInvokePass()); @@ -474,12 +450,6 @@ void TargetPassConfig::addCodeGenPrepare() { void TargetPassConfig::addISelPrepare() { addPreISel(); - // Need to verify DebugInfo *before* creating the stack protector analysis. - // It's a function pass, and verifying between it and its users causes a - // crash. 
- if (!DisableVerify) - addPass(createDebugInfoVerifierPass()); - addPass(createStackProtectorPass(TM)); if (PrintISelInput) @@ -557,6 +527,8 @@ void TargetPassConfig::addMachinePasses() { addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... + if (getEnableShrinkWrap()) + addPass(&ShrinkWrapID); addPass(&PrologEpilogCodeInserterID); /// Add passes that optimize machine instructions after register allocation. @@ -632,6 +604,21 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&DeadMachineInstructionElimID); } +bool TargetPassConfig::getEnableShrinkWrap() const { + switch (EnableShrinkWrapOpt) { + case cl::BOU_UNSET: + return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None; + // If EnableShrinkWrap is set, it takes precedence over whatever the + // target sets. The rationale is that we assume we want to test + // something related to shrink-wrapping. + case cl::BOU_TRUE: + return true; + case cl::BOU_FALSE: + return false; + } + llvm_unreachable("Invalid shrink-wrapping state"); +} + //===---------------------------------------------------------------------===// /// Register Allocation Pass Configuration //===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 283d1f2..ebe05e3 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -76,6 +76,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -411,8 +412,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, if (ExtendLife && !ExtendedUses.empty()) // Extend the liveness of the extension result. - std::copy(ExtendedUses.begin(), ExtendedUses.end(), - std::back_inserter(Uses)); + Uses.append(ExtendedUses.begin(), ExtendedUses.end()); // Now replace all uses. bool Changed = false; @@ -916,7 +916,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { // => v0 = COPY v1 // Currently we haven't seen a motivating example for that and we // want to avoid untested code. - NumRewrittenCopies += Changed == true; + NumRewrittenCopies += Changed; return Changed; } diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 89e1d11..55f08e4 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -141,7 +141,7 @@ namespace { TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs); - ~SchedulePostRATDList(); + ~SchedulePostRATDList() override; /// startBlock - Initialize register live-range state for scheduling in /// this block. @@ -282,9 +282,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } else { // Check that post-RA scheduling is enabled for this target. // This may upgrade the AntiDepMode.
- const TargetSubtargetInfo &ST = - Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); - if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(), + if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(), AntiDepMode, CriticalPathRCs)) return false; } diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 61407fa..76583f0 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "PrologEpilogInserter.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -28,8 +27,10 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" @@ -48,6 +49,53 @@ using namespace llvm; #define DEBUG_TYPE "pei" +namespace { +class PEI : public MachineFunctionPass { +public: + static char ID; + PEI() : MachineFunctionPass(ID) { + initializePEIPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn) override; + +private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. + unsigned MinCSFrameIndex, MaxCSFrameIndex; + + // Save and Restore blocks of the current function. + MachineBasicBlock *SaveBlock; + SmallVector<MachineBasicBlock *, 4> RestoreBlocks; + + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the current function. + bool FrameIndexVirtualScavenging; + + void calculateSets(MachineFunction &Fn); + void calculateCallsInformation(MachineFunction &Fn); + void calculateCalleeSavedRegisters(MachineFunction &Fn); + void insertCSRSpillsAndRestores(MachineFunction &Fn); + void calculateFrameObjectOffsets(MachineFunction &Fn); + void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); + void scavengeFrameVirtualRegs(MachineFunction &Fn); + void insertPrologEpilogCode(MachineFunction &Fn); + + // Convenience for recognizing return blocks. + bool isReturnBlock(MachineBasicBlock *MBB); +}; +} // namespace + char PEI::ID = 0; char &llvm::PrologEpilogCodeInserterID = PEI::ID; @@ -85,20 +133,26 @@ bool PEI::isReturnBlock(MachineBasicBlock* MBB) { /// Compute the set of return blocks void PEI::calculateSets(MachineFunction &Fn) { - // Sets used to compute spill, restore placement sets. - const std::vector<CalleeSavedInfo> &CSI = - Fn.getFrameInfo()->getCalleeSavedInfo(); + const MachineFrameInfo *MFI = Fn.getFrameInfo(); - // If no CSRs used, we are done. - if (CSI.empty()) + // Even when we do not change any CSR, we still want to insert the + // prologue and epilogue of the function. + // So set the save points for those. + + // Use the points found by shrink-wrapping, if any. 
+ if (MFI->getSavePoint()) { + SaveBlock = MFI->getSavePoint(); + assert(MFI->getRestorePoint() && "Both restore and save must be set"); + RestoreBlocks.push_back(MFI->getRestorePoint()); return; + } // Save refs to entry and return blocks. - EntryBlock = Fn.begin(); + SaveBlock = Fn.begin(); for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); MBB != E; ++MBB) if (isReturnBlock(MBB)) - ReturnBlocks.push_back(MBB); + RestoreBlocks.push_back(MBB); return; } @@ -178,7 +232,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { } delete RS; - ReturnBlocks.clear(); + RestoreBlocks.clear(); return true; } @@ -194,12 +248,12 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { bool AdjustsStack = MFI->adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode - int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); // Early exit for targets which have no call frame setup/destroy pseudo // instructions. - if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u) return; std::vector<MachineBasicBlock::iterator> FrameSDOps; @@ -324,6 +378,61 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { MFI->setCalleeSavedInfo(CSI); } +/// Helper function to update the liveness information for the callee-saved +/// registers. +static void updateLiveness(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + // Visited will contain all the basic blocks that are in the region + // where the callee saved registers are alive: + // - Anything that is not Save or Restore -> LiveThrough. + // - Save -> LiveIn. + // - Restore -> LiveOut. + // The live-out is not attached to the block, so no need to keep + // Restore in this set. + SmallPtrSet<MachineBasicBlock *, 8> Visited; + SmallVector<MachineBasicBlock *, 8> WorkList; + MachineBasicBlock *Entry = &MF.front(); + MachineBasicBlock *Save = MFI->getSavePoint(); + + if (!Save) + Save = Entry; + + if (Entry != Save) { + WorkList.push_back(Entry); + Visited.insert(Entry); + } + Visited.insert(Save); + + MachineBasicBlock *Restore = MFI->getRestorePoint(); + if (Restore) + // By construction Restore cannot be visited, otherwise it + // means there exists a path to Restore that does not go + // through Save. + WorkList.push_back(Restore); + + while (!WorkList.empty()) { + const MachineBasicBlock *CurBB = WorkList.pop_back_val(); + // By construction, the region that is after the save point is + // dominated by the Save and post-dominated by the Restore. + if (CurBB == Save) + continue; + // Enqueue all the successors not already visited. + // Those are by construction either before Save or after Restore. + for (MachineBasicBlock *SuccBB : CurBB->successors()) + if (Visited.insert(SuccBB).second) + WorkList.push_back(SuccBB); + } + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + for (MachineBasicBlock *MBB : Visited) + // Add the callee-saved register as live-in. + // It's killed at the spill. + MBB->addLiveIn(CSI[i].getReg()); + } +} + /// insertCSRSpillsAndRestores - Insert spill and restore code for /// callee saved registers used in the function. 
/// @@ -344,26 +453,22 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MachineBasicBlock::iterator I; // Spill using target interface. - I = EntryBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + I = SaveBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. - // It's killed at the spill. - EntryBlock->addLiveIn(CSI[i].getReg()); - // Insert the spill to the stack frame. unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), + TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } + // Update the live-in information of all the blocks up to the save point. + updateLiveness(Fn); // Restore using target interface. - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { - MachineBasicBlock *MBB = ReturnBlocks[ri]; + for (MachineBasicBlock *MBB : RestoreBlocks) { I = MBB->end(); - --I; // Skip over all terminator instructions, which are part of the return // sequence. @@ -495,7 +600,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; + Offset = RoundUpToAlignment(Offset, Align); MFI->setObjectOffset(i, -Offset); // Set the computed offset } @@ -504,7 +609,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; + Offset = RoundUpToAlignment(Offset, Align); MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); @@ -537,7 +642,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = RoundUpToAlignment(Offset, Align); DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); @@ -656,8 +761,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + Offset = RoundUpToAlignment(Offset, StackAlign); } // Update frame info to pretend that this is part of the stack... @@ -674,21 +778,18 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); // Add prologue to the function... - TFI.emitPrologue(Fn); + TFI.emitPrologue(Fn, *SaveBlock); - // Add epilogue to restore the callee-save registers in each exiting block - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().isReturn()) - TFI.emitEpilogue(Fn, *I); - } + // Add epilogue to restore the callee-save registers in each exiting block. + for (MachineBasicBlock *RestoreBlock : RestoreBlocks) + TFI.emitEpilogue(Fn, *RestoreBlock); // Emit additional code that is required to support segmented stacks, if // we've been asked for it. 
This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. if (Fn.shouldSplitStack()) - TFI.adjustForSegmentedStacks(Fn); + TFI.adjustForSegmentedStacks(Fn, *SaveBlock); // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The @@ -696,7 +797,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // different conditional check and another BIF for allocating more stack // space. if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) - TFI.adjustForHiPEPrologue(Fn); + TFI.adjustForHiPEPrologue(Fn, *SaveBlock); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical @@ -706,6 +807,25 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); if (!TFI.needsFrameIndexResolution(Fn)) return; + MachineModuleInfo &MMI = Fn.getMMI(); + const Function *F = Fn.getFunction(); + const Function *ParentF = MMI.getWinEHParent(F); + unsigned FrameReg; + if (F == ParentF) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); + // FIXME: This should be unconditional but we have bugs in the preparation + // pass. + if (FuncInfo.UnwindHelpFrameIdx != INT_MAX) + FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP( + Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg); + } else if (MMI.hasWinEHFuncInfo(F)) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); + auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F); + if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end()) + FuncInfo.CatchHandlerParentFrameObjOffset[F] = + TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg); + } + // Store SPAdj at exit of a basic block. SmallVector<int, 8> SPState; SPState.resize(Fn.getNumBlockIDs()); @@ -744,8 +864,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); @@ -811,17 +931,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, continue; } - // Frame allocations are target independent. Simply swap the index with - // the offset. - if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) { - assert(TFI->hasFP(Fn) && "frame alloc requires FP"); - MachineOperand &FI = MI->getOperand(i); - unsigned Reg; - int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg); - FI.ChangeToImmediate(FrameOffset); - continue; - } - // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. 
We need the register diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h deleted file mode 100644 index f88b8ef..0000000 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h +++ /dev/null @@ -1,78 +0,0 @@ -//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass is responsible for finalizing the functions frame layout, saving -// callee saved registers, and for emitting prolog & epilog code for the -// function. -// -// This pass must be run after register allocation. After this pass is -// executed, it is illegal to construct MO_FrameIndex operands. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H -#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetRegisterInfo.h" - -namespace llvm { - class RegScavenger; - class MachineBasicBlock; - - class PEI : public MachineFunctionPass { - public: - static char ID; - PEI() : MachineFunctionPass(ID) { - initializePEIPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract - /// frame indexes with appropriate references. - /// - bool runOnMachineFunction(MachineFunction &Fn) override; - - private: - RegScavenger *RS; - - // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved - // stack frame indexes. - unsigned MinCSFrameIndex, MaxCSFrameIndex; - - // Entry and return blocks of the current function. - MachineBasicBlock* EntryBlock; - SmallVector<MachineBasicBlock*, 4> ReturnBlocks; - - // Flag to control whether to use the register scavenger to resolve - // frame index materialization registers. Set according to - // TRI->requiresFrameIndexScavenging() for the curren function. - bool FrameIndexVirtualScavenging; - - void calculateSets(MachineFunction &Fn); - void calculateCallsInformation(MachineFunction &Fn); - void calculateCalleeSavedRegisters(MachineFunction &Fn); - void insertCSRSpillsAndRestores(MachineFunction &Fn); - void calculateFrameObjectOffsets(MachineFunction &Fn); - void replaceFrameIndices(MachineFunction &Fn); - void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, - int &SPAdj); - void scavengeFrameVirtualRegs(MachineFunction &Fn); - void insertPrologEpilogCode(MachineFunction &Fn); - - // Convenience for recognizing return blocks. 
- bool isReturnBlock(MachineBasicBlock* MBB); - }; -} // End llvm namespace -#endif diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index 6b346f4..16ff48e 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -27,6 +27,7 @@ #endif #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index c621414..fd3d4d7 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -301,13 +301,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, const MDNode *Expr = DBG->getDebugExpression(); bool IsIndirect = DBG->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; - DebugLoc DL; - if (MI == MBB->end()) { - // If MI is at basic block end then use last instruction's location. - MachineBasicBlock::iterator EI = MI; - DL = (--EI)->getDebugLoc(); - } else - DL = MI->getDebugLoc(); + DebugLoc DL = DBG->getDebugLoc(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(FI) @@ -877,6 +873,9 @@ void RAFast::AllocateBasicBlock() { const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *MBB = MI->getParent(); + assert( + cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SS) diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index edc3294..26f42c9 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -538,8 +538,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. bool ReverseLocal = TRI->reverseLocalAssignment(); + const TargetRegisterClass &RC = *MRI->getRegClass(Reg); bool ForceGlobal = !ReverseLocal && - (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); + (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { @@ -552,10 +553,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Allocating bottom up may allow many short LRGs to be assigned first // to one of the cheap registers. This could be much faster for very // large blocks on targets with many physical registers. - Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); + Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); } - } - else { + Prio |= RC.AllocationPriority << 24; + } else { // Allocate global and split ranges in long->short order. Long ranges that // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. 
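The RAGreedy::enqueue change above ORs the register class's AllocationPriority into bits 24 and up of the queue key, so class priority dominates the size/position heuristic packed into the low bits. A small self-contained sketch of that bit-packing — the constants and the 24-bit split are illustrative, not RAGreedy's exact code:

#include <cstdint>
#include <queue>
#include <utility>
#include <vector>

// Pack a base heuristic into the low 24 bits and the class priority into
// the high bits, so the class priority wins any comparison.
static uint32_t makePriority(uint32_t BasePrio, uint8_t ClassPrio) {
  return (BasePrio & 0x00FFFFFFu) | (uint32_t(ClassPrio) << 24);
}

int main() {
  // Max-heap keyed on the packed priority; higher values pop first.
  std::priority_queue<std::pair<uint32_t, unsigned>> Queue;
  Queue.push({makePriority(1000, 0), /*vreg=*/1});
  Queue.push({makePriority(10, 1), /*vreg=*/2});
  // vreg 2 pops first: its class priority outweighs the larger base value.
  return Queue.top().second == 2 ? 0 : 1;
}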
@@ -1554,7 +1555,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(CurRC, *MF); unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); // Split around every non-copy instruction if this split will relax // the constraints on the virtual register. diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index eb7e563..eeff73d 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -126,7 +126,12 @@ private: void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); /// \brief Constructs an initial graph. - void initializeGraph(PBQPRAGraph &G); + void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller); + + /// \brief Spill the given VReg. + void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals, + MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, + Spiller &VRegSpiller); /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. @@ -172,11 +177,41 @@ public: class Interference : public PBQPRAConstraint { private: -private: - typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; - typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey; - typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache; + typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey; + typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache; + typedef DenseSet<IKey> DisjointAllowedRegsCache; + typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey; + typedef DenseSet<IEdgeKey> IEdgeCache; + + bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, + const DisjointAllowedRegsCache &D) const { + const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); + const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); + + if (NRegs == MRegs) + return false; + + if (NRegs < MRegs) + return D.count(IKey(NRegs, MRegs)) > 0; + + return D.count(IKey(MRegs, NRegs)) > 0; + } + + void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, + DisjointAllowedRegsCache &D) { + const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); + const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); + + assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself"); + + if (NRegs < MRegs) + D.insert(IKey(NRegs, MRegs)); + else + D.insert(IKey(MRegs, NRegs)); + } // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required // for the fast interference graph construction algorithm. The last is there @@ -244,6 +279,13 @@ public: // and uniquing them. IMatrixCache C; + // Finding an edge is expensive in the worst case (O(max_clique(G))). So + // cache locally edges we have already seen. 
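// A minimal generic sketch of the normalization both caches above rely on:
// ordering the two keys before any lookup or insert makes the cached pair
// symmetric, so (A, B) and (B, A) always hit the same entry. std::pair
// stands in for the IKey/IEdgeKey typedefs here.
#include <utility>
template <typename T>
static std::pair<T, T> makeSymmetricKey(const T &A, const T &B) {
  return (A < B) ? std::make_pair(A, B) : std::make_pair(B, A);
}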
+ IEdgeCache EC;
+
+ // Cache known disjoint allowed register pairs
+ DisjointAllowedRegsCache D;
+
 typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
 typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
 decltype(&lowestStartPoint)> IntervalQueue;
@@ -287,14 +329,21 @@ public:
 for (const auto &A : Active) {
 PBQP::GraphBase::NodeId MId = getNodeId(A);
+ // Do not add an edge when the nodes' allowed registers do not
+ // intersect: there is obviously no interference.
+ if (haveDisjointAllowedRegs(G, NId, MId, D))
+ continue;
+
 // Check that we haven't already added this edge
- // FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
- // It might be better to replace this with a local bit-matrix.
- if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
+ IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
+ if (EC.count(EK))
 continue;
 // This is a new edge - add it to the graph.
- createInterferenceEdge(G, NId, MId, C);
+ if (!createInterferenceEdge(G, NId, MId, C))
+ setDisjointAllowedRegs(G, NId, MId, D);
+ else
+ EC.insert(EK);
 }
 // Finally, add Cur to the Active set.
@@ -304,35 +353,48 @@ public:
 private:
- void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
- PBQPRAGraph::NodeId MId, IMatrixCache &C) {
+ // Create an Interference edge and add it to the graph, unless it is
+ // a null matrix, meaning the nodes' allowed registers do not have any
+ // interference. This case occurs frequently between integer and floating
+ // point registers for example.
+ // Return true iff both nodes interfere.
+ bool createInterferenceEdge(PBQPRAGraph &G,
+ PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
+ IMatrixCache &C) {
 const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
-
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
 const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
 const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
 // Try looking the edge costs up in the IMatrixCache first.
- IMatrixKey K(&NRegs, &MRegs);
+ IKey K(&NRegs, &MRegs);
 IMatrixCache::iterator I = C.find(K);
 if (I != C.end()) {
 G.addEdgeBypassingCostAllocator(NId, MId, I->second);
- return;
+ return true;
 }
 PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+ bool NodesInterfere = false;
 for (unsigned I = 0; I != NRegs.size(); ++I) {
 unsigned PRegN = NRegs[I];
 for (unsigned J = 0; J != MRegs.size(); ++J) {
 unsigned PRegM = MRegs[J];
- if (TRI.regsOverlap(PRegN, PRegM))
+ if (TRI.regsOverlap(PRegN, PRegM)) {
 M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ NodesInterfere = true;
+ }
 }
 }
+ if (!NodesInterfere)
+ return false;
+
 PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
 C[K] = G.getEdgeCostsPtr(EId);
+
+ return true;
 }
 };
@@ -342,7 +404,7 @@ public:
 void apply(PBQPRAGraph &G) override {
 MachineFunction &MF = G.getMetadata().MF;
 MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
- CoalescerPair CP(*MF.getTarget().getSubtargetImpl()->getRegisterInfo());
+ CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
 // Scan the machine function and add a coalescing cost whenever CoalescerPair
 // gives the OK.
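// A minimal self-contained sketch of the null-matrix early-out that
// createInterferenceEdge above implements; plain vectors stand in for the
// PBQP matrix type, and regsOverlap is passed in as a callback:
#include <limits>
#include <vector>
static bool buildInterferenceCosts(const std::vector<unsigned> &NRegs,
                                   const std::vector<unsigned> &MRegs,
                                   bool (*RegsOverlap)(unsigned, unsigned),
                                   std::vector<std::vector<double>> &M) {
  M.assign(NRegs.size() + 1, std::vector<double>(MRegs.size() + 1, 0.0));
  bool NodesInterfere = false;
  for (unsigned I = 0; I != NRegs.size(); ++I)
    for (unsigned J = 0; J != MRegs.size(); ++J)
      if (RegsOverlap(NRegs[I], MRegs[J])) {
        M[I + 1][J + 1] = std::numeric_limits<double>::infinity();
        NodesInterfere = true;
      }
  return NodesInterfere; // An all-zero matrix carries no edge worth adding.
}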
@@ -398,7 +460,7 @@ public:
 }
 PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
- G.setEdgeCosts(EId, std::move(Costs));
+ G.updateEdgeCosts(EId, std::move(Costs));
 }
 }
 }
@@ -488,15 +550,21 @@ static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
 return false;
 }
-void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
+void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
 MachineFunction &MF = G.getMetadata().MF;
 LiveIntervals &LIS = G.getMetadata().LIS;
 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
 const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
+
+ std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
+
+ while (!Worklist.empty()) {
+ unsigned VReg = Worklist.back();
+ Worklist.pop_back();
- for (auto VReg : VRegsToAlloc) {
 const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
 LiveInterval &VRegLI = LIS.getInterval(VReg);
@@ -531,6 +599,15 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
 VRegAllowed.push_back(PReg);
 }
+ // Check for vregs that have no allowed registers. These should be
+ // pre-spilled and the new vregs added to the worklist.
+ if (VRegAllowed.empty()) {
+ SmallVector<unsigned, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
+ continue;
+ }
+
 PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
 // Tweak cost of callee saved registers, as using them forces spilling and
@@ -547,14 +624,40 @@
 }
 }
+void RegAllocPBQP::spillVReg(unsigned VReg,
+ SmallVectorImpl<unsigned> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM, Spiller &VRegSpiller) {
+
+ VRegsToAlloc.erase(VReg);
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
+ VRegSpiller.spill(LRE);
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ (void)TRI;
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
+ << LRE.getParent().weight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
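// A minimal generic sketch (hypothetical callables, not RegAllocPBQP's
// exact code) of the worklist discipline initializeGraph uses above: a vreg
// with no allowed registers is pre-spilled, and the spiller's replacement
// vregs are pushed back so a later iteration processes them too.
#include <functional>
#include <vector>
static void drainVRegWorklist(
    std::vector<unsigned> Worklist,
    const std::function<bool(unsigned)> &HasAllowedRegs,
    const std::function<std::vector<unsigned>(unsigned)> &Spill,
    const std::function<void(unsigned)> &AddGraphNode) {
  while (!Worklist.empty()) {
    unsigned VReg = Worklist.back();
    Worklist.pop_back();
    if (!HasAllowedRegs(VReg)) {
      std::vector<unsigned> NewVRegs = Spill(VReg);
      Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
      continue;
    }
    AddGraphNode(VReg); // Build the PBQP node for this vreg.
  }
}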
+ for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); + I != E; ++I) { + const LiveInterval &LI = LIS.getInterval(*I); + assert(!LI.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); + VRegsToAlloc.insert(LI.reg); + } + + DEBUG(dbgs() << ")\n"); +} + bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, const PBQP::Solution &Solution, VirtRegMap &VRM, Spiller &VRegSpiller) { MachineFunction &MF = G.getMetadata().MF; LiveIntervals &LIS = G.getMetadata().LIS; - const TargetRegisterInfo &TRI = - *MF.getTarget().getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); (void)TRI; // Set to true if we have any spills @@ -576,28 +679,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, assert(PReg != 0 && "Invalid preg selected."); VRM.assignVirt2Phys(VReg, PReg); } else { - VRegsToAlloc.erase(VReg); - SmallVector<unsigned, 8> NewSpills; - LiveRangeEdit LRE(&LIS.getInterval(VReg), NewSpills, MF, LIS, &VRM); - VRegSpiller.spill(LRE); - - DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: " - << LRE.getParent().weight << ", New vregs: "); - - // Copy any newly inserted live intervals into the list of regs to - // allocate. - for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); - I != E; ++I) { - LiveInterval &LI = LIS.getInterval(*I); - assert(!LI.empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); - VRegsToAlloc.insert(LI.reg); - } - - DEBUG(dbgs() << ")\n"); - - // We need another round if spill intervals were added. - AnotherRoundNeeded |= !LRE.empty(); + // Spill VReg. If this introduces new intervals we'll need another round + // of allocation. + SmallVector<unsigned, 8> NewVRegs; + spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); + AnotherRoundNeeded |= !NewVRegs.empty(); } } @@ -670,7 +756,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // If there are non-empty intervals allocate them using pbqp. 
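// A minimal sketch (hypothetical helper, mirroring the loop that follows)
// of the round-based driver: each round builds and solves one PBQP graph,
// and allocation repeats while mapping the solution back spills anything new.
#include <functional>
static void runAllocationRounds(const std::function<bool(unsigned)> &SolveRound) {
  // SolveRound returns true when its spills created new vregs that a
  // further round must allocate.
  unsigned Round = 0;
  while (SolveRound(Round))
    ++Round;
}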
if (!VRegsToAlloc.empty()) { - const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl(); + const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot = llvm::make_unique<PBQPRAConstraintList>(); ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>()); @@ -686,7 +772,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); - initializeGraph(G); + initializeGraph(G, VRM, *VRegSpiller); ConstraintsRoot->apply(G); #ifndef NDEBUG @@ -699,7 +785,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" << GraphFileName << "\"\n"); - G.dumpToStream(OS); + G.dump(OS); } #endif @@ -719,6 +805,79 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } +namespace { +// A helper class for printing node and register info in a consistent way +class PrintNodeInfo { +public: + typedef PBQP::RegAlloc::PBQPRAGraph Graph; + typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; + + PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} + + void print(raw_ostream &OS) const { + const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); + OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; + } + +private: + const Graph &G; + NodeId NId; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { + PR.print(OS); + return OS; +} +} // anonymous namespace + +void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { + for (auto NId : nodeIds()) { + const Vector &Costs = getNodeCosts(NId); + assert(Costs.getLength() != 0 && "Empty vector in graph."); + OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n'; + } + OS << '\n'; + + for (auto EId : edgeIds()) { + NodeId N1Id = getEdgeNode1Id(EId); + NodeId N2Id = getEdgeNode2Id(EId); + assert(N1Id != N2Id && "PBQP graphs should not have self-edges."); + const Matrix &M = getEdgeCosts(EId); + assert(M.getRows() != 0 && "No rows in matrix."); + assert(M.getCols() != 0 && "No cols in matrix."); + OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / "; + OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n"; + OS << M << '\n'; + } +} + +void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); } + +void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const { + OS << "graph {\n"; + for (auto NId : nodeIds()) { + OS << " node" << NId << " [ label=\"" + << PrintNodeInfo(NId, *this) << "\\n" + << getNodeCosts(NId) << "\" ]\n"; + } + + OS << " edge [ len=" << nodeIds().size() << " ]\n"; + for (auto EId : edgeIds()) { + OS << " node" << getEdgeNode1Id(EId) + << " -- node" << getEdgeNode2Id(EId) + << " [ label=\""; + const Matrix &EdgeCosts = getEdgeCosts(EId); + for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) { + OS << EdgeCosts.getRowAsVector(i) << "\\n"; + } + OS << "\" ]\n"; + } + OS << "}\n"; +} + FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { return new RegAllocPBQP(customPassID); } diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index 
ab33672..178fa18 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -131,7 +131,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.NumRegs = StressRA; // Check if RC is a proper sub-class. - if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC)) + if (const TargetRegisterClass *Super = + TRI->getLargestLegalSuperClass(RC, *MF)) if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) RCI.ProperSubClass = true; @@ -175,6 +176,6 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { } compute(RC); unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); - return TRI->getRegPressureSetLimit(Idx) - - TRI->getRegClassWeight(RC).RegWeight * NReserved; + return TRI->getRegPressureSetLimit(*MF, Idx) - + TRI->getRegClassWeight(RC).RegWeight * NReserved; } diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index b8cae4a..ac7d98f 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -58,12 +58,16 @@ EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true)); -// Temporary flag to test critical edge unsplitting. +static cl::opt<bool> UseTerminalRule("terminal-rule", + cl::desc("Apply the terminal rule"), + cl::init(false), cl::Hidden); + +/// Temporary flag to test critical edge unsplitting. static cl::opt<bool> EnableJoinSplits("join-splitedges", cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden); -// Temporary flag to test global copy optimization. +/// Temporary flag to test global copy optimization. static cl::opt<cl::boolOrDefault> EnableGlobalCopies("join-globalcopies", cl::desc("Coalesce copies that span blocks (default=subtarget)"), @@ -120,7 +124,7 @@ namespace { /// Recursively eliminate dead defs in DeadDefs. void eliminateDeadDefs(); - /// LiveRangeEdit callback. + /// LiveRangeEdit callback for eliminateDeadDefs(). void LRE_WillEraseInstruction(MachineInstr *MI) override; /// Coalesce the LocalWorkList. @@ -133,16 +137,15 @@ namespace { /// copies that cannot yet be coalesced into WorkList. void copyCoalesceInMBB(MachineBasicBlock *MBB); - /// Try to coalesce all copies in CurrList. Return - /// true if any progress was made. + /// Tries to coalesce all copies in CurrList. Returns true if any progress + /// was made. bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); - /// Attempt to join intervals corresponding to SrcReg/DstReg, - /// which are the src/dst of the copy instruction CopyMI. This returns - /// true if the copy was successfully coalesced away. If it is not - /// currently possible to coalesce this interval, but it may be possible if - /// other things get coalesced, then it returns true by reference in - /// 'Again'. + /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the + /// src/dst of the copy instruction CopyMI. This returns true if the copy + /// was successfully coalesced away. If it is not currently possible to + /// coalesce this interval, but it may be possible if other things get + /// coalesced, then it returns true by reference in 'Again'. bool joinCopy(MachineInstr *TheCopy, bool &Again); /// Attempt to join these two intervals. On failure, this @@ -161,18 +164,20 @@ namespace { /// LaneMask are split as necessary. @p LaneMask are the lanes that /// @p ToMerge will occupy in the coalescer register. 
@p LI has its subrange
 /// lanemasks already adjusted to the coalesced register.
- void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ /// @returns false if live range conflicts could not be resolved.
+ bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
 unsigned LaneMask, CoalescerPair &CP);
 /// Join the live ranges of two subregisters. Joins @p RRange into
 /// @p LRange, @p RRange may be invalid afterwards.
- void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ /// @returns false if live range conflicts could not be resolved.
+ bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
 unsigned LaneMask, const CoalescerPair &CP);
- /// We found a non-trivially-coalescable copy. If
- /// the source value number is defined by a copy from the destination reg
- /// see if we can merge these two destination reg valno# into a single
- /// value number, eliminating a copy.
+ /// We found a non-trivially-coalescable copy. If the source value number is
+ /// defined by a copy from the destination reg, see if we can merge these two
+ /// destination reg valno# into a single value number, eliminating a copy.
+ /// This returns true if an interval was modified.
 bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
 /// Return true if there are definitions of IntB
@@ -184,28 +189,69 @@ namespace {
 /// If the source value number is defined by a commutable instruction and
 /// its other operand is coalesced to the copy dest register, see if we
 /// can transform the copy into a noop by commuting the definition.
+ /// This returns true if an interval was modified.
 bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
 /// If the source of a copy is defined by a
 /// trivial computation, replace the copy by rematerializing the definition.
- bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI,
+ bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
 bool &IsDefCopy);
- /// Return true if a physreg copy should be joined.
+ /// Return true if a copy involving a physreg should be joined.
 bool canJoinPhys(const CoalescerPair &CP);
- /// Replace all defs and uses of SrcReg to DstReg and
- /// update the subregister number if it is not zero. If DstReg is a
- /// physical register and the existing subregister number of the def / use
- /// being updated is not zero, make sure to set it to the correct physical
- /// subregister.
+ /// Replace all defs and uses of SrcReg to DstReg and update the subregister
+ /// number if it is not zero. If DstReg is a physical register and the
+ /// existing subregister number of the def / use being updated is not zero,
+ /// make sure to set it to the correct physical subregister.
 void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
 /// Handle copies of undef values.
+ /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
 bool eliminateUndefCopy(MachineInstr *CopyMI);
+ /// Check whether or not we should apply the terminal rule on the
+ /// destination (Dst) of \p Copy.
+ /// When the terminal rule applies, Copy is not profitable to
+ /// coalesce.
+ /// Dst is terminal if it has exactly one affinity (Dst, Src) and
+ /// at least one interference (Dst, Dst2). If Dst is terminal, the
+ /// terminal rule consists in checking that at least one of the
+ /// interfering nodes, say Dst2, has an affinity of equal or greater
+ /// weight with Src.
+ /// In that case, Dst2 and Dst will not both be able to be coalesced
+ /// with Src.
Since Dst2 exposes more coalescing opportunities than
+ /// Dst, we can drop \p Copy.
+ bool applyTerminalRule(const MachineInstr &Copy) const;
+
+ /// Check whether or not \p LI is composed of multiple connected
+ /// components and if that is the case, fix that.
+ void splitNewRanges(LiveInterval *LI) {
+ ConnectedVNInfoEqClasses ConEQ(*LIS);
+ unsigned NumComps = ConEQ.Classify(LI);
+ if (NumComps <= 1)
+ return;
+ SmallVector<LiveInterval*, 8> NewComps(1, LI);
+ for (unsigned i = 1; i != NumComps; ++i) {
+ unsigned VReg = MRI->createVirtualRegister(MRI->getRegClass(LI->reg));
+ NewComps.push_back(&LIS->createEmptyInterval(VReg));
+ }
+
+ ConEQ.Distribute(&NewComps[0], *MRI);
+ }
+
+ /// Wrapper method for \see LiveIntervals::shrinkToUses.
+ /// This method does the proper fixing of the live ranges when the
+ /// aforementioned method returns true.
+ void shrinkToUses(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
+ if (LIS->shrinkToUses(LI, Dead))
+ // We may have created multiple connected components, split them.
+ splitNewRanges(LI);
+ }
+
 public:
- static char ID; // Class identification, replacement for typeinfo
+ static char ID; ///< Class identification, replacement for typeinfo
 RegisterCoalescer() : MachineFunctionPass(ID) {
 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
 }
@@ -220,7 +266,7 @@ namespace {
 /// Implement the dump method.
 void print(raw_ostream &O, const Module* = nullptr) const override;
 };
-} /// end anonymous namespace
+} // end anonymous namespace
 char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
@@ -254,11 +300,11 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
 return true;
 }
-// Return true if this block should be vacated by the coalescer to eliminate
-// branches. The important cases to handle in the coalescer are critical edges
-// split during phi elimination which contain only copies. Simple blocks that
-// contain non-branches should also be vacated, but this can be handled by an
-// earlier pass similar to early if-conversion.
+/// Return true if this block should be vacated by the coalescer to eliminate
+/// branches. The important cases to handle in the coalescer are critical edges
+/// split during phi elimination which contain only copies. Simple blocks that
+/// contain non-branches should also be vacated, but this can be handled by an
+/// earlier pass similar to early if-conversion.
 static bool isSplitEdge(const MachineBasicBlock *MBB) {
 if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
 return false;
@@ -423,27 +469,11 @@ void RegisterCoalescer::eliminateDeadDefs() {
 nullptr, this).eliminateDeadDefs(DeadDefs);
 }
-// Callback from eliminateDeadDefs().
 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
 // MI may be in WorkList. Make sure we don't visit it.
 ErasedInstrs.insert(MI);
 }
-/// We found a non-trivially-coalescable copy with IntA
-/// being the source and IntB being the dest, thus this defines a value number
-/// in IntB. If the source value number (in IntA) is defined by a copy from B,
-/// see if we can merge these two pieces of B into a single value number,
-/// eliminating a copy. For example:
-///
-/// A3 = B0
-/// ...
-/// B1 = A3 <- this copy
-///
-/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
-/// value number to be replaced with B0 (which simplifies the B liveinterval).
-///
-/// This returns true if an interval was modified.
-/// bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI) { assert(!CP.isPartial() && "This doesn't work for partial copies."); @@ -455,6 +485,20 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + // We have a non-trivially-coalescable copy with IntA being the source and + // IntB being the dest, thus this defines a value number in IntB. If the + // source value number (in IntA) is defined by a copy from B, see if we can + // merge these two pieces of B into a single value number, eliminating a copy. + // For example: + // + // A3 = B0 + // ... + // B1 = A3 <- this copy + // + // In this case, B0 can be extended to where the B1 copy lives, allowing the + // B1 value number to be replaced with B0 (which simplifies the B + // liveinterval). + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); @@ -538,14 +582,12 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // will also add the isKill marker. CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); if (AS->end == CopyIdx) - LIS->shrinkToUses(&IntA); + shrinkToUses(&IntA); ++numExtends; return true; } -/// Return true if there are definitions of IntB -/// other than BValNo val# that can reach uses of AValno val# of IntA. bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, @@ -585,29 +627,6 @@ static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, } } -/// We found a non-trivially-coalescable copy with -/// IntA being the source and IntB being the dest, thus this defines a value -/// number in IntB. If the source value number (in IntA) is defined by a -/// commutable instruction and its other operand is coalesced to the copy dest -/// register, see if we can transform the copy into a noop by commuting the -/// definition. For example, -/// -/// A3 = op A2 B0<kill> -/// ... -/// B1 = A3 <- this copy -/// ... -/// = op A3 <- more uses -/// -/// ==> -/// -/// B2 = op B0 A2<kill> -/// ... -/// B1 = B2 <- now an identity copy -/// ... -/// = op B2 <- more uses -/// -/// This returns true if an interval was modified. -/// bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *CopyMI) { assert(!CP.isPhys()); @@ -617,6 +636,26 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + // We found a non-trivially-coalescable copy with IntA being the source and + // IntB being the dest, thus this defines a value number in IntB. If the + // source value number (in IntA) is defined by a commutable instruction and + // its other operand is coalesced to the copy dest register, see if we can + // transform the copy into a noop by commuting the definition. For example, + // + // A3 = op A2 B0<kill> + // ... + // B1 = A3 <- this copy + // ... + // = op A3 <- more uses + // + // ==> + // + // B2 = op B0 A2<kill> + // ... + // B1 = B2 <- now an identity copy + // ... + // = op B2 <- more uses + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. 
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); @@ -745,15 +784,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); - BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); + BValNo = IntB.MergeValueNumberInto(DVNI, BValNo); for (LiveInterval::SubRange &S : IntB.subranges()) { VNInfo *SubDVNI = S.getVNInfoAt(DefIdx); if (!SubDVNI) continue; VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); assert(SubBValNo->def == CopyIdx); - VNInfo *Merged = S.MergeValueNumberInto(SubBValNo, SubDVNI); - Merged->def = CopyIdx; + S.MergeValueNumberInto(SubDVNI, SubBValNo); } ErasedInstrs.insert(UseMI); @@ -809,7 +847,6 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo); } - SA.removeValNo(ASubValNo); } } @@ -817,25 +854,30 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); - IntA.removeValNo(AValNo); - // Remove valuenos in subranges (the A+B have subranges case has already been - // handled above) - if (!IntB.hasSubRanges()) { - SlotIndex AIdx = CopyIdx.getRegSlot(true); - for (LiveInterval::SubRange &SA : IntA.subranges()) { - VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); - assert(ASubValNo != nullptr); - SA.removeValNo(ASubValNo); - } - } + LIS->removeVRegDefAt(IntA, AValNo->def); + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); ++numCommutes; return true; } -/// If the source of a copy is defined by a trivial -/// computation, replace the copy by rematerialize the definition. -bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, +/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just +/// defining a subregister. +static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { + assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && + "This code cannot handle physreg aliasing"); + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + // Return true if we define the full register or don't care about the value + // inside other subregisters. 
+ if (Op.getSubReg() == 0 || Op.isUndef()) + return true; + } + return false; +} + +bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI, bool &IsDefCopy) { IsDefCopy = false; @@ -863,8 +905,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) return false; + if (!definesFullReg(*DefMI, SrcReg)) + return false; bool SawStore = false; - if (!DefMI->isSafeToMove(TII, AA, SawStore)) + if (!DefMI->isSafeToMove(AA, SawStore)) return false; const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) @@ -911,6 +955,28 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); MachineInstr *NewMI = std::prev(MII); + // In a situation like the following: + // %vreg0:subreg = instr ; DefMI, subreg = DstIdx + // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0 + // instead of widening %vreg1 to the register class of %vreg0 simply do: + // %vreg1 = instr + const TargetRegisterClass *NewRC = CP.getNewRC(); + if (DstIdx != 0) { + MachineOperand &DefMO = NewMI->getOperand(0); + if (DefMO.getSubReg() == DstIdx) { + assert(SrcIdx == 0 && CP.isFlipped() + && "Shouldn't have SrcIdx+DstIdx at this point"); + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + const TargetRegisterClass *CommonRC = + TRI->getCommonSubClass(DefRC, DstRC); + if (CommonRC != nullptr) { + NewRC = CommonRC; + DstIdx = 0; + DefMO.setSubReg(0); + } + } + } + LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ErasedInstrs.insert(CopyMI); @@ -922,23 +988,23 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, for (unsigned i = NewMI->getDesc().getNumOperands(), e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); - if (MO.isReg()) { - assert(MO.isDef() && MO.isImplicit() && MO.isDead() && + if (MO.isReg() && MO.isDef()) { + assert(MO.isImplicit() && MO.isDead() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())); NewMIImplDefs.push_back(MO.getReg()); } } if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - const TargetRegisterClass *NewRC = CP.getNewRC(); unsigned NewIdx = NewMI->getOperand(0).getSubReg(); - if (NewIdx) - NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); - else - NewRC = TRI->getCommonSubClass(NewRC, DefRC); - - assert(NewRC && "subreg chosen for remat incompatible with instruction"); + if (DefRC != nullptr) { + if (NewIdx) + NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); + else + NewRC = TRI->getCommonSubClass(NewRC, DefRC); + assert(NewRC && "subreg chosen for remat incompatible with instruction"); + } MRI->setRegClass(DstReg, NewRC); updateRegDefsUses(DstReg, DstReg, DstIdx); @@ -1005,7 +1071,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, ++NumReMats; // The source interval can become smaller because we removed a use. - LIS->shrinkToUses(&SrcInt, &DeadDefs); + shrinkToUses(&SrcInt, &DeadDefs); if (!DeadDefs.empty()) { // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs // to describe DstReg instead. 
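// A minimal set-based sketch of the class-narrowing decision made above for
// rematerialization: plain sets of register ids stand in for register
// classes, and an empty intersection means no common subclass exists, so the
// original class and subregister index are kept instead.
#include <algorithm>
#include <iterator>
#include <set>
static std::set<unsigned> commonSubClassSketch(const std::set<unsigned> &A,
                                               const std::set<unsigned> &B) {
  std::set<unsigned> Common;
  std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                        std::inserter(Common, Common.begin()));
  return Common;
}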
@@ -1022,21 +1088,15 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
 return true;
 }
-static void removeUndefValue(LiveRange &LR, SlotIndex At)
-{
- VNInfo *VNInfo = LR.getVNInfoAt(At);
- assert(VNInfo != nullptr && SlotIndex::isSameInstr(VNInfo->def, At));
- LR.removeValNo(VNInfo);
-}
-
-/// ProcessImplicitDefs may leave some copies of <undef>
-/// values; it only removes local variables. When we have a copy like:
-///
-/// %vreg1 = COPY %vreg2<undef>
-///
-/// We delete the copy and remove the corresponding value number from %vreg1.
-/// Any uses of that value number are marked as <undef>.
 bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
+ // ProcessImplicitDefs may leave some copies of <undef> values; it only removes
+ // local variables. When we have a copy like:
+ //
+ // %vreg1 = COPY %vreg2<undef>
+ //
+ // We delete the copy and remove the corresponding value number from %vreg1.
+ // Any uses of that value number are marked as <undef>.
+
 // Note that we do not query CoalescerPair here but redo isMoveInstr as the
 // CoalescerPair may have a new register class with adjusted subreg indices
 // at this point.
@@ -1061,22 +1121,25 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
 // Remove any DstReg segments starting at the instruction.
 LiveInterval &DstLI = LIS->getInterval(DstReg);
- unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
 SlotIndex RegIndex = Idx.getRegSlot();
- for (LiveInterval::SubRange &SR : DstLI.subranges()) {
- if ((SR.LaneMask & DstMask) == 0)
- continue;
- removeUndefValue(SR, RegIndex);
-
- DstLI.removeEmptySubRanges();
- }
 // Remove value or merge with previous one in case of a subregister def.
 if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
- VNInfo *VNInfo = DstLI.getVNInfoAt(RegIndex);
- DstLI.MergeValueNumberInto(VNInfo, PrevVNI);
- } else {
- removeUndefValue(DstLI, RegIndex);
- }
+ VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
+ DstLI.MergeValueNumberInto(VNI, PrevVNI);
+
+ // The affected subregister segments can be removed.
+ unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
+ for (LiveInterval::SubRange &SR : DstLI.subranges()) {
+ if ((SR.LaneMask & DstMask) == 0)
+ continue;
+
+ VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
+ assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
+ SR.removeValNo(SVNI);
+ }
+ DstLI.removeEmptySubRanges();
+ } else
+ LIS->removeVRegDefAt(DstLI, RegIndex);
 // Mark uses as undef.
 for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
@@ -1106,10 +1169,6 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
 return true;
 }
-/// Replace all defs and uses of SrcReg to DstReg and update the subregister
-/// number if it is not zero. If DstReg is a physical register and the existing
-/// subregister number of the def / use being updated is not zero, make sure to
-/// set it to the correct physical subregister.
 void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
 unsigned DstReg,
 unsigned SubIdx) {
@@ -1151,7 +1210,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
 // A subreg use of a partially undef (super) register may be a complete
 // undef use now and then has to be marked that way.
- if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) { + if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); @@ -1198,29 +1257,23 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, } } -/// Return true if a copy involving a physreg should be joined. bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { - /// Always join simple intervals that are defined by a single copy from a - /// reserved register. This doesn't increase register pressure, so it is - /// always beneficial. + // Always join simple intervals that are defined by a single copy from a + // reserved register. This doesn't increase register pressure, so it is + // always beneficial. if (!MRI->isReserved(CP.getDstReg())) { DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); return false; } LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); - if (CP.isFlipped() && JoinVInt.containsOneValue()) + if (JoinVInt.containsOneValue()) return true; - DEBUG(dbgs() << "\tCannot join defs into reserved register.\n"); + DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n"); return false; } -/// Attempt to join intervals corresponding to SrcReg/DstReg, -/// which are the src/dst of the copy instruction CopyMI. This returns true -/// if the copy was successfully coalesced away. If it is not currently -/// possible to coalesce this interval, but it may be possible if other -/// things get coalesced, then it returns true by reference in 'Again'. bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; @@ -1399,7 +1452,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { } if (ShrinkMainRange) { LiveInterval &LI = LIS->getInterval(CP.getDstReg()); - LIS->shrinkToUses(&LI); + shrinkToUses(&LI); } // SrcReg is guaranteed to be the register whose live interval that is @@ -1407,7 +1460,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { LIS->removeInterval(CP.getSrcReg()); // Update regalloc hint. - TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); + TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) @@ -1424,24 +1477,23 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { return true; } -/// Attempt joining with a reserved physreg. bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { + unsigned DstReg = CP.getDstReg(); assert(CP.isPhys() && "Must be a physreg copy"); - assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); + assert(MRI->isReserved(DstReg) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); - assert(CP.isFlipped() && RHS.containsOneValue() && - "Invalid join with reserved register"); + assert(RHS.containsOneValue() && "Invalid join with reserved register"); // Optimization for reserved registers like ESP. We can only merge with a - // reserved physreg if RHS has a single value that is a copy of CP.DstReg(). + // reserved physreg if RHS has a single value that is a copy of DstReg. // The live range of the reserved register will look like a set of dead defs // - we don't properly track the live range of reserved registers. // Deny any overlapping intervals. 
This depends on all the reserved // register live ranges to look like dead defs. - for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI) + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) if (RHS.overlaps(LIS->getRegUnit(*UI))) { DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); return false; @@ -1453,7 +1505,46 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // defs are there. // Delete the identity copy. - MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + MachineInstr *CopyMI; + if (CP.isFlipped()) { + CopyMI = MRI->getVRegDef(RHS.reg); + } else { + if (!MRI->hasOneNonDBGUse(RHS.reg)) { + DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); + return false; + } + + MachineInstr *DestMI = MRI->getVRegDef(RHS.reg); + CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg); + const SlotIndex CopyRegIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + const SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + + // We checked above that there are no interfering defs of the physical + // register. However, for this case, where we intent to move up the def of + // the physical register, we also need to check for interfering uses. + SlotIndexes *Indexes = LIS->getSlotIndexes(); + for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); + SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { + MachineInstr *MI = LIS->getInstructionFromIndex(SI); + if (MI->readsRegister(DstReg, TRI)) { + DEBUG(dbgs() << "\t\tInterference (read): " << *MI); + return false; + } + } + + // We're going to remove the copy which defines a physical reserved + // register, so remove its valno, etc. + DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at " + << CopyRegIdx << "\n"); + + LIS->removePhysRegDefAt(DstReg, CopyRegIdx); + // Create a new dead def at the new def location. + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { + LiveRange &LR = LIS->getRegUnit(*UI); + LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator()); + } + } + LIS->RemoveMachineInstrFromMaps(CopyMI); CopyMI->eraseFromParent(); @@ -1538,11 +1629,12 @@ class JoinVals { /// (Main) register we work on. const unsigned Reg; - // Reg (and therefore the values in this liverange) will end up as subregister - // SubIdx in the coalesced register. Either CP.DstIdx or CP.SrcIdx. + /// Reg (and therefore the values in this liverange) will end up as + /// subregister SubIdx in the coalesced register. Either CP.DstIdx or + /// CP.SrcIdx. const unsigned SubIdx; - // The LaneMask that this liverange will occupy the coalesced register. May be - // smaller than the lanemask produced by SubIdx when merging subranges. + /// The LaneMask that this liverange will occupy the coalesced register. May + /// be smaller than the lanemask produced by SubIdx when merging subranges. const unsigned LaneMask; /// This is true when joining sub register ranges, false when joining main @@ -1551,7 +1643,7 @@ class JoinVals { /// Whether the current LiveInterval tracks subregister liveness. const bool TrackSubRegLiveness; - // Values that will be present in the final live range. + /// Values that will be present in the final live range. SmallVectorImpl<VNInfo*> &NewVNInfo; const CoalescerPair &CP; @@ -1559,75 +1651,75 @@ class JoinVals { SlotIndexes *Indexes; const TargetRegisterInfo *TRI; - // Value number assignments. Maps value numbers in LI to entries in NewVNInfo. - // This is suitable for passing to LiveInterval::join(). + /// Value number assignments. 
Maps value numbers in LI to entries in + /// NewVNInfo. This is suitable for passing to LiveInterval::join(). SmallVector<int, 8> Assignments; - // Conflict resolution for overlapping values. + /// Conflict resolution for overlapping values. enum ConflictResolution { - // No overlap, simply keep this value. + /// No overlap, simply keep this value. CR_Keep, - // Merge this value into OtherVNI and erase the defining instruction. - // Used for IMPLICIT_DEF, coalescable copies, and copies from external - // values. + /// Merge this value into OtherVNI and erase the defining instruction. + /// Used for IMPLICIT_DEF, coalescable copies, and copies from external + /// values. CR_Erase, - // Merge this value into OtherVNI but keep the defining instruction. - // This is for the special case where OtherVNI is defined by the same - // instruction. + /// Merge this value into OtherVNI but keep the defining instruction. + /// This is for the special case where OtherVNI is defined by the same + /// instruction. CR_Merge, - // Keep this value, and have it replace OtherVNI where possible. This - // complicates value mapping since OtherVNI maps to two different values - // before and after this def. - // Used when clobbering undefined or dead lanes. + /// Keep this value, and have it replace OtherVNI where possible. This + /// complicates value mapping since OtherVNI maps to two different values + /// before and after this def. + /// Used when clobbering undefined or dead lanes. CR_Replace, - // Unresolved conflict. Visit later when all values have been mapped. + /// Unresolved conflict. Visit later when all values have been mapped. CR_Unresolved, - // Unresolvable conflict. Abort the join. + /// Unresolvable conflict. Abort the join. CR_Impossible }; - // Per-value info for LI. The lane bit masks are all relative to the final - // joined register, so they can be compared directly between SrcReg and - // DstReg. + /// Per-value info for LI. The lane bit masks are all relative to the final + /// joined register, so they can be compared directly between SrcReg and + /// DstReg. struct Val { ConflictResolution Resolution; - // Lanes written by this def, 0 for unanalyzed values. + /// Lanes written by this def, 0 for unanalyzed values. unsigned WriteLanes; - // Lanes with defined values in this register. Other lanes are undef and - // safe to clobber. + /// Lanes with defined values in this register. Other lanes are undef and + /// safe to clobber. unsigned ValidLanes; - // Value in LI being redefined by this def. + /// Value in LI being redefined by this def. VNInfo *RedefVNI; - // Value in the other live range that overlaps this def, if any. + /// Value in the other live range that overlaps this def, if any. VNInfo *OtherVNI; - // Is this value an IMPLICIT_DEF that can be erased? - // - // IMPLICIT_DEF values should only exist at the end of a basic block that - // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be - // safely erased if they are overlapping a live value in the other live - // interval. - // - // Weird control flow graphs and incomplete PHI handling in - // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with - // longer live ranges. Such IMPLICIT_DEF values should be treated like - // normal values. + /// Is this value an IMPLICIT_DEF that can be erased? + /// + /// IMPLICIT_DEF values should only exist at the end of a basic block that + /// is a predecessor to a phi-value. 
These IMPLICIT_DEF instructions can be + /// safely erased if they are overlapping a live value in the other live + /// interval. + /// + /// Weird control flow graphs and incomplete PHI handling in + /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with + /// longer live ranges. Such IMPLICIT_DEF values should be treated like + /// normal values. bool ErasableImplicitDef; - // True when the live range of this value will be pruned because of an - // overlapping CR_Replace value in the other live range. + /// True when the live range of this value will be pruned because of an + /// overlapping CR_Replace value in the other live range. bool Pruned; - // True once Pruned above has been computed. + /// True once Pruned above has been computed. bool PrunedComputed; Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), @@ -1637,17 +1729,61 @@ class JoinVals { bool isAnalyzed() const { return WriteLanes != 0; } }; - // One entry per value number in LI. + /// One entry per value number in LI. SmallVector<Val, 8> Vals; + /// Compute the bitmask of lanes actually written by DefMI. + /// Set Redef if there are any partial register definitions that depend on the + /// previous value of the register. unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; + + /// Find the ultimate value that VNI was copied from. std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; + bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; + + /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. + /// Return a conflict resolution when possible, but leave the hard cases as + /// CR_Unresolved. + /// Recursively calls computeAssignment() on this and Other, guaranteeing that + /// both OtherVNI and RedefVNI have been analyzed and mapped before returning. + /// The recursion always goes upwards in the dominator tree, making loops + /// impossible. ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); + + /// Compute the value assignment for ValNo in RI. + /// This may be called recursively by analyzeValue(), but never for a ValNo on + /// the stack. void computeAssignment(unsigned ValNo, JoinVals &Other); + + /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute + /// the extent of the tainted lanes in the block. + /// + /// Multiple values in Other.LR can be affected since partial redefinitions + /// can preserve previously tainted lanes. + /// + /// 1 %dst = VLOAD <-- Define all lanes in %dst + /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 + /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 + /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read + /// + /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) + /// entry to TaintedVals. + /// + /// Returns false if the tainted lanes extend beyond the basic block. bool taintExtent(unsigned, unsigned, JoinVals&, SmallVectorImpl<std::pair<SlotIndex, unsigned> >&); + + /// Return true if MI uses any of the given Lanes from Reg. + /// This does not include partial redefinitions of Reg. bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const; + + /// Determine if ValNo is a copy of a value number in LR or Other.LR that will + /// be pruned: + /// + /// %dst = COPY %src + /// %src = COPY %dst <-- This value to be pruned. + /// %dst = COPY %src <-- This value is a copy of a pruned value. 
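// A minimal generic sketch (not JoinVals' exact code) of the memoization
// behind the query declared below: a PrunedComputed flag makes the recursive
// walk over copy chains terminate and run at most once per value.
#include <vector>
struct PrunedInfoSketch { bool Pruned = false; bool PrunedComputed = false; };
static bool isPrunedSketch(unsigned ValNo, std::vector<PrunedInfoSketch> &Vals) {
  PrunedInfoSketch &V = Vals[ValNo];
  if (V.PrunedComputed)
    return V.Pruned;
  V.PrunedComputed = true;
  // A full implementation would recurse into the defining copy's source
  // value here and propagate its Pruned flag before returning.
  return V.Pruned;
}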
bool isPrunedValue(unsigned ValNo, JoinVals &Other);
 public:
@@ -1675,9 +1811,9 @@ public:
 void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
 bool changeInstrs);
- // Removes subranges starting at copies that get removed. This sometimes
- // happens when undefined subranges are copied around. These ranges contain
- // no usefull information and can be removed.
+ /// Removes subranges starting at copies that get removed. This sometimes
+ /// happens when undefined subranges are copied around. These ranges contain
+ /// no useful information and can be removed.
 void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
 /// Erase any machine instructions that have been coalesced away.
@@ -1687,14 +1823,14 @@ public:
 void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
 SmallVectorImpl<unsigned> &ShrinkRegs);
+ /// Remove live range defs at places where implicit defs will be removed.
+ void removeImplicitDefs();
+
 /// Get the value assignments suitable for passing to LiveInterval::join.
 const int *getAssignments() const { return Assignments.data(); }
 };
} // end anonymous namespace
-/// Compute the bitmask of lanes actually written by DefMI.
-/// Set Redef if there are any partial register definitions that depend on the
-/// previous value of the register.
 unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
 const {
 unsigned L = 0;
@@ -1709,7 +1845,6 @@ unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
 return L;
 }
-/// Find the ultimate value that VNI was copied from.
 std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
 const VNInfo *VNI) const {
 unsigned Reg = this->Reg;
@@ -1770,13 +1905,6 @@ bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
 return Orig0->def == Orig1->def && Reg0 == Reg1;
 }
-/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
-/// Return a conflict resolution when possible, but leave the hard cases as
-/// CR_Unresolved.
-/// Recursively calls computeAssignment() on this and Other, guaranteeing that
-/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
-/// The recursion always goes upwards in the dominator tree, making loops
-/// impossible.
 JoinVals::ConflictResolution
 JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
 Val &V = Vals[ValNo];
@@ -1798,7 +1926,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
 assert(DefMI != nullptr);
 if (SubRangeJoin) {
 // We don't care about the lanes when joining subregister ranges.
- V.ValidLanes = V.WriteLanes = 1;
+ V.WriteLanes = V.ValidLanes = 1;
+ if (DefMI->isImplicitDef()) {
+ V.ValidLanes = 0;
+ V.ErasableImplicitDef = true;
+ }
 } else {
 bool Redef = false;
 V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
@@ -1995,9 +2127,6 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
 return CR_Unresolved;
 }
-/// Compute the value assignment for ValNo in RI.
-/// This may be called recursively by analyzeValue(), but never for a ValNo on
-/// the stack.
 void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
 Val &V = Vals[ValNo];
 if (V.isAnalyzed()) {
@@ -2051,21 +2180,6 @@ bool JoinVals::mapValues(JoinVals &Other) {
 return true;
 }
-/// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
-/// the extent of the tainted lanes in the block.
-///
-/// Multiple values in Other.LR can be affected since partial redefinitions can
-/// preserve previously tainted lanes.
-/// -/// 1 %dst = VLOAD <-- Define all lanes in %dst -/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 -/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 -/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read -/// -/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) -/// entry to TaintedVals. -/// -/// Returns false if the tainted lanes extend beyond the basic block. bool JoinVals:: taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { @@ -2106,8 +2220,6 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, return true; } -/// Return true if MI uses any of the given Lanes from Reg. -/// This does not include partial redefinitions of Reg. bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, unsigned Lanes) const { if (MI->isDebugValue()) @@ -2189,13 +2301,6 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { return true; } -// Determine if ValNo is a copy of a value number in LR or Other.LR that will -// be pruned: -// -// %dst = COPY %src -// %src = COPY %dst <-- This value to be pruned. -// %dst = COPY %src <-- This value is a copy of a pruned value. -// bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { Val &V = Vals[ValNo]; if (V.Pruned || V.PrunedComputed) @@ -2308,25 +2413,39 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) LI.removeEmptySubRanges(); } +void JoinVals::removeImplicitDefs() { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned) + continue; + + VNInfo *VNI = LR.getValNumInfo(i); + VNI->markUnused(); + LR.removeValNo(VNI); + } +} + void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, SmallVectorImpl<unsigned> &ShrinkRegs) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { // Get the def location before markUnused() below invalidates it. SlotIndex Def = LR.getValNumInfo(i)->def; switch (Vals[i].Resolution) { - case CR_Keep: + case CR_Keep: { // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any // longer. The IMPLICIT_DEF instructions are only inserted by // PHIElimination to guarantee that all PHI predecessors have a value. if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) break; - // Remove value number i from LR. Note that this VNInfo is still present - // in NewVNInfo, so it will appear as an unused value number in the final - // joined interval. - LR.getValNumInfo(i)->markUnused(); - LR.removeValNo(LR.getValNumInfo(i)); + // Remove value number i from LR. + VNInfo *VNI = LR.getValNumInfo(i); + LR.removeValNo(VNI); + // Note that this VNInfo is reused and still referenced in NewVNInfo, + // make it appear like an unused value number. + VNI->markUnused(); DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'); // FALL THROUGH. 
+ } case CR_Erase: { MachineInstr *MI = Indexes->getInstructionFromIndex(Def); @@ -2349,7 +2468,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, } } -void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, +bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, unsigned LaneMask, const CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; @@ -2358,13 +2477,20 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask, NewVNInfo, CP, LIS, TRI, true, true); - /// Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) - /// Conflicts should already be resolved so the mapping/resolution should - /// always succeed. - if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) - llvm_unreachable("Can't join subrange although main ranges are compatible"); - if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) - llvm_unreachable("Can't join subrange although main ranges are compatible"); + // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) + // We should be able to resolve all conflicts here as we could successfully do + // it on the mainrange already. There is however a problem when multiple + // ranges get mapped to the "overflow" lane mask bit which creates unexpected + // interferences. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { + DEBUG(dbgs() << "*** Couldn't join subrange!\n"); + return false; + } + if (!LHSVals.resolveConflicts(RHSVals) || + !RHSVals.resolveConflicts(LHSVals)) { + DEBUG(dbgs() << "*** Couldn't join subrange!\n"); + return false; + } // The merging algorithm in LiveInterval::join() can't handle conflicting // value mappings, so we need to remove any live ranges that overlap a @@ -2374,6 +2500,9 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, LHSVals.pruneValues(RHSVals, EndPoints, false); RHSVals.pruneValues(LHSVals, EndPoints, false); + LHSVals.removeImplicitDefs(); + RHSVals.removeImplicitDefs(); + LRange.verify(); RRange.verify(); @@ -2383,16 +2512,17 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); if (EndPoints.empty()) - return; + return true; // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. 
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " << LRange << '\n'); LIS->extendToIndices(LRange, EndPoints); + return true; } -void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, +bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, unsigned LaneMask, CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); @@ -2420,7 +2550,8 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, CommonRange = &R; } LiveRange RangeCopy(ToMerge, Allocator); - joinSubRegRanges(*CommonRange, RangeCopy, Common, CP); + if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP)) + return false; LaneMask &= ~RMask; } @@ -2428,13 +2559,14 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask)); LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); } + return true; } bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); - bool TrackSubRegLiveness = MRI->tracksSubRegLiveness(); + bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC()); JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS, @@ -2478,22 +2610,40 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Determine lanemasks of RHS in the coalesced register and merge subranges. unsigned SrcIdx = CP.getSrcIdx(); + bool Abort = false; if (!RHS.hasSubRanges()) { unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() : TRI->getSubRegIndexLaneMask(SrcIdx); - mergeSubRangeInto(LHS, RHS, Mask, CP); + if (!mergeSubRangeInto(LHS, RHS, Mask, CP)) + Abort = true; } else { // Pair up subranges and merge. for (LiveInterval::SubRange &R : RHS.subranges()) { unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); - mergeSubRangeInto(LHS, R, Mask, CP); + if (!mergeSubRangeInto(LHS, R, Mask, CP)) { + Abort = true; + break; + } } } + if (Abort) { + // This shouldn't have happened :-( + // However we are aware of at least one existing problem where we + // can't merge subranges when multiple ranges end up in the + // "overflow bit" 32. As a workaround we drop all subregister ranges + // which means we loose some precision but are back to a well defined + // state. + assert((CP.getNewRC()->getLaneMask() & 0x80000000u) + && "SubRange merge should only fail when merging into bit 32."); + DEBUG(dbgs() << "\tSubrange join aborted!\n"); + LHS.clearSubRanges(); + RHS.clearSubRanges(); + } else { + DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - - LHSVals.pruneSubRegValues(LHS, ShrinkMask); - RHSVals.pruneSubRegValues(LHS, ShrinkMask); + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); + } } // The merging algorithm in LiveInterval::join() can't handle conflicting @@ -2510,7 +2660,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); while (!ShrinkRegs.empty()) - LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); + shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); // Join RHS into LHS. 
LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); @@ -2532,13 +2682,12 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { return true; } -/// Attempt to join these two intervals. On failure, this returns false. bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); } namespace { -// Information concerning MBB coalescing priority. +/// Information concerning MBB coalescing priority. struct MBBPriorityInfo { MachineBasicBlock *MBB; unsigned Depth; @@ -2549,10 +2698,10 @@ struct MBBPriorityInfo { }; } -// C-style comparator that sorts first based on the loop depth of the basic -// block (the unsigned), and then on the MBB number. -// -// EnableGlobalCopies assumes that the primary sort key is loop depth. +/// C-style comparator that sorts first based on the loop depth of the basic +/// block (the unsigned), and then on the MBB number. +/// +/// EnableGlobalCopies assumes that the primary sort key is loop depth. static int compareMBBPriority(const MBBPriorityInfo *LHS, const MBBPriorityInfo *RHS) { // Deeper loops first @@ -2592,8 +2741,6 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg)); } -// Try joining WorkList copies starting from index From. -// Null out any successful joins. bool RegisterCoalescer:: copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { bool Progress = false; @@ -2615,6 +2762,64 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { return Progress; } +/// Check if DstReg is a terminal node. +/// I.e., it does not have any affinity other than \p Copy. +static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, + const MachineRegisterInfo *MRI) { + assert(Copy.isCopyLike()); + // Check if the destination of this copy as any other affinity. + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg)) + if (&MI != &Copy && MI.isCopyLike()) + return false; + return true; +} + +bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { + assert(Copy.isCopyLike()); + if (!UseTerminalRule) + return false; + unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; + isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg); + // Check if the destination of this copy has any other affinity. + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + // If SrcReg is a physical register, the copy won't be coalesced. + // Ignoring it may have other side effect (like missing + // rematerialization). So keep it. + TargetRegisterInfo::isPhysicalRegister(SrcReg) || + !isTerminalReg(DstReg, Copy, MRI)) + return false; + + // DstReg is a terminal node. Check if it inteferes with any other + // copy involving SrcReg. + const MachineBasicBlock *OrigBB = Copy.getParent(); + const LiveInterval &DstLI = LIS->getInterval(DstReg); + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { + // Technically we should check if the weight of the new copy is + // interesting compared to the other one and update the weight + // of the copies accordingly. However, this would only work if + // we would gather all the copies first then coalesce, whereas + // right now we interleave both actions. + // For now, just consider the copies that are in the same block. 
+ if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) + continue; + unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; + isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, + OtherSubReg); + if (OtherReg == SrcReg) + OtherReg = OtherSrcReg; + // Check if OtherReg is a non-terminal. + if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || + isTerminalReg(OtherReg, MI, MRI)) + continue; + // Check that OtherReg interfere with DstReg. + if (LIS->getInterval(OtherReg).overlaps(DstLI)) { + DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n'); + return true; + } + } + return false; +} + void RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { DEBUG(dbgs() << MBB->getName() << ":\n"); @@ -2623,6 +2828,8 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // yet, it might invalidate the iterator. const unsigned PrevSize = WorkList.size(); if (JoinGlobalCopies) { + SmallVector<MachineInstr*, 2> LocalTerminals; + SmallVector<MachineInstr*, 2> GlobalTerminals; // Coalesce copies bottom-up to coalesce local defs before local uses. They // are not inherently easier to resolve, but slightly preferable until we // have local live range splitting. In particular this is required by @@ -2631,17 +2838,35 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { MII != E; ++MII) { if (!MII->isCopyLike()) continue; - if (isLocalCopy(&(*MII), LIS)) - LocalWorkList.push_back(&(*MII)); - else - WorkList.push_back(&(*MII)); + bool ApplyTerminalRule = applyTerminalRule(*MII); + if (isLocalCopy(&(*MII), LIS)) { + if (ApplyTerminalRule) + LocalTerminals.push_back(&(*MII)); + else + LocalWorkList.push_back(&(*MII)); + } else { + if (ApplyTerminalRule) + GlobalTerminals.push_back(&(*MII)); + else + WorkList.push_back(&(*MII)); + } } + // Append the copies evicted by the terminal rule at the end of the list. + LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end()); + WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end()); } else { + SmallVector<MachineInstr*, 2> Terminals; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) - if (MII->isCopyLike()) - WorkList.push_back(MII); + if (MII->isCopyLike()) { + if (applyTerminalRule(*MII)) + Terminals.push_back(&(*MII)); + else + WorkList.push_back(MII); + } + // Append the copies evicted by the terminal rule at the end of the list. + WorkList.append(Terminals.begin(), Terminals.end()); } // Try coalescing the collected copies immediately, and remove the nulls. 
// This prevents the WorkList from getting too large since most copies are @@ -2704,15 +2929,14 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { MF = &fn; MRI = &fn.getRegInfo(); TM = &fn.getTarget(); - TRI = TM->getSubtargetImpl()->getRegisterInfo(); - TII = TM->getSubtargetImpl()->getInstrInfo(); + const TargetSubtargetInfo &STI = fn.getSubtarget(); + TRI = STI.getRegisterInfo(); + TII = STI.getInstrInfo(); LIS = &getAnalysis<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); Loops = &getAnalysis<MachineLoopInfo>(); - - const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); if (EnableGlobalCopies == cl::BOU_UNSET) - JoinGlobalCopies = ST.useMachineScheduler(); + JoinGlobalCopies = STI.enableJoinGlobalCopies(); else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); @@ -2744,7 +2968,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = InflateRegs[i]; if (MRI->reg_nodbg_empty(Reg)) continue; - if (MRI->recomputeRegClass(Reg, *TM)) { + if (MRI->recomputeRegClass(Reg)) { DEBUG(dbgs() << PrintReg(Reg) << " inflated to " << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); LiveInterval &LI = LIS->getInterval(Reg); @@ -2754,9 +2978,9 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // remove the subranges. LI.clearSubRanges(); } else { +#ifndef NDEBUG // If subranges are still supported, then the same subregs should still // be supported. -#ifndef NDEBUG for (LiveInterval::SubRange &S : LI.subranges()) { assert ((S.LaneMask & ~MaxMask) == 0); } @@ -2772,7 +2996,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { return true; } -/// Implement the dump method. void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { LIS->print(O, m); } diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 9925efb..667783e 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -304,6 +304,7 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end(); } +namespace { /// Collect this instruction's unique uses and defs into SmallVectors for /// processing defs and uses in order. /// @@ -354,6 +355,7 @@ protected: } } }; +} // namespace /// Collect physical and virtual register operands. static void collectOperands(const MachineInstr *MI, @@ -748,9 +750,11 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { /// /// This assumes that the current LiveOut set is sufficient. /// -/// FIXME: This is expensive for an on-the-fly query. We need to cache the -/// result per-SUnit with enough information to adjust for the current -/// scheduling position. But this works as a proof of concept. +/// This is expensive for an on-the-fly query because it calls +/// bumpUpwardPressure to recompute the pressure sets based on current +/// liveness. This mainly exists to verify correctness, e.g. with +/// -verify-misched. getUpwardPressureDelta is the fast version of this query +/// that uses the per-SUnit cache of the PressureDiff. 
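A usage sketch contrasting the two upward-pressure queries described above. The signatures are abridged and the equality check is illustrative; treat both as assumptions rather than the in-tree verification:

// Sketch: the fast query reads the cached per-SUnit PressureDiff; the Max
// variant recomputes from live sets and is intended for verification,
// e.g. under -verify-misched.
RegPressureDelta FastDelta, SlowDelta;
RPTracker.getUpwardPressureDelta(MI, *PDiff, FastDelta, CriticalPSets,
                                 MaxPressureLimit);    // cached, fast
#ifndef NDEBUG
RPTracker.getMaxUpwardPressureDelta(MI, PDiff, SlowDelta, CriticalPSets,
                                    MaxPressureLimit); // recomputed, slow
assert(FastDelta == SlowDelta && "cached delta diverged from recomputation");
#endif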
void RegPressureTracker:: getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, @@ -807,10 +811,8 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, #endif } -/// This is a prototype of the fast version of querying register pressure that -/// does not directly depend on current liveness. It's still slow because we -/// recompute pressure change on-the-fly. This implementation only exists to -/// prove correctness. +/// This is the fast version of querying register pressure that does not +/// directly depend on current liveness. /// /// @param Delta captures information needed for heuristics. /// @@ -948,6 +950,11 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { /// register units of that pressure set introduced by this instruction. /// /// This assumes that the current LiveIn set is sufficient. +/// +/// This is expensive for an on-the-fly query because it calls +/// bumpDownwardPressure to recompute the pressure sets based on current +/// liveness. We don't yet have a fast version of downward pressure tracking +/// analagous to getUpwardPressureDelta. void RegPressureTracker:: getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef<PressureChange> CriticalPSets, diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index 6f8b337..76a7fef 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -36,8 +36,8 @@ static cl::opt<bool> StressSchedOpt( void SchedulingPriorityQueue::anchor() { } ScheduleDAG::ScheduleDAG(MachineFunction &mf) - : TM(mf.getTarget()), TII(TM.getSubtargetImpl()->getInstrInfo()), - TRI(TM.getSubtargetImpl()->getRegisterInfo()), MF(mf), + : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()), + TRI(mf.getSubtarget().getRegisterInfo()), MF(mf), MRI(mf.getRegInfo()), EntrySU(), ExitSU() { #ifndef NDEBUG StressSched = StressSchedOpt; diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 409e704..c60c518 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -27,7 +28,6 @@ #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDFS.h" #include "llvm/IR/Operator.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" @@ -51,18 +51,17 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - bool IsPostRAFlag, - bool RemoveKillFlags, + bool IsPostRAFlag, bool RemoveKillFlags, LiveIntervals *lis) - : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis), - IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), - CanHandleTerminators(false), FirstDbgValue(nullptr) { + : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis), + IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), + CanHandleTerminators(false), FirstDbgValue(nullptr) { assert((IsPostRA || LIS) && "PreRA scheduling requires 
LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && "Virtual registers must be removed prior to PostRA scheduling"); - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = mf.getSubtarget(); SchedModel.init(ST.getSchedModel(), &ST, TII); } @@ -97,14 +96,15 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { /// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. static void getUnderlyingObjects(const Value *V, - SmallVectorImpl<Value *> &Objects) { + SmallVectorImpl<Value *> &Objects, + const DataLayout &DL) { SmallPtrSet<const Value *, 16> Visited; SmallVector<const Value *, 4> Working(1, V); do { V = Working.pop_back_val(); SmallVector<Value *, 4> Objs; - GetUnderlyingObjects(const_cast<Value *>(V), Objs); + GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { @@ -133,7 +133,8 @@ UnderlyingObjectsVector; /// object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo *MFI, - UnderlyingObjectsVector &Objects) { + UnderlyingObjectsVector &Objects, + const DataLayout &DL) { if (!MI->hasOneMemOperand() || (!(*MI->memoperands_begin())->getValue() && !(*MI->memoperands_begin())->getPseudoValue()) || @@ -142,6 +143,13 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, if (const PseudoSourceValue *PSV = (*MI->memoperands_begin())->getPseudoValue()) { + // Function that contain tail calls don't have unique PseudoSourceValue + // objects. Two PseudoSourceValues might refer to the same or overlapping + // locations. The client code calling this function assumes this is not the + // case. So return a conservative answer of no known object. + if (MFI->hasTailCall()) + return; + // For now, ignore PseudoSourceValues which may alias LLVM IR values // because the code that uses this function has no way to cope with // such aliases. @@ -157,12 +165,9 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return; SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs); - - for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); - I != IE; ++I) { - V = *I; + getUnderlyingObjects(V, Objs, DL); + for (Value *V : Objs) { if (!isIdentifiedObject(V)) { Objects.clear(); return; @@ -253,7 +258,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { assert(MO.isDef() && "expect physreg def"); // Ask the target if address-backscheduling is desirable, and if so how much. - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); Alias.isValid(); ++Alias) { @@ -444,7 +449,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { int DefOp = Def->findRegisterDefOperandIdx(Reg); dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); SU->addPred(dep); } @@ -469,7 +474,8 @@ static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { // This MI might have either incomplete info, or known to be unsafe // to deal with (i.e. volatile object). 
static inline bool isUnsafeMemoryObject(MachineInstr *MI, - const MachineFrameInfo *MFI) { + const MachineFrameInfo *MFI, + const DataLayout &DL) { if (!MI || MI->memoperands_empty()) return true; // We purposefully do no check for hasOneMemOperand() here @@ -492,11 +498,10 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, return true; SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs); - for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), - IE = Objs.end(); I != IE; ++I) { + getUnderlyingObjects(V, Objs, DL); + for (Value *V : Objs) { // Does this pointer refer to a distinct and identifiable object? - if (!isIdentifiedObject(*I)) + if (!isIdentifiedObject(V)) return true; } @@ -509,7 +514,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, /// these two MIs be reordered during scheduling from memory dependency /// point of view. static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, - MachineInstr *MIa, + const DataLayout &DL, MachineInstr *MIa, MachineInstr *MIb) { const MachineFunction *MF = MIa->getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -528,7 +533,7 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) return true; - if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI)) + if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL)) return true; // If we are dealing with two "normal" loads, we do not need an edge @@ -580,10 +585,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// This recursive function iterates over chain deps of SUb looking for /// "latest" node that needs a chain edge to SUa. -static unsigned -iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth, - SmallPtrSetImpl<const SUnit*> &Visited) { +static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, SUnit *SUb, + SUnit *ExitSU, unsigned *Depth, + SmallPtrSetImpl<const SUnit *> &Visited) { if (!SUa || !SUb || SUb == ExitSU) return *Depth; @@ -608,7 +613,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, // add that edge to the predecessors chain of SUb, // and stop descending. if (*Depth > 200 || - MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SUb->addPred(SDep(SUa, SDep::MayAliasMem)); return *Depth; } @@ -618,7 +623,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); I != E; ++I) if (I->isNormalMemoryOrBarrier()) - iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited); + iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited); return *Depth; } @@ -627,7 +632,8 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// checks whether SU can be aliasing any node dominated /// by it. 
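For orientation, a simplified restatement of what isUnsafeMemoryObject treats as unsafe. The full predicate (partly elided by the hunk context) presumably also rejects volatile accesses and unmodeled side effects before walking the underlying objects; this sketch keeps only the leading conservative cases:

// Simplified sketch of the conservative cases that force chain edges.
static bool looksUnsafeSketch(const MachineInstr *MI) {
  if (!MI || MI->memoperands_empty())
    return true;                    // no memory info: assume the worst
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  if (MMO->isVolatile())
    return true;                    // volatile: preserve program order
  return !MMO->getValue() && !MMO->getPseudoValue();
}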
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList, + const DataLayout &DL, SUnit *SU, SUnit *ExitSU, + std::set<SUnit *> &CheckList, unsigned LatencyToLoad) { if (!SU) return; @@ -639,7 +645,7 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, I != IE; ++I) { if (SU == *I) continue; - if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) { + if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) { SDep Dep(SU, SDep::MayAliasMem); Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0); (*I)->addPred(Dep); @@ -650,22 +656,22 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), JE = (*I)->Succs.end(); J != JE; ++J) if (J->isNormalMemoryOrBarrier()) - iterateChainSucc (AA, MFI, SU, J->getSUnit(), - ExitSU, &Depth, Visited); + iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth, + Visited); } } /// Check whether two objects need a chain edge, if so, add it /// otherwise remember the rejected SU. -static inline -void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SUa, SUnit *SUb, - std::set<SUnit *> &RejectList, - unsigned TrueMemOrderLatency = 0, - bool isNormalMemory = false) { +static inline void addChainDependency(AliasAnalysis *AA, + const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, + SUnit *SUb, std::set<SUnit *> &RejectList, + unsigned TrueMemOrderLatency = 0, + bool isNormalMemory = false) { // If this is a false dependency, // do not add the edge, but rememeber the rejected node. - if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); SUb->addPred(Dep); @@ -743,7 +749,7 @@ void ScheduleDAGInstrs::initSUnits() { void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, PressureDiffs *PDiffs) { - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : nullptr; @@ -884,7 +890,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain = SU; // This is a barrier event that acts as a pivotal node in the DAG, // so it is safe to clear list of exposed nodes. 
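addChainDependency's else branch is cut off by the hunk above; per its comment, independent pairs are remembered rather than wired up. A hedged sketch of the overall shape, where the RejectList insertion is an assumption based on that comment:

// Shape sketch: a proven dependence becomes a chain edge whose kind and
// latency depend on isNormalMemory; an independent pair is remembered so
// adjustChainDeps can revisit it later.
if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
  SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
  Dep.setLatency(TrueMemOrderLatency);
  SUb->addPred(Dep);
} else {
  RejectList.insert(SUb); // assumed bookkeeping for the rejected node
}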
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); RejectMemNodes.clear(); NonAliasMemDefs.clear(); @@ -897,25 +903,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes, - ChainLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes); } for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, - TrueMemOrderLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, TrueMemOrderLatency); } - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); AliasMemDefs.clear(); @@ -929,7 +937,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain->addPred(SDep(SU, SDep::Barrier)); UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs); + getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); if (Objs.empty()) { // Treat all other stores conservatively. @@ -953,8 +961,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, - 0, true); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); // If we're not using AA, then we only need one store per object. if (!AAForDep) @@ -978,7 +986,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } @@ -987,23 +996,23 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. 
if (AliasChain) - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); - // But we also should check dependent instructions for the - // SU in question. - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, - TrueMemOrderLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes); } + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); } else if (MI->mayLoad()) { bool MayAlias = true; if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs); + getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); if (Objs.empty()) { // A load with no underlying object. Depend on all @@ -1011,8 +1020,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], - RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -1035,18 +1044,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], - RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else NonAliasMemUses[V].push_back(SU); } if (MayAlias) - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, + RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } @@ -1080,22 +1091,65 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { } } +/// \brief If we change a kill flag on the bundle instruction implicit register +/// operands, then we also need to propagate that to any instructions inside +/// the bundle which had the same kill state. +static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, + bool NewKillState) { + if (MI->getOpcode() != TargetOpcode::BUNDLE) + return; + + // Walk backwards from the last instruction in the bundle to the first. + // Once we set a kill flag on an instruction, we bail out, as otherwise we + // might set it on too many operands. We will clear as many flags as we + // can though. + MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + while (Begin != End) { + for (MIOperands MO(--End); MO.isValid(); ++MO) { + if (!MO->isReg() || MO->isDef() || Reg != MO->getReg()) + continue; + + // DEBUG_VALUE nodes do not contribute to code generation and should + // always be ignored. Failure to do so may result in trying to modify + // KILL flags on DEBUG_VALUE nodes, which is distressing. + if (MO->isDebug()) + continue; + + // If the register has the internal flag then it could be killing an + // internal def of the register. 
In this case, just skip. We only want + // to toggle the flag on operands visible outside the bundle. + if (MO->isInternalRead()) + continue; + + if (MO->isKill() == NewKillState) + continue; + MO->setIsKill(NewKillState); + if (NewKillState) + return; + } + } +} + bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { // Setting kill flag... if (!MO.isKill()) { MO.setIsKill(true); + toggleBundleKillFlag(MI, MO.getReg(), true); return false; } // If MO itself is live, clear the kill flag... if (LiveRegs.test(MO.getReg())) { MO.setIsKill(false); + toggleBundleKillFlag(MI, MO.getReg(), false); return false; } // If any subreg of MO is live, then create an imp-def for that // subreg and keep MO marked as killed. MO.setIsKill(false); + toggleBundleKillFlag(MI, MO.getReg(), false); bool AllDead = true; const unsigned SuperReg = MO.getReg(); MachineInstrBuilder MIB(MF, MI); @@ -1106,8 +1160,10 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { } } - if(AllDead) + if(AllDead) { MO.setIsKill(true); + toggleBundleKillFlag(MI, MO.getReg(), true); + } return false; } @@ -1180,6 +1236,12 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { // Warning: toggleKillFlag may invalidate MO. toggleKillFlag(MI, MO); DEBUG(MI->dump()); + DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) { + MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + while (++Begin != End) + DEBUG(Begin->dump()); + }); } killedRegs.set(Reg); @@ -1214,7 +1276,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { else if (SU == &ExitSU) oss << "<exit>"; else - SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true); + SU->getInstr()->print(oss, /*SkipOpers=*/true); return oss.str(); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1df61e4..2c2dc85 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -246,10 +246,11 @@ namespace { SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); SDValue visitAND(SDNode *N); + SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); + SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N); - SDValue SimplifyVUnaryOp(SDNode *N); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -267,6 +268,7 @@ namespace { SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); @@ -302,9 +304,16 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); + SDValue visitMGATHER(SDNode *N); + SDValue visitMSCATTER(SDNode *N); + SDValue visitFP_TO_FP16(SDNode *N); + + SDValue visitFADDForFMACombine(SDNode *N); + SDValue visitFSUBForFMACombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -327,6 +336,7 @@ namespace { SDValue 
SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue CombineExtLoad(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); @@ -363,6 +373,28 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Holds a pointer to an LSBaseSDNode as well as information on where it + /// is located in a sequence of memory operations connected by a chain. + struct MemOpLink { + MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): + MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + // Ptr to the mem node. + LSBaseSDNode *MemNode; + // Offset from the base ptr. + int64_t OffsetFromBase; + // What is the sequence number of this mem node. + // Lowest mem operand in the DAG starts at zero. + unsigned SequenceNum; + }; + + /// This is a helper function for MergeConsecutiveStores. When the source + /// elements of the consecutive stores are all constants or all extracted + /// vector elements, try to merge them into one larger store. + /// \return True if a merged store was created. + bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, + EVT MemVT, unsigned NumElem, + bool IsConstantSrc, bool UseVector); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -380,12 +412,9 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { - AttributeSet FnAttrs = - DAG.getMachineFunction().getFunction()->getAttributes(); - ForCodeSize = - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + auto *F = DAG.getMachineFunction().getFunction(); + ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || + F->hasFnAttribute(Attribute::MinSize); } /// Runs the dag combiner on all nodes in the work list @@ -446,7 +475,7 @@ void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { } SDValue TargetLowering::DAGCombinerInfo:: -CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { +CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); } @@ -566,7 +595,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::ConstantFP: { APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); V.changeSign(); - return DAG.getConstantFP(V, Op.getValueType()); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } case ISD::FADD: // FIXME: determine better conditions for this xform. @@ -683,13 +712,23 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { EltVT.getSizeInBits() >= SplatBitSize); } -// \brief Returns the SDNode if it is a constant BuildVector or constant. -static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { +// \brief Returns the SDNode if it is a constant integer BuildVector +// or constant integer. 
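The new MemOpLink record above declares only data; a hedged sketch of the intended use in MergeConsecutiveStores-style code follows. CandidateStores and getOffsetFromBase are hypothetical placeholders, not APIs from this patch:

// Hypothetical usage: record each candidate store with its byte offset
// from a shared base and its DAG order, then sort by offset so consecutive
// stores become adjacent and mergeable.
SmallVector<MemOpLink, 8> StoreNodes;
unsigned Seq = 0;
for (LSBaseSDNode *St : CandidateStores)           // hypothetical input
  StoreNodes.push_back(MemOpLink(St, getOffsetFromBase(St), Seq++));
std::sort(StoreNodes.begin(), StoreNodes.end(),
          [](const MemOpLink &LHS, const MemOpLink &RHS) {
            return LHS.OffsetFromBase < RHS.OffsetFromBase;
          });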
+static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) return N.getNode(); - BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); - if (BV && BV->isConstant()) - return BV; + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + +// \brief Returns the SDNode if it is a constant float BuildVector +// or constant float. +static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); return nullptr; } @@ -735,10 +774,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); return SDValue(); } @@ -755,10 +794,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); return SDValue(); } @@ -1309,6 +1348,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1344,9 +1384,13 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MGATHER: return visitMGATHER(N); case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); + case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); } return SDValue(); } @@ -1412,9 +1456,10 @@ SDValue DAGCombiner::combine(SDNode *N) { SDNode *CSENode; if (const BinaryWithFlagsSDNode *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) { - CSENode = DAG.getNodeIfExists( - N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), - BinNode->hasNoSignedWrap(), BinNode->isExact()); + CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + BinNode->Flags.hasNoUnsignedWrap(), + BinNode->Flags.hasNoSignedWrap(), + BinNode->Flags.hasExact()); } else { CSENode = 
DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); } @@ -1471,7 +1516,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { switch (Op.getOpcode()) { case ISD::EntryToken: // Entry tokens don't need to be added to the list. They are - // rededundant. + // redundant. Changed = true; break; @@ -1500,7 +1545,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { SDValue Result; - // If we've change things around then replace token factor. + // If we've changed things around then replace token factor. if (Changed) { if (Ops.empty()) { // The entry token is the only possible outcome. @@ -1510,8 +1555,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } - // Don't add users to work list. - return CombineTo(N, Result, false); + // Add users to worklist if AA is enabled, since it may introduce + // a lot of new chained token factors while removing memory deps. + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + return CombineTo(N, Result, UseAA /*add to worklist*/); } return Result; @@ -1534,17 +1582,37 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static bool isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isNullValue(); +} + +static bool isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +static bool isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isOne(); +} + +/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a +/// ContantSDNode pointer else nullptr. +static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); + return Const != nullptr && !Const->isOpaque() ? 
Const : nullptr; +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (add x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1559,13 +1627,16 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // fold (add Sym, c) -> Sym+c if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) @@ -1576,22 +1647,21 @@ SDValue DAGCombiner::visitADD(SDNode *N) { (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), VT), + N0C->getAPIntValue(), DL, VT), N0.getOperand(1)); + } // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); - if (RADD.getNode()) + if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; // fold ((0-A) + B) -> B-A - if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && - cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) + if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B - if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && - cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) + if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) @@ -1651,34 +1721,27 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) - if (N1.getOpcode() == ISD::SHL && - N1.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1))); - if (N0.getOpcode() == ISD::SHL && - N0.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N0.getOperand(0).getOperand(1), - N0.getOperand(1))); + if (N1.getOpcode() == ISD::SHL && 
N1.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N0.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N0.getOperand(0).getOperand(1), + N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); - ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); unsigned DestBits = VT.getScalarType().getSizeInBits(); // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. - if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } @@ -1699,7 +1762,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); } } @@ -1710,8 +1773,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. @@ -1721,11 +1782,13 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1752,10 +1815,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); @@ -1773,25 +1836,21 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations, bool LegalTypes) { if (!VT.isVector()) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); return SDValue(); } SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
nullptr : - dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sub x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1803,14 +1862,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) - if (N1C) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, - DAG.getConstant(-N1C->getAPIntValue(), VT)); + if (N1C) { + SDLoc DL(N); + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); + } // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) @@ -1822,10 +1885,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : + dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { + SDLoc DL(N); SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), - VT); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, + DL, VT); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -1866,7 +1932,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), - VT); + SDLoc(N), VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) @@ -1875,7 +1941,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); } } @@ -1886,8 +1952,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an SUB. @@ -1897,18 +1961,20 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { MVT::Glue)); // fold (subc x, x) -> 0 + no borrow - if (N0 == N1) - return CombineTo(N, DAG.getConstant(0, VT), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), + if (N0 == N1) { + SDLoc DL(N); + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + } // fold (subc x, 0) -> x + no borrow - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1935,33 +2001,41 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); bool N0IsConst = false; bool N1IsConst = false; + bool N1IsOpaqueConst = false; + bool N0IsOpaqueConst = false; APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr; - ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() - : APInt(); - N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr; - ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() - : APInt(); + N0IsConst = isa<ConstantSDNode>(N0); + if (N0IsConst) { + ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); + N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); + } + N1IsConst = isa<ConstantSDNode>(N1); + if (N1IsConst) { + ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); + } } // fold (mul c1, c2) -> c1*c2 - if (N0IsConst && N1IsConst) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) + return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, + N0.getNode(), N1.getNode()); - // canonicalize constant to RHS - if (N0IsConst && !N1IsConst) + // canonicalize constant to RHS (vector doesn't have to splat) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -1974,23 +2048,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1IsConst && ConstValue1.isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(ConstValue1.logBase2(), + if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && + IsFullSplat) { + SDLoc DL(N); + return DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(ConstValue1.logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { + if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && + IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); + SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
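// The two folds above strength-reduce multiplies by powers of two into
// shifts. A minimal standalone sketch of the same identities on plain
// integers (an illustration only, not part of the patch; the helper names
// are made up, and c must stay below the bit width):
#include <cstdint>
inline uint32_t mul_pow2(uint32_t x, unsigned c) {
  return x << c;                // mul x, (1 << c)  ->  shl x, c
}
inline uint32_t mul_neg_pow2(uint32_t x, unsigned c) {
  return 0u - (x << c);         // mul x, -(1 << c) ->  sub 0, (shl x, c)
}
// e.g. mul_pow2(5, 3) == 40, and mul_neg_pow2(5, 3) == uint32_t(-40).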
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), - DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(Log2Val, + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(Log2Val, DL, getShiftAmountTy(N0.getValueType())))); } @@ -2041,8 +2122,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); - if (RMUL.getNode()) + if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) return RMUL; return SDValue(); @@ -2051,26 +2131,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sdiv c1, c2) -> c1/c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getAPIntValue() == 1LL) + if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1C && N1C->isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { @@ -2080,8 +2161,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2SDivCheap()) @@ -2093,24 +2175,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); + SDLoc DL(N); // Splat the sign bit into the register SDValue SGN = - DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getConstant(VT.getScalarSizeInBits() - 1, + DAG.getNode(ISD::SRA, DL, VT, N0, + DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); AddToWorklist(SGN.getNode()); // Add (N0 < 0) ? 
abs2 - 1 : 0; SDValue SRL = - DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, - DAG.getConstant(VT.getScalarSizeInBits() - lg2, + DAG.getNode(ISD::SRL, DL, VT, SGN, + DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); + SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); AddToWorklist(SRL.getNode()); AddToWorklist(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, - DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, + DAG.getConstant(lg2, DL, + getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. @@ -2118,10 +2202,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorklist(SRA.getNode()); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } - // if integer divide is expensive and we satisfy the requirements, emit an + // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); @@ -2130,7 +2214,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2141,36 +2225,40 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (udiv c1, c2) -> c1/c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::SRL, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, + SDLoc DL(N); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() .logBase2(), - ADDVT)); + DL, ADDVT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } } @@ -2182,7 +2270,7 @@ SDValue 
DAGCombiner::visitUDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2193,13 +2281,15 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { @@ -2224,7 +2314,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2235,27 +2325,33 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue()-1,VT)); + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); SDValue Add = - DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + DAG.getNode(ISD::ADD, DL, VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } } @@ -2277,7 +2373,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2288,21 +2384,23 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = 
N->getValueType(0); SDLoc DL(N); // fold (mulhs x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, + if (isOneConstant(N1)) { + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mulhs x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. @@ -2315,7 +2413,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2326,19 +2425,18 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhu x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N1; // fold (mulhu x, 1) -> 0 - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getConstant(0, N0.getValueType()); + if (isOneConstant(N1)) + return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2351,7 +2449,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2417,8 +2516,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2429,7 +2528,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2447,8 +2547,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. 
+ // If the type twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2459,7 +2559,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2615,7 +2716,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // build vector of all zeros that might be illegal at this stage. if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2636,7 +2737,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ShOp = N0->getOperand(0); if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2657,19 +2758,122 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { return SDValue(); } +/// This contains all DAGCombine rules which reduce two values combined by +/// an And operation to a single value. This makes them reusable in the context +/// of visitSELECT(). Rules involving constants are not included as +/// visitSELECT() already handles those cases. +SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, + SDNode *LocReference) { + EVT VT = N1.getValueType(); + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, SDLoc(LocReference), VT); + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (isNullConstant(LR) && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + if (isAllOnesConstant(LR)) { + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + } + } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || 
(isAllOnesConstant(LR) && isNullConstant(RR)))) { + SDLoc DL(N0); + SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), + LL, DAG.getConstant(1, DL, + LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, DL, LL.getValueType()), + ISD::SETUGE); + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getSimpleValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. + if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDLoc DL(N0); + SDValue NewAdd = + DAG.getNode(ISD::ADD, DL, VT, + N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); + CombineTo(N0.getNode(), NewAdd); + // Return N so it doesn't get rechecked! 
+ return SDValue(LocReference, 0); + } + } + } + } + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); - unsigned BitWidth = VT.getScalarType().getSizeInBits(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -2677,13 +2881,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getConstant( APInt::getNullValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getNullValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -2692,25 +2896,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return N0; } - // fold (and x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (and c1, c2) -> c1&c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 + unsigned BitWidth = VT.getScalarType().getSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // reassociate and - SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); - if (RAND.getNode()) + if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) @@ -2840,117 +3044,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! 
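// A quick standalone check of the (and (or x, C), D) -> D fold referenced
// above (an illustration only, not part of the patch): it is valid exactly
// when C covers every bit of D, i.e. (C & D) == D.
#include <cstdint>
inline bool and_or_fold_holds(uint32_t x, uint32_t C, uint32_t D) {
  if ((C & D) != D)
    return true;                // precondition not met, fold is not applied
  return ((x | C) & D) == D;    // with C covering D, this holds for every x
}
// e.g. C = 0xFF, D = 0x0F: (x | 0xFF) & 0x0F == 0x0F regardless of x.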
} } - // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - } - // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) - if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && - Op0 == Op1 && LL.getValueType().isInteger() && - Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && - cast<ConstantSDNode>(RR)->isAllOnesValue()) || - (cast<ConstantSDNode>(LR)->isAllOnesValue() && - cast<ConstantSDNode>(RR)->isNullValue()))) { - SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), - LL, DAG.getConstant(1, LL.getValueType())); - AddToWorklist(ADDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ADDNode, - DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - - // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } - - // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) - // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - - // fold (zext_inreg (extload x)) -> (zextload x) - if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. 
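// The zext_inreg rewrites being moved here turn "extending load + mask"
// into a single zero-extending load. A standalone sketch of why the mask
// makes the original extension kind irrelevant (an illustration only, not
// part of the patch; an i8 load widened to 32 bits is shown):
#include <cstdint>
inline uint32_t load_then_mask(const int8_t *p) {
  int32_t ext = *p;             // sextload/extload-style extension
  return uint32_t(ext) & 0xFFu; // masking the high bits == zextload i8
}
// For *p == -1 (bit pattern 0xFF), ext is 0xFFFFFFFF and the AND yields
// 0xFF, exactly what a zextload would produce.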
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use - if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - N0.hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) @@ -3001,8 +3094,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, - NewPtr, DAG.getConstant(PtrOff, PtrType)); + SDLoc DL(LN0); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, + NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); Alignment = MinAlign(Alignment, PtrOff); } @@ -3022,33 +3116,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { - if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - APInt ADDC = ADDI->getAPIntValue(); - if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal - // immediate for an add, but it is legal if its top c2 bits are set, - // transform the ADD so the immediate doesn't need to be materialized - // in a register. - if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - SRLI->getZExtValue()); - if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { - ADDC |= Mask; - if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - SDValue NewAdd = - DAG.getNode(ISD::ADD, SDLoc(N0), VT, - N0.getOperand(0), DAG.getConstant(ADDC, VT)); - CombineTo(N0.getNode(), NewAdd); - return SDValue(N, 0); // Return N so it doesn't get rechecked! 
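// The transform being removed here (re-added in visitANDLike above) widens
// an add constant into its don't-care bits so it becomes a legal immediate.
// A standalone sketch of why that is sound (an illustration only, not part
// of the patch): the (srl y, c2) operand zeroes the top c2 bits of the AND
// result, and carries in the add only travel upward, so those bits of the
// constant cannot affect the result.
#include <cstdint>
inline bool top_bits_dont_care(uint32_t x, uint32_t y,
                               uint32_t c1, unsigned c2) {
  uint32_t hi = ~0u << (32 - c2);  // the top c2 bits; assumes 1 <= c2 <= 31
  return ((x + c1) & (y >> c2)) == ((x + (c1 | hi)) & (y >> c2));
}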
- } - } - } - } - } + if (SDValue Combined = visitANDLike(N0, N1, N)) + return Combined; + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; } + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), @@ -3159,9 +3280,12 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); - if (OpSizeInBits > 16) - Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res, - DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + if (OpSizeInBits > 16) { + SDLoc DL(N); + Res = DAG.getNode(ISD::SRL, DL, VT, Res, + DAG.getConstant(OpSizeInBits - 16, DL, + getShiftAmountTy(VT))); + } return Res; } @@ -3299,33 +3423,125 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) return SDValue(); - SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, - SDValue(Parts[0],0)); + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, + SDValue(Parts[0], 0)); // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). 
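// MatchBSwapHWord, shown here, builds a 32-bit halfword swap as a bswap
// followed by a rotate of 16, with (x << 16) | (x >> 16) as the fallback
// when neither rotate is legal. A standalone sketch of that fallback (an
// illustration only, not part of the patch):
#include <cstdint>
inline uint32_t rotl16(uint32_t v) {
  return (v << 16) | (v >> 16); // same result as ROTL or ROTR by 16
}
// e.g. rotl16(0xAABBCCDDu) == 0xCCDDAABBu.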
- SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); + SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); + return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) - return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), - DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); +} + +/// This contains all DAGCombine rules which reduce two values combined by +/// an Or operation to a single value \see visitANDLike(). +SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { + EVT VT = N1.getValueType(); + // fold (or x, undef) -> -1 + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + SDLoc(LocReference), VT); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. 
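// A standalone check of the mask-merging fold implemented next (an
// illustration only, not part of the patch): when no bit of X lies in C2
// but outside C1, and no bit of Y lies in C1 but outside C2, the two ANDs
// can share one combined mask.
#include <cstdint>
inline bool mask_merge_holds(uint32_t X, uint32_t Y,
                             uint32_t C1, uint32_t C2) {
  if ((X & (C2 & ~C1)) != 0 || (Y & (C1 & ~C2)) != 0)
    return true;                // precondition not met, fold is not applied
  return ((X & C1) | (Y & C2)) == ((X | Y) & (C1 | C2));
}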
+ if (const ConstantSDNode *N0O1C = + getAsNonOpaqueConstant(N0.getOperand(1))) { + if (const ConstantSDNode *N1O1C = + getAsNonOpaqueConstant(N1.getOperand(1))) { + // We can only do this xform if we know that bits from X that are set in + // C2 but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = N0O1C->getAPIntValue(); + const APInt &RHSMask = N1O1C->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + SDLoc DL(LocReference); + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getConstant(LHSMask | RHSMask, DL, VT)); + } + } + } + } + + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); + } + + return SDValue(); } SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (or x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3339,13 +3555,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.getConstant( APInt::getAllOnesValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getAllOnesValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) @@ -3404,28 +3620,28 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } - // fold (or x, undef) -> -1 - if (!LegalOperations && - (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { - EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; - return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); - } // fold (or c1, c2) -> c1|c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // fold (or x, -1) -> -1 - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N1; // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + if (SDValue Combined = visitORLike(N0, N1, N)) + return Combined; + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) SDValue BSwap = MatchBSwapHWord(N, N0, N1); if (BSwap.getNode()) @@ -3435,8 +3651,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); - if (ROR.getNode()) + if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) == 0. @@ -3444,86 +3659,20 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, + N1C, C1)) return DAG.getNode( ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); return SDValue(); } } - // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) - // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) - // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, 
LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); if (Tmp.getNode()) return Tmp; } - // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(1).getOpcode() == ISD::Constant && - N1.getOperand(1).getOpcode() == ISD::Constant && - // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { - // We can only do this xform if we know that bits from X that are set in C2 - // but not in C1 are already zero. Likewise for Y. - const APInt &LHSMask = - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - const APInt &RHSMask = - cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); - - if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && - DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, X, - DAG.getConstant(LHSMask | RHSMask, VT)); - } - } - // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); @@ -3751,7 +3900,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); } return Rot.getNode(); @@ -3793,15 +3942,12 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LHS, RHS, CC; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (xor x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3812,27 +3958,30 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
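// MatchRotate, used just above, recognizes shift pairs that implement a
// rotate. A standalone sketch of the basic idiom it matches (an
// illustration only, not part of the patch; c must stay in 1..31 so that
// neither shift is by the full width):
#include <cstdint>
inline uint32_t rot_idiom(uint32_t x, unsigned c) {
  return (x << c) | (x >> (32 - c)); // (or (shl x, c), (srl x, 32 - c))
}
// e.g. rot_idiom(0x80000001u, 1) == 0x00000003u, a left-rotate by one.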
if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (xor x, undef) -> undef if (N0.getOpcode() == ISD::UNDEF) return N0; if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (xor c1, c2) -> c1^c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // reassociate xor - SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); - if (RXOR.getNode()) + if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) return RXOR; // fold !(x cc y) -> (x !cc y) + SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), @@ -3853,18 +4002,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) - if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); - V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, - DAG.getConstant(1, V.getValueType())); + SDLoc DL(N0); + V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, + DAG.getConstant(1, DL, V.getValueType())); AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc - if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + if (isOneConstant(N1) && VT == MVT::i1 && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { @@ -3876,7 +4026,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants - if (N1C && N1C->isAllOnesValue() && + if (isAllOnesConstant(N1) && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { @@ -3897,21 +4047,48 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) if (N1C && N0.getOpcode() == ISD::XOR) { - ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); - ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (N00C) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1), + if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ - N00C->getAPIntValue(), VT)); - if (N01C) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0), + N00C->getAPIntValue(), DL, VT)); + } + if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, 
N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ - N01C->getAPIntValue(), VT)); + N01C->getAPIntValue(), DL, VT)); + } } // fold (xor x, x) -> 0 if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + // fold (xor (shl 1, x), -1) -> (rotl ~1, x) + // Here is a concrete example of this equivalence: + // i16 x == 14 + // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 + // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 + // + // => + // + // i16 ~1 == 0b1111111111111110 + // i16 rol(~1, 14) == 0b1011111111111111 + // + // Some additional tips to help conceptualize this transform: + // - Try to see the operation as placing a single zero in a value of all ones. + // - There exists no value for x which would allow the result to contain zero. + // - Values of x larger than the bitwidth are undefined and do not require a + // consistent result. + // - Pushing the zero left requires shifting one bits in from the right. + // A rotate left of ~1 is a nice way of achieving the desired result. + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL + && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), + N0.getOperand(1)); + } + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); @@ -3929,10 +4106,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// Handle transforms common to the three shifts, when the shift amount is a /// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { - // We can't and shouldn't fold opaque constants. - if (Amt->isOpaque()) - return SDValue(); - SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -3959,8 +4132,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { } // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); - if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant. 
// If it is not a shift, it pessimizes some common cases like: @@ -4013,14 +4186,17 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { SDValue N01 = N->getOperand(0).getOperand(1); if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { - EVT TruncVT = N->getValueType(0); - SDValue N00 = N->getOperand(0).getOperand(0); - APInt TruncC = N01C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + if (!N01C->isOpaque()) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + SDLoc DL(N); - return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, - DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), - DAG.getConstant(TruncC, TruncVT)); + return DAG.getNode(ISD::AND, DL, TruncVT, + DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), + DAG.getConstant(TruncC, DL, TruncVT)); + } } } @@ -4042,15 +4218,14 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); // If setcc produces all-one true value then: @@ -4064,7 +4239,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, + N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } else { @@ -4074,10 +4250,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl c1, c2) -> c1<<c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); // fold (shl 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (shl x, c >= size(x)) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4087,11 +4264,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return N0; // fold (shl undef, x) -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
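// A standalone check that truncation distributes over AND, which is what
// the shift-amount fold named above relies on (an illustration only, not
// part of the patch; a 32-bit value truncated to 16 bits is shown):
#include <cstdint>
inline bool trunc_and_commutes(uint32_t y, uint32_t c) {
  return uint16_t(y & c) == uint16_t(uint16_t(y) & uint16_t(c));
}
// Both sides keep only the low 16 bits of y & c, so this holds for all
// inputs.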
if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -4108,10 +4285,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N0C1->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4131,12 +4309,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT InnerShiftVT = N0Op0.getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); if (c2 >= OpSizeInBits - InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N0), VT, - DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, + DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), - DAG.getConstant(c1 + c2, N1.getValueType())); + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } } @@ -4154,8 +4333,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (c1 == c2) { SDValue NewOp0 = N0.getOperand(0); EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), - NewOp0, DAG.getConstant(c2, CountVT)); + SDLoc DL(N); + SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), + NewOp0, + DAG.getConstant(c2, DL, CountVT)); AddToWorklist(NewSHL.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); } @@ -4176,25 +4357,30 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue Shift; if (c2 > c1) { Mask = Mask.shl(c2 - c1); - Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c2 - c1, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, DL, N1.getValueType())); } else { Mask = Mask.lshr(c1 - c2); - Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 - c2, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, DL, N1.getValueType())); } - return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, - DAG.getConstant(Mask, VT)); + SDLoc DL(N0); + return DAG.getNode(ISD::AND, DL, VT, Shift, + DAG.getConstant(Mask, DL, VT)); } } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { unsigned BitSize = VT.getScalarSizeInBits(); + SDLoc DL(N); SDValue HiBitsMask = DAG.getConstant(APInt::getHighBitsSet(BitSize, - BitSize - N1C->getZExtValue()), VT); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), + BitSize - N1C->getZExtValue()), + DL, VT); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); } @@ -4210,7 +4396,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSHL = visitShiftByConstant(N, N1C); if (NewSHL.getNode()) return NewSHL; @@ -4222,27 +4408,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = 
dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } // fold (sra c1, c2) -> c1 >>s c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); // fold (sra 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (sra -1, x) -> -1 - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return N0; // fold (sra x, (setge c, size(x))) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4270,8 +4456,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); if (Sum >= OpSizeInBits) Sum = OpSizeInBits - 1; - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(Sum, N1.getValueType())); + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum, DL, N1.getValueType())); } } @@ -4303,14 +4490,15 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { - SDValue Amt = DAG.getConstant(ShiftAmt, - getShiftAmountTy(N0.getOperand(0).getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, - N0.getOperand(0), Amt); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, - Shift); - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), - N->getValueType(0), Trunc); + SDLoc DL(N); + SDValue Amt = DAG.getConstant(ShiftAmt, DL, + getShiftAmountTy(N0.getOperand(0).getValueType())); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, + N0.getOperand(0), Amt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, + Shift); + return DAG.getNode(ISD::SIGN_EXTEND, DL, + N->getValueType(0), Trunc); } } } @@ -4337,12 +4525,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT LargeVT = N0Op0.getValueType(); if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDLoc DL(N); SDValue Amt = - DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL, getShiftAmountTy(N0Op0.getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, + SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); } } } @@ -4356,7 +4545,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSRA = visitShiftByConstant(N, N1C); if (NewSRA.getNode()) return NewSRA; @@ -4368,24 +4557,24 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue
FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } // fold (srl c1, c2) -> c1 >>u c2 - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); // fold (srl 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4396,17 +4585,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // if (srl x, c) is known to be zero, return 0 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N01C->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4422,12 +4612,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= InnerShiftSize) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, - DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, N0.getOperand(0)->getOperand(0), - DAG.getConstant(c1 + c2, ShiftCountVT))); + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); } } @@ -4436,8 +4628,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { unsigned BitSize = N0.getScalarValueSizeInBits(); if (BitSize <= 64) { uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, DL, VT)); } } @@ -4451,14 +4644,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); - SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, + SDLoc DL0(N0); + SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0), - DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); + DAG.getConstant(ShiftAmt, DL0, + getShiftAmountTy(SmallVT))); AddToWorklist(SmallShift.getNode()); APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), - DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), + DAG.getConstant(Mask, DL, VT)); } } @@ -4477,12 +4673,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If any of the input bits are KnownOne, then the input couldn't be 
all // zeros, thus the result of the srl will always be zero. - if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); + if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input to the ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. APInt UnknownBits = ~KnownZero; - if (UnknownBits == 0) return DAG.getConstant(1, VT); + if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. if ((UnknownBits & (UnknownBits - 1)) == 0) { @@ -4494,13 +4690,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue Op = N0.getOperand(0); if (ShAmt) { - Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, - DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); + SDLoc DL(N0); + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(ShAmt, DL, + getShiftAmountTy(Op.getValueType()))); AddToWorklist(Op.getNode()); } - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - Op, DAG.getConstant(1, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, + Op, DAG.getConstant(1, DL, VT)); } } @@ -4517,7 +4716,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSRL = visitShiftByConstant(N, N1C); if (NewSRL.getNode()) return NewSRL; @@ -4651,23 +4850,19 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); // fold (select C, X, X) -> X if (N1 == N2) return N1; - // fold (select true, X, Y) -> X - if (N0C && !N0C->isNullValue()) - return N1; - // fold (select false, X, Y) -> Y - if (N0C && N0C->isNullValue()) - return N2; + if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) { + // fold (select true, X, Y) -> X + // fold (select false, X, Y) -> Y + return !N0C->isNullValue() ?
N1 : N2; + } // fold (select C, 1, X) -> (or C, X) - if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + if (VT == MVT::i1 && isOneConstant(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) // We can't do this reliably if integer based booleans have different contents @@ -4684,40 +4879,43 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { TLI.getBooleanContents(false, true) && TLI.getBooleanContents(false, false) == TargetLowering::ZeroOrOneBooleanContent)) && - N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + isNullConstant(N1) && isOneConstant(N2)) { SDValue XORNode; - if (VT == VT0) - return DAG.getNode(ISD::XOR, SDLoc(N), VT0, - N0, DAG.getConstant(1, VT0)); - XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, - N0, DAG.getConstant(1, VT0)); + if (VT == VT0) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT0, + N0, DAG.getConstant(1, DL, VT0)); + } + SDLoc DL0(N0); + XORNode = DAG.getNode(ISD::XOR, DL0, VT0, + N0, DAG.getConstant(1, DL0, VT0)); AddToWorklist(XORNode.getNode()); if (VT.bitsGT(VT0)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); } // fold (select C, 0, X) -> (and (not C), X) - if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { + if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) - if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) - if (VT == MVT::i1 && N2C && N2C->isNullValue()) + if (VT == MVT::i1 && isNullConstant(N2)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or X, Y) - if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) + if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) - if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) + if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. 
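For the MVT::i1 cases these select folds are plain Boolean identities. An exhaustive two-value check in standalone C++, illustrative only:

#include <cassert>

int main() {
  for (int C = 0; C <= 1; ++C)
    for (int X = 0; X <= 1; ++X) {
      assert((C ? 1 : X) == (C | X));        // (select C, 1, X) -> (or C, X)
      assert((C ? 0 : 1) == (C ^ 1));        // (select C, 0, 1) -> (xor C, 1)
      assert((C ? X : 0) == (C & X));        // (select C, X, 0) -> (and C, X)
      assert((C ? 0 : X) == ((C ^ 1) & X));  // (select C, 0, X) -> (and (not C), X)
      assert((C ? X : 1) == ((C ^ 1) | X));  // (select C, X, 1) -> (or (not C), X)
    }
  return 0;
}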
@@ -4757,6 +4955,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SimplifySelect(SDLoc(N), N0, N1, N2); } + if (VT0 == MVT::i1) { + if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { + // Create the actual and node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { + // Create the actual or node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern. + if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + return SDValue(); } @@ -4832,6 +5093,67 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } +SDValue DAGCombiner::visitMSCATTER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue Mask = MSC->getMask(); + SDValue Data = MSC->getValue(); + SDLoc DL(N); + + // If the MSCATTER data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + // Check if any splitting is required. 
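The select-of-select restructuring just introduced rests on two identities; a quick standalone C++ property check, with an invented sel helper standing in for ISD::SELECT:

#include <cassert>

int sel(bool c, int x, int y) { return c ? x : y; } // stand-in for ISD::SELECT

int main() {
  const int X = 7, Y = 9;
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b) {
      // select (and a, b), X, Y == select a, (select b, X, Y), Y
      assert(sel(a && b, X, Y) == sel(a, sel(b, X, Y), Y));
      // select (or a, b), X, Y == select a, X, (select b, X, Y)
      assert(sel(a || b, X, Y) == sel(a, X, sel(b, X, Y)));
    }
  return 0;
}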
+ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); + + SDValue Chain = MSC->getChain(); + + EVT MemoryVT = MSC->getMemoryVT(); + unsigned Alignment = MSC->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + SDValue BasePtr = MSC->getBasePtr(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MSC->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MSC->getAAInfo(), MSC->getRanges()); + + SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO); + + SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (Level >= AfterLegalizeTypes) @@ -4878,7 +5200,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); @@ -4887,10 +5209,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, MST->getAAInfo(), MST->getRanges()); @@ -4906,6 +5228,83 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitMGATHER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N); + SDValue Mask = MGT->getMask(); + SDLoc DL(N); + + // If the MGATHER result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + EVT VT = N->getValueType(0); + + // Check if any splitting is required. 
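Splitting the masked scatter into lo and hi halves is semantics-preserving because the lanes are independent; a scalar model of that claim in standalone C++ (the scatter helper is a made-up emulation, not the MaskedScatterSDNode):

#include <cassert>
#include <cstddef>

// Scalar model of a masked scatter: mem[index[i]] = data[i] when mask[i].
void scatter(int *mem, const int *data, const int *index,
             const bool *mask, size_t lo, size_t hi) {
  for (size_t i = lo; i < hi; ++i)
    if (mask[i]) mem[index[i]] = data[i];
}

int main() {
  int whole[8] = {0}, split[8] = {0};
  const int data[4] = {1, 2, 3, 4}, index[4] = {5, 1, 7, 2};
  const bool mask[4] = {true, false, true, true};
  scatter(whole, data, index, mask, 0, 4); // one wide scatter
  scatter(split, data, index, mask, 0, 2); // lo half...
  scatter(split, data, index, mask, 2, 4); // ...then hi half
  for (int i = 0; i < 8; ++i) assert(whole[i] == split[i]);
  return 0;
}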
+ if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MGT->getValue(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Chain = MGT->getChain(); + EVT MemoryVT = MGT->getMemoryVT(); + unsigned Alignment = MGT->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue BasePtr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, + MMO); + + SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, + MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); + + SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { GatherRes, Chain }; + return DAG.getMergeValues(RetOps, DL); +} + SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (Level >= AfterLegalizeTypes) @@ -4953,7 +5352,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); @@ -4962,10 +5361,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); @@ -5021,7 +5420,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { EVT VT = LHS.getValueType(); SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -5029,6 +5428,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + // If the VSELECT result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. 
This // prevents the type legalizer from unrolling SETCC into scalar comparisons @@ -5141,7 +5543,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, EVT VT = N->getValueType(0); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); + Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 // fold (zext c1) -> c1 @@ -5163,7 +5566,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); unsigned ShAmt = VTBits - EVTBits; SmallVector<SDValue, 8> Elts; - unsigned NumElts = N0->getNumOperands(); + unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); for (unsigned i=0; i != NumElts; ++i) { @@ -5173,14 +5576,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, continue; } + SDLoc DL(Op); ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); - if (Opcode == ISD::SIGN_EXTEND) + if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - SVT)); + DL, SVT)); else Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), - SVT)); + DL, SVT)); } return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); @@ -5271,6 +5675,102 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, } } +// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). +SDValue DAGCombiner::CombineExtLoad(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT DstVT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); + + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && + "Unexpected node type (not an extend)!"); + + // fold (sext (load x)) to multiple smaller sextloads; same for zext. + // For example, on a target with legal v4i32, but illegal v8i32, turn: + // (v8i32 (sext (v8i16 (load x)))) + // into: + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))) + // Where uses of the original load, i.e.: + // (v8i16 (load x)) + // are replaced with: + // (v8i16 (truncate + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))))) + // + // This combine is only applicable to illegal, but splittable, vectors. + // All legal types, and illegal non-vector types, are handled elsewhere. + // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. + // + if (N0->getOpcode() != ISD::LOAD) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + + if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || + !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || + !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SmallVector<SDNode *, 4> SetCCs; + if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) + return SDValue(); + + ISD::LoadExtType ExtType = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + + // Try to split the vector types to get down to legal types. 
+ EVT SplitSrcVT = SrcVT; + EVT SplitDstVT = DstVT; + while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && + SplitSrcVT.getVectorNumElements() > 1) { + SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; + SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; + } + + if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) + return SDValue(); + + SDLoc DL(N); + const unsigned NumSplits = + DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); + const unsigned Stride = SplitSrcVT.getStoreSize(); + SmallVector<SDValue, 4> Loads; + SmallVector<SDValue, 4> Chains; + + SDValue BasePtr = LN0->getBasePtr(); + for (unsigned Idx = 0; Idx < NumSplits; Idx++) { + const unsigned Offset = Idx * Stride; + const unsigned Align = MinAlign(LN0->getAlignment(), Offset); + + SDValue SplitLoad = DAG.getExtLoad( + ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, + LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), + Align, LN0->getAAInfo()); + + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, DL, BasePtr.getValueType())); + + Loads.push_back(SplitLoad.getValue(0)); + Chains.push_back(SplitLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); + + CombineTo(N, NewValue); + + // Replace uses of the original load (before extension) + // with a truncate of the concatenated sextloaded vectors. + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); + CombineTo(N0.getNode(), Trunc, NewChain); + ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, + (ISD::NodeType)N->getOpcode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -5337,17 +5837,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // fold (sext (load x)) -> (sext (truncate (sextload x))) - // None of the supported targets knows how to perform load and sign extend - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. + if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -5364,6 +5865,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (load x)) to multiple smaller sextloads. + // Only on illegal but splittable vectors. 
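At the value level, CombineExtLoad claims that one wide extending load equals the concatenation of two half-width extending loads at consecutive offsets; the DAG version additionally rewires chains and SETCC users. A standalone C++ sketch of just that equivalence:

#include <cassert>
#include <cstdint>

int main() {
  const int16_t src[8] = {-3, 100, -32768, 32767, 0, -1, 42, -42};
  int32_t wide[8], halves[8];
  // (v8i32 (sext (v8i16 (load x)))) in one step...
  for (int i = 0; i < 8; ++i) wide[i] = src[i];       // sign-extends
  // ...versus two v4i32 sextloads at offsets 0 and 4, then concat.
  for (int i = 0; i < 4; ++i) halves[i] = src[i];
  for (int i = 0; i < 4; ++i) halves[4 + i] = src[4 + i];
  for (int i = 0; i < 8; ++i) assert(wide[i] == halves[i]);
  return 0;
}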
+ if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) // fold (sext ( extload x)) -> (sext (truncate (sextload x))) if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && @@ -5407,14 +5913,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5457,11 +5964,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + SDLoc DL(N); SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - NegOne, DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + NegOne, DAG.getConstant(0, DL, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -5473,7 +5981,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); return DAG.getSelect(DL, VT, SetCC, - NegOne, DAG.getConstant(0, VT)); + NegOne, DAG.getConstant(0, DL, VT)); } } } @@ -5507,11 +6015,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType()); - ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); - ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); - if (COp0 && COp0->isNullValue()) + if (isNullConstant(Op0)) Op = Op1; - else if (COp1 && COp1->isNullValue()) + else if (isNullConstant(Op1)) Op = Op0; else return false; @@ -5622,22 +6128,24 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (zext (load x)) -> (zext (truncate (zextload x))) - // None of the supported targets knows how to perform load and vector_zext - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. 
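The NegOne operand above exploits the fact that sign-extending an i1 true gives an all-ones mask; in two's complement, -(x < y) is exactly the 0/-1 value that select(setcc, -1, 0) produces. Illustrative standalone check:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x = -2; x <= 2; ++x)
    for (int32_t y = -2; y <= 2; ++y) {
      int32_t mask = -static_cast<int32_t>(x < y); // sext of the i1 result
      assert(mask == ((x < y) ? -1 : 0));          // select(setcc), -1, 0
    }
  return 0;
}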
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, @@ -5655,6 +6163,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } } + // fold (zext (load x)) to multiple smaller zextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || @@ -5677,14 +6190,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5722,19 +6236,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. EVT EltVT = VT.getVectorElementType(); + SDLoc DL(N); SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), - DAG.getConstant(1, EltVT)); + DAG.getConstant(1, DL, EltVT)); if (VT.getSizeInBits() == N0VT.getSizeInBits()) // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. 
- return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); // If the desired elements are smaller or larger than the source @@ -5747,18 +6262,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps)); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSExtOrTrunc(VsetCC, DL, VT), + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; } @@ -5850,8 +6366,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (aext (load x)) -> (aext (truncate (extload x))) @@ -5934,9 +6451,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -5957,7 +6475,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; if (NewVal != CVal) - return DAG.getConstant(NewVal, V.getValueType()); + return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); break; } case ISD::OR: @@ -5972,7 +6490,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) break; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { // See if we can recursively simplify the LHS. 
unsigned Amt = RHSC->getZExtValue(); @@ -6117,9 +6635,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), + SDLoc DL(LN0); + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, PtrType)); + DAG.getConstant(PtrOff, DL, PtrType)); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -6148,11 +6667,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. + SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) - Result = DAG.getConstant(0, VT); + Result = DAG.getConstant(0, DL, VT); else - Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT, - Result, DAG.getConstant(ShLeftAmt, ShImmTy)); + Result = DAG.getNode(ISD::SHL, DL, VT, + Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); } // Return the new loaded value. @@ -6279,7 +6799,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - Op.getValueType())); + SDLoc(Op), Op.getValueType())); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); @@ -6288,6 +6808,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + return SDValue(); +} + SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -6297,7 +6831,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -6350,9 +6884,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, N0.getOperand(0)); + SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - SDLoc(N), TrTy, V, - DAG.getConstant(Index, IndexTy)); + DL, TrTy, V, + DAG.getConstant(Index, DL, IndexTy)); } } @@ -6598,13 +7133,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { N0.getOperand(0)); AddToWorklist(NewConv.getNode()); + SDLoc DL(N); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - NewConv, DAG.getConstant(SignBit, VT)); + return DAG.getNode(ISD::XOR, DL, VT, + NewConv, DAG.getConstant(SignBit, DL, VT)); assert(N0.getOpcode() == ISD::FABS); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - NewConv, DAG.getConstant(~SignBit, VT)); + return DAG.getNode(ISD::AND, DL, VT, + NewConv, DAG.getConstant(~SignBit, DL, VT)); } // fold (bitconvert (fcopysign cst, x)) -> @@ -6629,9 +7165,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before 
truncating. - X = DAG.getNode(ISD::SRL, SDLoc(X), + SDLoc DL(X); + X = DAG.getNode(ISD::SRL, DL, X.getValueType(), X, - DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + DAG.getConstant(OrigXWidth-VTWidth, DL, + X.getValueType())); AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorklist(X.getNode()); @@ -6639,13 +7177,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, - X, DAG.getConstant(SignBit, VT)); + X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, - Cst, DAG.getConstant(~SignBit, VT)); + Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); @@ -6659,6 +7197,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return CombineLD; } + // Remove double bitcasts from shuffles - this is often a legacy of + // XformToShuffleWithZero being used to combine bitmaskings (of + // float vectors bitcast to integer vectors) into shuffles. + // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && + N0->getOpcode() == ISD::VECTOR_SHUFFLE && + VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && + !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); + + // If operands are a bitcast, peek through if it casts the original VT. + // If operands are a UNDEF or constant, just bitcast back to original VT. + auto PeekThroughBitcast = [&](SDValue Op) { + if (Op.getOpcode() == ISD::BITCAST && + Op.getOperand(0)->getValueType(0) == VT) + return SDValue(Op.getOperand(0)); + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return SDValue(); + }; + + SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); + SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); + if (!(SV0 && SV1)) + return SDValue(); + + int MaskScale = + VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); + SmallVector<int, 8> NewMask; + for (int M : SVN->getMask()) + for (int i = 0; i != MaskScale; ++i) + NewMask.push_back(M < 0 ? -1 : M * MaskScale + i); + + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + } + + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + } + return SDValue(); } @@ -6727,6 +7310,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } + SDLoc DL(BV); + // Okay, we know the src/dst types are both integers of differing types. // Handling growing first. 
assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); @@ -6753,16 +7338,15 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (EltIsUndef) Ops.push_back(DAG.getUNDEF(DstEltVT)); else - Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); + Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } // Finally, this must be the case where we are shrinking elements: each input // turns into multiple outputs. - bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, NumOutputsPerInput*BV->getNumOperands()); @@ -6770,8 +7354,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { - for (unsigned j = 0; j != NumOutputsPerInput; ++j) - Ops.push_back(DAG.getUNDEF(DstEltVT)); + Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } @@ -6780,11 +7363,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); - Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); - if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) - // Simply turn this into a SCALAR_TO_VECTOR of the new type. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - Ops[0]); + Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); OpVal = OpVal.lshr(DstBitSize); } @@ -6793,7 +7372,450 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); +} + +/// Try to perform FMA combining on a given FADD node. +SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && + TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = ((!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && + UnsafeFPMath); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + + // Look through FP_EXTEND nodes to do more combining. 
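These contractions change results precisely because FMA rounds once where fmul followed by fadd rounds twice, which is why they sit behind UnsafeFPMath or a target FMAD; std::fma makes the difference observable. A standalone C++ demonstration, assuming a correctly rounded std::fma; the product is kept in a separate statement so the compiler cannot itself contract it:

#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  const double e = std::ldexp(1.0, -27);       // 2^-27
  const double x = 1.0 + e, y = 1.0 - e, z = -1.0;
  const double contracted = std::fma(x, y, z); // x*y + z, rounded once
  const double p = x * y;                      // rounds away the 2^-54 term
  const double separate = p + z;               // == 0.0
  std::printf("fma: %a  mul+add: %a\n", contracted, separate);
  assert(contracted != separate);              // contraction is observable
  return 0;
}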
+ if (UnsafeFPMath && LookThroughFPExt) { + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), N1); + } + + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), N0); + } + } + + // More folding opportunities when target permits. + if ((UnsafeFPMath || HasFMAD) && Aggressive) { + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + } + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } + + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + auto FoldFAddFMAFPExtFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), + N020.getOperand(0), N020.getOperand(1), + N1); + } + } + + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ auto FoldFAddFPExtFMAFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), + N002.getOperand(0), N002.getOperand(1), + N1); + } + } + + // fold (fadd x, (fma y, z, (fpext (fmul u, v))) + // -> (fma y, z, (fma (fpext u), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode) { + SDValue N12 = N1.getOperand(2); + if (N12.getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N12.getOperand(0); + if (N120.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), + N120.getOperand(0), N120.getOperand(1), + N0); + } + } + + // fold (fadd x, (fpext (fma y, z, (fmul u, v))) + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == PreferredFusedOpcode) { + SDValue N102 = N10.getOperand(2); + if (N102.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), + N102.getOperand(0), N102.getOperand(1), + N0); + } + } + } + } + + return SDValue(); +} + +/// Try to perform FMA combining on a given FSUB node. +SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && + TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = ((!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && + UnsafeFPMath); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. 
+ if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N00), N01, + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // Look through FP_EXTEND nodes to do more combining. + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), + N0); + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fneg (fma (fpext x)), (fpext y), z) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + } + + // More folding opportunities when target permits. 
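The FSUB patterns reuse the FADD machinery with negations pushed inward, relying on a*b - c == fma(a, b, -c) and c - a*b == fma(-a, b, c) up to the single final rounding. A standalone spot check with exactly representable values, where both sides agree bit for bit:

#include <cassert>
#include <cmath>

int main() {
  const double x = 3.5, y = -1.25, z = 10.0;   // x*y == -4.375 exactly
  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  assert(std::fma(x, y, -z) == x * y - z);
  // fold (fsub z, (fmul x, y)) -> (fma (fneg x), y, z)
  assert(std::fma(-x, y, z) == z - x * y);
  return 0;
}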
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) { + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N20), + + N21, N0)); + } + + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fsub (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), + // (fma (fpext u), (fpext v), (fneg z))) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N1.getOperand(2).getOperand(0); + if (N120.getOpcode() == ISD::FMUL) { + SDValue N1200 = N120.getOperand(0); + SDValue N1201 = N120.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1201), + N0)); + } + } + + // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fneg (fpext y)), (fpext z), + // (fma (fneg (fpext u)), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + SDValue N100 = N1.getOperand(0).getOperand(0); + SDValue N101 = N1.getOperand(0).getOperand(1); + SDValue N102 = N1.getOperand(0).getOperand(2); + if (N102.getOpcode() == ISD::FMUL) { + SDValue N1020 = N102.getOperand(0); + SDValue N1021 = N102.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1021), + N0)); + } + } + } + } + + return SDValue(); } SDValue DAGCombiner::visitFADD(SDNode *N) { @@ -6802,32 +7824,32 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, + return DAG.getNode(ISD::FSUB, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // 
fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, + return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold lots of things. @@ -6843,17 +7865,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, SDLoc(N), VT, - N0.getOperand(1), N1)); + return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, DL, VT); // If allowed, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, DL, VT); // We can fold chains of FADD's of the same value into multiplications. // This transform is not safe in general because we are reducing the number @@ -6865,21 +7886,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); } } @@ -6889,20 +7907,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); } } @@ -6910,18 +7926,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP = 
dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, DAG.getConstantFP(3.0, VT)); + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N1, DAG.getConstantFP(3.0, DL, VT)); + } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, DAG.getConstantFP(3.0, VT)); + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0, DAG.getConstantFP(3.0, DL, VT)); + } } // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) @@ -6929,81 +7947,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), DAG.getConstantFP(4.0, VT)); + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); + } } } // enable-unsafe-fp-math // FADD -> FMA combines: - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - - // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), N1); - - // fold (fadd x, (fmul y, z)) -> (fma y, z, x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), N0); - - // When FP_EXTEND nodes are free on the target, and there is an opportunity - // to combine into FMA, arrange such nodes accordingly. - if (TLI.isFPExtFree(VT)) { - - // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(1)), N1); - } - - // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(1)), N0); - } - } - - // More folding opportunities when target permits. 
- if (TLI.enableAggressiveFMAFusion(VT)) { - - // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - N1)); - - // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(2).getOperand(0), - N1.getOperand(2).getOperand(1), - N0)); - } + SDValue Fused = visitFADDForFMACombine(N); + if (Fused) { + AddToWorklist(Fused.getNode()); + return Fused; } return SDValue(); @@ -7019,14 +7974,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -7049,7 +8003,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub x, x) -> 0.0 if (N0 == N1) - return DAG.getConstantFP(0.0f, VT); + return DAG.getConstantFP(0.0f, dl, VT); // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) @@ -7066,138 +8020,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // FSUB -> FMA combines: - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - - // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - - // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - N1.getOperand(0)), - N1.getOperand(1), N0); - - // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && - N0.getOperand(0).getOpcode() == ISD::FMUL && - ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || - TLI.enableAggressiveFMAFusion(VT))) { - SDValue N00 = N0.getOperand(0).getOperand(0); - SDValue N01 = N0.getOperand(0).getOperand(1); - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, N00), N01, - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - - // When FP_EXTEND nodes are free on the target, and there is an opportunity - // to combine into FMA, arrange such nodes accordingly. 
- if (TLI.isFPExtFree(VT)) { - - // fold (fsub (fpext (fmul x, y)), z) - // -> (fma (fpext x), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); - } - - // fold (fsub x, (fpext (fmul y, z))) - // -> (fma (fneg (fpext y)), (fpext z), x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N10.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(1)), - N0); - } - - // fold (fsub (fpext (fneg (fmul, x, y))), z) - // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FNEG) { - SDValue N000 = N00.getOperand(0); - if (N000.getOpcode() == ISD::FMUL) { - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N000.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N000.getOperand(1)), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - } - } - - // fold (fsub (fneg (fpext (fmul, x, y))), z) - // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FNEG) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FP_EXTEND) { - SDValue N000 = N00.getOperand(0); - if (N000.getOpcode() == ISD::FMUL) { - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N000.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N000.getOperand(1)), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - } - } - } - - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { - - // fold (fsub (fma x, y, (fmul u, v)), z) - // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1))); - - // fold (fsub x, (fma y, z, (fmul u, v))) - // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) { - SDValue N20 = N1.getOperand(2).getOperand(0); - SDValue N21 = N1.getOperand(2).getOperand(1); - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1.getOperand(0)), - N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N20), - N21, N0)); - } - } + SDValue Fused = visitFSUBForFMACombine(N); + if (Fused) { + AddToWorklist(Fused.getNode()); + return Fused; } return SDValue(); @@ -7209,29 +8035,24 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. 
- SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - // Canonicalize vector constant to RHS. - if (N0.getOpcode() == ISD::BUILD_VECTOR && - N1.getOpcode() != ISD::BUILD_VECTOR) - if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0)) - if (BV0->isConstant()) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); } // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); // canonicalize constant to RHS - if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -7247,14 +8068,22 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Fold scalars or any vector constants (not just splats). // This fold is done in general by InstCombine, but extra fmul insts // may have been generated during lowering. + SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); - if ((N1CFP && isConstOrConstSplatFP(N01)) || - (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDLoc SL(N); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); + + // Check 1: Make sure that the first operand of the inner multiply is NOT + // a constant. Otherwise, we may induce infinite looping. + if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { + // Check 2: Make sure that the second operand of the inner multiply and + // the second operand of the outer multiply are constants. + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); + } } } @@ -7263,21 +8092,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { - SDLoc SL(N); - const SDValue Two = DAG.getConstantFP(2.0, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts); + const SDValue Two = DAG.getConstantFP(2.0, DL, VT); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); + return DAG.getNode(ISD::FNEG, DL, VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { @@ -7285,7 +8113,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -7364,14 +8192,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (Options.UnsafeFPMath && N1CFP && N0 == N2) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, VT))); + N1, DAG.getConstantFP(1.0, dl, VT))); // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, VT))); + N1, DAG.getConstantFP(-1.0, dl, VT))); return SDValue(); @@ -7387,10 +8215,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) @@ -7412,8 +8239,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, - DAG.getConstantFP(Recip, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(Recip, DL, VT)); } // If this FDIV is part of a reciprocal square root, it may be folded @@ -7492,24 +8319,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SmallVector<SDNode *, 4> Users; // Find all FDIV users of the same divisor. - for (SDNode::use_iterator UI = N1.getNode()->use_begin(), - UE = N1.getNode()->use_end(); - UI != UE; ++UI) { - SDNode *User = UI.getUse().getUser(); - if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1) - Users.push_back(User); + for (auto *U : N1->uses()) { + if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) + Users.push_back(U); } if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0 - SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); // Dividend / Divisor -> Dividend * Reciprocal - for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { - if ((*I)->getOperand(0) != FPOne) { - SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT, - (*I)->getOperand(0), Reciprocal); - DAG.ReplaceAllUsesWith(*I, NewNode.getNode()); + for (auto *U : Users) { + SDValue Dividend = U->getOperand(0); + if (Dividend != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, + Reciprocal); + DAG.ReplaceAllUsesWith(U, NewNode.getNode()); } } return SDValue(); @@ -7539,20 +8364,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { EVT VT = RV.getValueType(); - RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV); + SDLoc DL(N); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); AddToWorklist(RV.getNode()); // Unfortunately, RV is now NaN if the input was exactly 0. // Select out this case and force the answer to 0. 
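A quick standalone check, not part of the commit, of the comment above: expanding sqrt(x) as x * (1/sqrt(x)) misbehaves only at x == 0, where the reciprocal square root is +inf and 0 * inf is NaN, which is exactly the case the select built below filters out:

    #include <cassert>
    #include <cmath>

    int main() {
      float X = 0.0f;
      float Rsqrt = 1.0f / std::sqrt(X);      // +inf at X == 0
      float RV = X * Rsqrt;                   // 0 * inf == NaN, not 0
      assert(std::isnan(RV));
      float Fixed = (X == 0.0f) ? 0.0f : RV;  // the SELECT/VSELECT emitted here
      assert(Fixed == 0.0f);
      return 0;
    }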
- SDValue Zero = DAG.getConstantFP(0.0, VT); + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); SDValue ZeroCmp = - DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), + DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT), N->getOperand(0), Zero, ISD::SETEQ); AddToWorklist(ZeroCmp.getNode()); AddToWorklist(RV.getNode()); RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, - SDLoc(N), VT, ZeroCmp, Zero, RV); + DL, VT, ZeroCmp, Zero, RV); return RV; } } @@ -7611,12 +8437,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7638,11 +8463,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -7651,11 +8477,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), - DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } @@ -7664,12 +8491,11 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7691,39 +8517,82 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } return SDValue(); } +// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x +static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP) + return SDValue(); + + SDValue Src = N0.getOperand(0); + EVT 
SrcVT = Src.getValueType(); + bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; + bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; + + // We can safely assume the conversion won't overflow the output range, + // because (for example) (uint8_t)18293.f is undefined behavior. + + // Since we can assume the conversion won't overflow, our decision as to + // whether the input will fit in the float should depend on the minimum + // of the input range and output range. + + // This means this is also safe for a signed input and unsigned output, since + // a negative input would lead to undefined behavior. + unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; + unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; + unsigned ActualSize = std::min(InputSize, OutputSize); + const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); + + // We can only fold away the float conversion if the input range can be + // represented exactly in the float range. + if (APFloat::semanticsPrecision(sem) >= ActualSize) { + if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) { + unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOp, SDLoc(N), VT, Src); + } + if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); + if (SrcVT == VT) + return Src; + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src); + } + return SDValue(); +} + SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); - return SDValue(); + return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); - return SDValue(); + return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { @@ -7742,11 +8611,18 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { - // This is a value preserving truncation if both round's are. - bool IsTrunc = N->getConstantOperandVal(1) == 1 && - N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), - DAG.getIntPtrConstant(IsTrunc)); + const bool NIsTrunc = N->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + // If the first fp_round isn't a value preserving truncation, it might + // introduce a tie in the second fp_round, that wouldn't occur in the + // single-step fp_round we want to fold to. + // In other words, double rounding isn't the same as rounding. + // Also, this is a value preserving truncation iff both fp_round's are. 
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { + SDLoc DL(N); + return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + } } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) @@ -7769,8 +8645,9 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); - return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); + SDLoc DL(N); + SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); } return SDValue(); @@ -7778,7 +8655,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. @@ -7787,9 +8663,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); + // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) + if (N0.getOpcode() == ISD::FP16_TO_FP && + TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND @@ -7813,7 +8694,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, SDLoc(N0), - N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), + N0.getValueType(), ExtLoad, + DAG.getIntPtrConstant(1, SDLoc(N0))), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -7823,11 +8705,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); @@ -7835,11 +8716,10 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); @@ -7847,11 +8727,10 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue DAGCombiner::visitFFLOOR(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); @@ -7862,14 +8741,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // Constant fold FNEG. 
- if (isa<ConstantFPSDNode>(N0)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0)); + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) @@ -7893,8 +8767,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // For a scalar, just generate 0x80... SignMask = APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL0(N0); + Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, + DAG.getConstant(SignMask, DL0, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } @@ -7927,7 +8802,7 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -7948,7 +8823,7 @@ SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -7964,13 +8839,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // fold (fabs c1) -> fabs(c1) - if (isa<ConstantFPSDNode>(N0)) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) @@ -8000,8 +8870,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // For a scalar, just generate 0x7f... SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL(N0); + Int = DAG.getNode(ISD::AND, DL, IntVT, Int, + DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } @@ -8071,13 +8942,14 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (AndConst.isPowerOf2() && cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDLoc DL(N); SDValue SetCC = - DAG.getSetCC(SDLoc(N), + DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, Op0.getValueType()), + Op0, DAG.getConstant(0, DL, Op0.getValueType()), ISD::SETNE); - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. 
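The FNEG and FABS hunks above both use the sign-bit trick on the integer image of the float: XOR with the sign bit (APInt::getSignBit) negates, AND with its complement takes the absolute value. A minimal sketch of the same idea in plain C++, with the f32 constants written out by hand, illustrative only:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint32_t asBits(float F) { uint32_t I; std::memcpy(&I, &F, 4); return I; }
    static float asFloat(uint32_t I) { float F; std::memcpy(&F, &I, 4); return F; }

    int main() {
      float X = -3.5f;
      float Neg = asFloat(asBits(X) ^ UINT32_C(0x80000000)); // FNEG as XOR
      float Abs = asFloat(asBits(X) & UINT32_C(0x7FFFFFFF)); // FABS as AND
      assert(Neg == 3.5f && Abs == 3.5f);
      return 0;
    }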
@@ -8130,12 +9002,11 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; - if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) - if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && - Op0.getOpcode() == ISD::XOR) { - TheXor = Op0.getNode(); - Equal = true; - } + if (isOneConstant(Op0) && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } EVT SetCCVT = N1.getValueType(); if (LegalTypes) @@ -8193,11 +9064,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; - VT = Use->getValueType(0); + VT = LD->getMemoryVT(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; - VT = ST->getValue().getValueType(); + VT = ST->getMemoryVT(); } else return false; @@ -8280,8 +9151,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Don't create a indexed load / store with zero offset. - if (isa<ConstantSDNode>(Offset) && - cast<ConstantSDNode>(Offset)->isNullValue()) + if (isNullConstant(Offset)) return false; // Try turning it into a pre-indexed load / store except when: @@ -8309,24 +9179,25 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // a copy of the original base pointer. SmallVector<SDNode *, 16> OtherUses; if (isa<ConstantSDNode>(Offset)) - for (SDNode *Use : BasePtr.getNode()->uses()) { - if (Use == Ptr.getNode()) + for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(), + UE = BasePtr.getNode()->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + // Skip the use that is Ptr and uses of other results from BasePtr's + // node (important for nodes that return multiple results). + if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; - if (Use->isPredecessorOf(N)) + if (Use.getUser()->isPredecessorOf(N)) continue; - if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) { + if (Use.getUser()->getOpcode() != ISD::ADD && + Use.getUser()->getOpcode() != ISD::SUB) { OtherUses.clear(); break; } - SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1); - if (Op1.getNode() == BasePtr.getNode()) - std::swap(Op0, Op1); - assert(Op0.getNode() == BasePtr.getNode() && - "Use of ADD/SUB but not an operand"); - + SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); if (!isa<ConstantSDNode>(Op1)) { OtherUses.clear(); break; @@ -8338,7 +9209,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { break; } - OtherUses.push_back(Use); + OtherUses.push_back(Use.getUser()); } if (Swapped) @@ -8431,12 +9302,14 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; else CNV = CNV - Offset1; + SDLoc DL(OtherUses[i]); + // We can now generate the new expression. - SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); + SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); SDValue NewOp2 = Result.getValue(isLoad ? 
1 : 0); SDValue NewUse = DAG.getNode(Opcode, - SDLoc(OtherUses[i]), + DL, OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); deleteAndRecombine(OtherUses[i]); @@ -8494,8 +9367,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { ISD::MemIndexedMode AM = ISD::UNINDEXED; if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { // Don't create a indexed load / store with zero offset. - if (isa<ConstantSDNode>(Offset) && - cast<ConstantSDNode>(Offset)->isNullValue()) + if (isNullConstant(Offset)) continue; // Try turning it into a post-indexed load / store except when @@ -8585,7 +9457,7 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { "Cannot split out indexing using opaque target constants"); if (Inc.getOpcode() == ISD::TargetConstant) { ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc); - Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), ConstInc->getValueType(0)); } @@ -8686,7 +9558,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), Align, LD->getAAInfo()); - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + if (NewLoad.getNode() != N) + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } @@ -8851,9 +9724,6 @@ struct LoadedSlice { unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} - LoadedSlice(const LoadedSlice &LS) - : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} - /// \brief Get the bits used in a chunk of bits \p BitWidth large. /// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. @@ -8980,8 +9850,9 @@ struct LoadedSlice { if (Offset) { // BaseAddr = BaseAddr + Offset. EVT ArithType = BaseAddr.getValueType(); - BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, - DAG->getConstant(Offset, ArithType)); + SDLoc DL(Origin); + BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, + DAG->getConstant(Offset, DL, ArithType)); } // Create the type of the loaded slice according to its size. @@ -9336,7 +10207,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { if (NotMaskLZ == 64) return Result; // All zero mask. // See if we have a continuous run of bits. If so, we have 0*1+0* - if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) return Result; // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. @@ -9387,10 +10258,12 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. - if (ByteShift) - IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal, - DAG.getConstant(ByteShift*8, + if (ByteShift) { + SDLoc DL(IVal); + IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DL, DC->getShiftAmountTy(IVal.getValueType()))); + } // Figure out the offset for the store and the alignment of the access. 
unsigned StOffset; @@ -9403,8 +10276,9 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { - Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(), - Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + SDLoc DL(IVal); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } @@ -9486,8 +10360,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { // The narrowing should be profitable, the load/store operation should be // legal (or custom) and the store size should be equal to the NewVT width. while (NewBW < BitWidth && - !(TLI.isOperationLegalOrCustom(Opc, NewVT) && - TLI.isNarrowingProfitable(VT, NewVT))) { + (NewVT.getStoreSizeInBits() != NewBW || + !TLI.isOperationLegalOrCustom(Opc, NewVT) || + !TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } @@ -9517,7 +10392,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), Ptr.getValueType(), Ptr, - DAG.getConstant(PtrOff, Ptr.getValueType())); + DAG.getConstant(PtrOff, SDLoc(LD), + Ptr.getValueType())); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), @@ -9525,7 +10401,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->isInvariant(), NewAlign, LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, - DAG.getConstant(NewImm, NewVT)); + DAG.getConstant(NewImm, SDLoc(Value), + NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), @@ -9599,6 +10476,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } +namespace { /// Helper struct to parse and store a memory address as base + index + offset. /// We ignore sign extensions when it is safe to do so. /// The following two expressions are not equivalent. To differentiate we need @@ -9686,37 +10564,156 @@ struct BaseIndexOffset { return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); } }; +} // namespace -/// Holds a pointer to an LSBaseSDNode as well as information on where it -/// is located in a sequence of memory operations connected by a chain. -struct MemOpLink { - MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): - MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } - // Ptr to the mem node. - LSBaseSDNode *MemNode; - // Offset from the base ptr. - int64_t OffsetFromBase; - // What is the sequence number of this mem node. - // Lowest mem operand in the DAG starts at zero. - unsigned SequenceNum; -}; +bool DAGCombiner::MergeStoresOfConstantsOrVecElts( + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, + unsigned NumElem, bool IsConstantSrc, bool UseVector) { + // Make sure we have something to merge. + if (NumElem < 2) + return false; + + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned LatestNodeUsed = 0; + + for (unsigned i=0; i < NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // latest store node which is *used* and replaced by the wide store. 
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; + } + + // The latest Node in the DAG. + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + SDLoc DL(StoreNodes[0].MemNode); + + SDValue StoredVal; + if (UseVector) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + if (IsConstantSrc) { + // A vector store with a constant source implies that the constant is + // zero; we only handle merging stores of constant zeros because the zero + // can be materialized without a load. + // It may be beneficial to loosen this restriction to allow non-zero + // store merging. + StoredVal = DAG.getConstant(0, DL, Ty); + } else { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElem ; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue Val = St->getValue(); + // All of the operands of a BUILD_VECTOR must have the same type. + if (Val.getValueType() != MemVT) + return false; + Ops.push_back(Val); + } + + // Build the extracted vector elements back into a vector. + StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); + } + } else { + // We should always use a vector store when merging extracted vector + // elements, so this path implies a store of constants. + assert(IsConstantSrc && "Merged vector elements should use vector store"); + + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + APInt StoreInt(StoreBW, 0); + + // Construct a single integer constant which is made of the smaller + // constant inputs. + bool IsLE = TLI.isLittleEndian(); + for (unsigned i = 0; i < NumElem ; ++i) { + unsigned Idx = IsLE ? (NumElem - 1 - i) : i; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + SDValue Val = St->getValue(); + StoreInt <<= ElementSizeBytes*8; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { + StoreInt |= C->getAPIntValue().zext(StoreBW); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { + StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + } else { + llvm_unreachable("Invalid constant element type"); + } + } + + // Create the new Load and Store operations. + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); + } + + SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + false, false, + FirstInChain->getAlignment()); + + // Replace the last store with the new store + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change its chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. 
+ while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } + + return true; +} + +static bool allowableAlignment(const SelectionDAG &DAG, + const TargetLowering &TLI, EVT EVTTy, + unsigned AS, unsigned Align) { + if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) + return true; + + Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + return (Align >= ABIAlignment); +} bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + if (OptLevel == CodeGenOpt::None) + return false; + EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; - bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + // This function cannot currently deal with non-byte-sized memory sizes. + if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + return false; // Don't merge vectors into wider inputs. if (MemVT.isVector() || !MemVT.isSimple()) return false; // Perform an early exit check. Do not bother looking at stored values that - // are not constants or loads. + // are not constants, loads, or extracted vector elements. SDValue StoredVal = St->getValue(); bool IsLoadSrc = isa<LoadSDNode>(StoredVal); - if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && - !IsLoadSrc) + bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || + isa<ConstantFPSDNode>(StoredVal); + bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) return false; // Only look at ends of store sequences. @@ -9761,10 +10758,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (!Ptr.equalBaseIndex(BasePtr)) break; - // Check that the alignment is the same. - if (Index->getAlignment() != St->getAlignment()) - break; - // The memory operands must not be volatile. if (Index->isVolatile() || Index->isIndexed()) break; @@ -9778,11 +10771,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Index->getMemoryVT() != MemVT) break; - // We do not allow unaligned stores because we want to prevent overriding - // stores. - if (Index->getAlignment()*8 != MemVT.getSizeInBits()) - break; - // We found a potential memory operand to merge. StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); @@ -9856,9 +10844,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The node with the lowest store address. LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); // Store the constants into memory as one consecutive store. - if (!IsLoadSrc) { + if (IsConstantSrc) { unsigned LastLegalType = 0; unsigned LastLegalVectorType = 0; bool NonZero = false; @@ -9878,27 +10868,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the constant store. unsigned StoreBW = (i+1) * ElementSizeBytes * 8; EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, + FirstStoreAlign)) { LastLegalType = i+1; // Or check whether a truncstore is legal. 
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == - TargetLowering::TypePromoteInteger) { + } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) - LastLegalType = i+1; + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, + FirstStoreAlign)) { + LastLegalType = i + 1; + } } // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(Ty)) + if (TLI.isTypeLegal(Ty) && + allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { LastLegalVectorType = i + 1; + } } - // We only use vectors if the constant is known to be zero and the - // function is not marked with the noimplicitfloat attribute. - if (NonZero || NoVectors) + + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if (NoVectors) { LastLegalVectorType = 0; + } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, + LastLegalVectorType, + FirstStoreAS)) { + LastLegalVectorType = 0; + } // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) @@ -9907,85 +10911,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; - // Make sure we have something to merge. - if (NumElem < 2) - return false; - - unsigned EarliestNodeUsed = 0; - for (unsigned i=0; i < NumElem; ++i) { - // Find a chain for the new wide-store operand. Notice that some - // of the store nodes that we found may not be selected for inclusion - // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; - } + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + true, UseVector); + } - // The earliest Node in the DAG. - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; - SDLoc DL(StoreNodes[0].MemNode); + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecEltSrc) { + unsigned NumElem = 0; + for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = St->getValue(); + // This restriction could be loosened. + // Bail out if any stored values are not elements extracted from a vector. + // It should be possible to handle mixed sources, but load sources need + // more careful handling (see the block of code below that handles + // consecutive loads). + if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; - SDValue StoredVal; - if (UseVector) { // Find a legal type for the vector store. 
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); - assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); - StoredVal = DAG.getConstant(0, Ty); - } else { - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - APInt StoreInt(StoreBW, 0); - - // Construct a single integer constant which is made of the smaller - // constant inputs. - bool IsLE = TLI.isLittleEndian(); - for (unsigned i = 0; i < NumElem ; ++i) { - unsigned Idx = IsLE ?(NumElem - 1 - i) : i; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); - SDValue Val = St->getValue(); - StoreInt<<=ElementSizeBytes*8; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { - StoreInt|=C->getAPIntValue().zext(StoreBW); - } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { - StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); - } else { - llvm_unreachable("Invalid constant element type"); - } - } - - // Create the new Load and Store operations. - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - StoredVal = DAG.getConstant(StoreInt, StoreTy); - } - - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - false, false, - FirstInChain->getAlignment()); - - // Replace the first store with the new store - CombineTo(EarliestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { - if (StoreNodes[i].MemNode == EarliestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(Ty) && + allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) + NumElem = i + 1; } - return true; + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + false, true); } // Below we handle the case of multiple consecutive stores that @@ -10007,10 +10960,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (!Ld->hasNUsesOfValue(1, 0)) break; - // Check that the alignment is the same as the stores. - if (Ld->getAlignment() != St->getAlignment()) - break; - // The memory operands must not be volatile. if (Ld->isVolatile() || Ld->isIndexed()) break; @@ -10048,6 +10997,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { St->getAlignment() >= RequiredAlignment) return false; + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); + unsigned FirstLoadAS = FirstLoad->getAddressSpace(); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + // Scan the memory operations on the chain and find the first non-consecutive // load memory address. These variables hold the index in the store node // array. 
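The loop removed above (its logic now lives in the shared MergeStoresOfConstantsOrVecElts helper) builds one wide constant by shifting each element's bits in, walking the elements in reverse on little-endian targets so the highest-addressed store lands in the most significant bits. A freestanding sketch of that packing, assuming 8-bit elements:

    #include <cstdint>
    #include <vector>

    // Pack consecutive byte-store constants into one wide integer,
    // mirroring the StoreInt loop deleted above.
    uint64_t packStoredConstants(const std::vector<uint8_t> &Elems,
                                 bool IsLittleEndian) {
      uint64_t Merged = 0;
      const unsigned N = Elems.size();
      for (unsigned i = 0; i < N; ++i) {
        unsigned Idx = IsLittleEndian ? (N - 1 - i) : i;
        Merged = (Merged << 8) | Elems[Idx];
      }
      return Merged;
    }
    // e.g. byte stores of {0x11, 0x22, 0x33, 0x44} to increasing addresses
    // merge into a single little-endian i32 store of 0x44332211.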
@@ -10056,7 +11009,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { unsigned LastLegalVectorType = 0; unsigned LastLegalIntegerType = 0; StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = LoadNodes[0].MemNode->getChain(); + SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { // All loads much share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) @@ -10069,13 +11022,18 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the vector store. EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && + allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { LastLegalVectorType = i + 1; + } // Find a legal type for the integer store. unsigned StoreBW = (i+1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && + allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == @@ -10085,7 +11043,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, + FirstStoreAlign) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, + FirstLoadAlign)) LastLegalIntegerType = i+1; } } @@ -10103,18 +11065,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; - // The earliest Node in the DAG. - unsigned EarliestNodeUsed = 0; - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. + unsigned LatestNodeUsed = 0; for (unsigned i=1; i<NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + // Find if it is better to use vectors or integers to load and store // to memory. 
  EVT JointMemOpVT;
@@ -10128,18 +11091,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   SDLoc LoadDL(LoadNodes[0].MemNode);
   SDLoc StoreDL(StoreNodes[0].MemNode);
 
-  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
-  SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
-                                FirstLoad->getChain(),
-                                FirstLoad->getBasePtr(),
-                                FirstLoad->getPointerInfo(),
-                                false, false, false,
-                                FirstLoad->getAlignment());
-
-  SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
-                                  FirstInChain->getBasePtr(),
-                                  FirstInChain->getPointerInfo(), false, false,
-                                  FirstInChain->getAlignment());
+  SDValue NewLoad = DAG.getLoad(
+      JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
+      FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+
+  SDValue NewStore = DAG.getStore(
+      LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
+      FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
 
   // Replace one of the loads with the new load.
   LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
@@ -10154,12 +11112,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
   }
 
-  // Replace the first store with the new store.
-  CombineTo(EarliestOp, NewStore);
+  // Replace the last store with the new store.
+  CombineTo(LatestOp, NewStore);
 
   // Erase all other stores.
   for (unsigned i = 0; i < NumElem ; ++i) {
     // Remove all Store nodes.
-    if (StoreNodes[i].MemNode == EarliestOp)
+    if (StoreNodes[i].MemNode == LatestOp)
       continue;
     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
     DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
@@ -10214,8 +11172,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     case MVT::f32:
       if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
         Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
-                              bitcastToAPInt().getZExtValue(), MVT::i32);
+                              bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+                              MVT::i32);
         return DAG.getStore(Chain, SDLoc(N), Tmp,
                             Ptr, ST->getMemOperand());
       }
@@ -10224,8 +11184,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
            !ST->isVolatile()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
         Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
-                              getZExtValue(), MVT::i64);
+                              getZExtValue(), SDLoc(CFP), MVT::i64);
         return DAG.getStore(Chain, SDLoc(N), Tmp,
                             Ptr, ST->getMemOperand());
       }
@@ -10236,8 +11197,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           // argument passing.  Since this is so common, custom legalize the
           // 64-bit integer store into two 32-bit stores.
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); - SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); @@ -10245,18 +11206,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { bool isNonTemporal = ST->isNonTemporal(); AAMDNodes AAInfo = ST->getAAInfo(); + SDLoc DL(N); + SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, ST->getAlignment(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, - DAG.getConstant(4, Ptr.getValueType())); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, Alignment, AAInfo); - return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -10268,11 +11231,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, SDLoc(N), Value, + if (Align > ST->getAlignment()) { + SDValue NewStore = + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, ST->getAAInfo()); + if (NewStore.getNode() != N) + return CombineTo(ST, NewStore, true); + } } } @@ -10493,24 +11460,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDValue Offset; EVT PtrType = NewPtr.getValueType(); MachinePointerInfo MPI; + SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; - if (TLI.isBigEndian()) - PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; - Offset = DAG.getConstant(PtrOff, PtrType); + Offset = DAG.getConstant(PtrOff, DL, PtrType); MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); } else { + Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); Offset = DAG.getNode( - ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, - DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); - if (TLI.isBigEndian()) - Offset = DAG.getNode( - ISD::SUB, SDLoc(EVE), EltNo.getValueType(), - DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + ISD::MUL, DL, PtrType, Offset, + DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); MPI = OriginalLoad->getPointerInfo(); } - NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. 
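A few hunks up, visitSTORE splits a 64-bit FP constant store into two 32-bit stores at Ptr and Ptr+4, swapping the halves on big-endian targets. A freestanding sketch of the same transformation on plain memory (illustrative only; the real code emits SDNodes and shrinks the second store's alignment with MinAlign):

    #include <cstdint>
    #include <cstring>
    #include <utility>

    void storeF64AsTwoI32(double V, uint32_t *Dst, bool IsBigEndian) {
      uint64_t Bits;
      std::memcpy(&Bits, &V, sizeof Bits);        // bitcastToAPInt()
      uint32_t Lo = static_cast<uint32_t>(Bits);  // Val & 0xFFFFFFFF
      uint32_t Hi = static_cast<uint32_t>(Bits >> 32);
      if (IsBigEndian)
        std::swap(Lo, Hi);
      Dst[0] = Lo;  // store at Ptr
      Dst[1] = Hi;  // store at Ptr + 4
    }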
@@ -10620,8 +11583,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (!LegalOperations) { EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - SVInVec, DAG.getConstant(OrigElt, IndexTy)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, + DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); } } @@ -10710,7 +11673,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; - EltNo = DAG.getConstant(Elt, EltNo.getValueType()); + EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); } } @@ -10800,7 +11763,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SourceType); + DAG.getConstant(0, SDLoc(N), SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops(NewBVElems, Filler); @@ -10890,6 +11853,11 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) return SDValue(); + // Just because the floating-point vector type is legal does not necessarily + // mean that the corresponding integer vector type is. + if (!isTypeLegal(NVT)) + return SDValue(); + SmallVector<SDValue, 8> Opnds; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); @@ -10914,12 +11882,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - SDValue V = reduceBuildVecExtToExtBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; - V = reduceBuildVecConvertToConvertBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT @@ -10942,8 +11908,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (Op.getOpcode() == ISD::UNDEF) continue; // See if we can combine this build_vector into a blend with a zero vector. - if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op.getNode())->isNullValue()) || + if (!VecIn2.getNode() && (isNullConstant(Op) || (Op.getOpcode() == ISD::ConstantFP && cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { UsesZeroVector = true; @@ -11047,20 +12012,20 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) return SDValue(); - + // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. - VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); } else return SDValue(); } if (UsesZeroVector) - VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) : - DAG.getConstantFP(0.0, VT); + VecIn2 = VT.isInteger() ? 
DAG.getConstant(0, dl, VT) : + DAG.getConstantFP(0.0, dl, VT); else // If VecIn2 is unused then change it to undef. VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); @@ -11081,6 +12046,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); } +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT OpVT = N->getOperand(0).getValueType(); + + // If the operands are legal vectors, leave them alone. + if (TLI.isTypeLegal(OpVT)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SmallVector<SDValue, 8> Ops; + + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + + // Keep track of what we encounter. + bool AnyInteger = false; + bool AnyFP = false; + for (const SDValue &Op : N->ops()) { + if (ISD::BITCAST == Op.getOpcode() && + !Op.getOperand(0).getValueType().isVector()) + Ops.push_back(Op.getOperand(0)); + else if (ISD::UNDEF == Op.getOpcode()) + Ops.push_back(ScalarUndef); + else + return SDValue(); + + // Note whether we encounter an integer or floating point scalar. + // If it's neither, bail out, it could be something weird like x86mmx. + EVT LastOpVT = Ops.back().getValueType(); + if (LastOpVT.isFloatingPoint()) + AnyFP = true; + else if (LastOpVT.isInteger()) + AnyInteger = true; + else + return SDValue(); + } + + // If any of the operands is a floating point scalar bitcast to a vector, + // use floating point types throughout, and bitcast everything. + // Replace UNDEFs by another scalar UNDEF node, of the final desired type. + if (AnyFP) { + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + if (AnyInteger) { + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.getOpcode() == ISD::UNDEF) + Op = ScalarUndef; + else + Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + } + } + } + + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getSizeInBits() / SVT.getSizeInBits()); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector @@ -11096,9 +12123,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - // Optimize concat_vectors where one of the vectors is undef. - if (N->getNumOperands() == 2 && - N->getOperand(1)->getOpcode() == ISD::UNDEF) { + // Optimize concat_vectors where all but the first of the vectors are undef. 
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { + return Op.getOpcode() == ISD::UNDEF; + })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -11106,6 +12134,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (In->getOpcode() == ISD::BITCAST && !In->getOperand(0)->getValueType(0).isVector()) { SDValue Scalar = In->getOperand(0); + + // If the bitcast type isn't legal, it might be a trunc of a legal type; + // look through the trunc so we can still do the transform: + // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + if (Scalar->getOpcode() == ISD::TRUNCATE && + !TLI.isTypeLegal(Scalar.getValueType()) && + TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + Scalar = Scalar->getOperand(0); + EVT SclTy = Scalar->getValueType(0); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) @@ -11122,39 +12159,61 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. + // We have already tested above for an UNDEF only concatenation. // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) // -> (BUILD_VECTOR A, B, ..., C, D, ...) - if (N->getNumOperands() == 2 && - N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && - N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); + auto IsBuildVectorOrUndef = [](const SDValue &Op) { + return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); + }; + bool AllBuildVectorsOrUndefs = + std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); + if (AllBuildVectorsOrUndefs) { SmallVector<SDValue, 8> Opnds; - unsigned BuildVecNumElts = N0.getNumOperands(); - - EVT SclTy0 = N0.getOperand(0)->getValueType(0); - EVT SclTy1 = N1.getOperand(0)->getValueType(0); - if (SclTy0.isFloatingPoint()) { - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); - } else { + EVT SVT = VT.getScalarType(); + + EVT MinVT = SVT; + if (!SVT.isFloatingPoint()) { // If BUILD_VECTOR are from built from integer, they may have different - // operand types. Get the smaller type and truncate all operands to it. - EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1; - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N0.getOperand(i))); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N1.getOperand(i))); + // operand types. Get the smallest type and truncate all operands to it. + bool FoundMinVT = false; + for (const SDValue &Op : N->ops()) + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + EVT OpSVT = Op.getOperand(0)->getValueType(0); + MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? 
OpSVT : MinVT; + FoundMinVT = true; + } + assert(FoundMinVT && "Concat vector type mismatch"); } + for (const SDValue &Op : N->ops()) { + EVT OpVT = Op.getValueType(); + unsigned NumElts = OpVT.getVectorNumElements(); + + if (ISD::UNDEF == Op.getOpcode()) + Opnds.append(NumElts, DAG.getUNDEF(MinVT)); + + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + if (SVT.isFloatingPoint()) { + assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); + Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); + } else { + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back( + DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); + } + } + } + + assert(VT.getVectorNumElements() == Opnds.size() && + "Concat vector type mismatch"); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. + if (SDValue V = combineConcatVectorOfScalars(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -11216,7 +12275,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); - unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && "IDX in concat is not a multiple of the result vector length."); @@ -11347,7 +12406,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); } -// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, +// or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -11361,6 +12421,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); unsigned NumConcats = NumElts / NumElemsPerConcat; + // Special case: shuffle(concat(A,B)) can be more efficiently represented + // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high + // half vector elements. + if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && + std::all_of(SVN->getMask().begin() + NumElemsPerConcat, + SVN->getMask().end(), [](int i) { return i == -1; })) { + N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), + ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat)); + N1 = DAG.getUNDEF(ConcatVT); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); + } + // Look at every vector that's inserted. We're looking for exact // subvector-sized copies from a concatenated vector for (unsigned I = 0; I != NumConcats; ++I) { @@ -11459,7 +12531,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If it is a splat, check if the argument vector is another splat or a - // build_vector with all scalar elements the same. + // build_vector. 
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -11496,6 +12568,18 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Splat of <x, x, x, x>, return <x, x, x, x> if (AllSame) return N0; + + // Canonicalize any other splat as a build_vector. + const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); + SmallVector<SDValue, 8> Ops(NumElts, Splatted); + SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), + V->getValueType(0), Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (V->getValueType(0) != VT) + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + return NewBV; } } @@ -11516,6 +12600,118 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - + // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + SmallVector<SDValue, 8> Ops; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M % NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + break; + } + } + Ops.push_back(Op); + } + if (Ops.size() == VT.getVectorNumElements()) { + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + } + } + + // If this shuffle only has a single input that is a bitcasted shuffle, + // attempt to merge the 2 shuffles and suitably bitcast the inputs/output + // back to their original types. + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + TLI.isTypeLegal(VT)) { + + // Peek through the bitcast only if there is one user. + SDValue BC0 = N0; + while (BC0.getOpcode() == ISD::BITCAST) { + if (!BC0.hasOneUse()) + break; + BC0 = BC0.getOperand(0); + } + + auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { + if (Scale == 1) + return SmallVector<int, 8>(Mask.begin(), Mask.end()); + + SmallVector<int, 8> NewMask; + for (int M : Mask) + for (int s = 0; s != Scale; ++s) + NewMask.push_back(M < 0 ? -1 : Scale * M + s); + return NewMask; + }; + + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { + EVT SVT = VT.getScalarType(); + EVT InnerVT = BC0->getValueType(0); + EVT InnerSVT = InnerVT.getScalarType(); + + // Determine which shuffle works with the smaller scalar type. + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? 
VT : InnerVT; + EVT ScaleSVT = ScaleVT.getScalarType(); + + if (TLI.isTypeLegal(ScaleVT) && + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { + + int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + + // Scale the shuffle masks to the smaller scalar type. + ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); + SmallVector<int, 8> InnerMask = + ScaleShuffleMask(InnerSVN->getMask(), InnerScale); + SmallVector<int, 8> OuterMask = + ScaleShuffleMask(SVN->getMask(), OuterScale); + + // Merge the shuffle masks. + SmallVector<int, 8> NewMask; + for (int M : OuterMask) + NewMask.push_back(M < 0 ? -1 : InnerMask[M]); + + // Test for shuffle mask legality over both commutations. + SDValue SV0 = BC0->getOperand(0); + SDValue SV1 = BC0->getOperand(1); + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + } + + if (LegalMask) { + SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); + SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); + return DAG.getNode( + ISD::BITCAST, SDLoc(N), VT, + DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + } + } + } + } + // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) @@ -11543,8 +12739,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) // Don't try to fold shuffles with illegal type. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - TLI.isTypeLegal(VT)) { + // Only fold if this shuffle is the only user of the other shuffle. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && + Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); // The incoming shuffle must be of the same type as the result of the @@ -11624,20 +12821,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Avoid introducing shuffles with illegal mask. if (!TLI.isShuffleMaskLegal(Mask, VT)) { - // Compute the commuted shuffle mask and test again. - for (unsigned i = 0; i != NumElts; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElts) - Mask[i] = idx + NumElts; - else - Mask[i] = idx - NumElts; - } + ShuffleVectorSDNode::commuteMask(Mask); if (!TLI.isShuffleMaskLegal(Mask, VT)) return SDValue(); - + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) @@ -11653,6 +12841,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { + SDValue InVal = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern + // with a VECTOR_SHUFFLE. + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue InVec = InVal->getOperand(0); + SDValue EltNo = InVal->getOperand(1); + + // FIXME: We could support implicit truncation if the shuffle can be + // scaled to a smaller vector scalar type. 
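The ScaleShuffleMask lambda above rescales a mask when the inner and outer shuffles use different scalar widths: each index is multiplied by Scale and expanded into Scale consecutive lanes, with undef (-1) lanes staying undef. The same logic as a freestanding function:

    #include <vector>

    std::vector<int> scaleShuffleMask(const std::vector<int> &Mask, int Scale) {
      std::vector<int> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    }
    // e.g. a v2i32 mask <1, -1> scaled by 2 becomes the v4i16 mask
    // <2, 3, -1, -1>.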
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo); + if (C0 && VT == InVec.getValueType() && + VT.getScalarType() == InVal.getValueType()) { + SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1); + int Elt = C0->getZExtValue(); + NewMask[0] = Elt; + + if (TLI.isShuffleMaskLegal(NewMask, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), + NewMask); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N2 = N->getOperand(2); @@ -11680,48 +12896,64 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (fp_to_fp16 (fp16_to_fp op)) -> op + if (N0->getOpcode() == ISD::FP16_TO_FP) + return N0->getOperand(0); + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); - SDLoc dl(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if (N->getOpcode() == ISD::AND) { - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<int, 8> Indices; - unsigned NumElts = RHS.getNumOperands(); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (!isa<ConstantSDNode>(Elt)) - return SDValue(); + SDLoc dl(N); - if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) - Indices.push_back(i); - else if (cast<ConstantSDNode>(Elt)->isNullValue()) - Indices.push_back(NumElts+i); - else - return SDValue(); - } + // Make sure we're not running after operation legalization where it + // may have custom lowered the vector shuffles. + if (LegalOperations) + return SDValue(); - // Let's see if the target supports this vector_shuffle. - EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) - return SDValue(); + if (N->getOpcode() != ISD::AND) + return SDValue(); + + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); - // Return the new VECTOR_SHUFFLE node. - EVT EltVT = RVT.getVectorElementType(); - SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<int, 8> Indices; + unsigned NumElts = RHS.getNumOperands(); + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Elt = RHS.getOperand(i); + if (isAllOnesConstant(Elt)) + Indices.push_back(i); + else if (isNullConstant(Elt)) + Indices.push_back(NumElts+i); + else + return SDValue(); } + + // Let's see if the target supports this vector_shuffle. + EVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. 
+ EVT EltVT = RVT.getVectorElementType(); + SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, dl, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); + LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); } return SDValue(); @@ -11734,8 +12966,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDValue Shuffle = XformToShuffleWithZero(N); - if (Shuffle.getNode()) return Shuffle; + + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold // this operation. @@ -11754,9 +12987,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // Can't fold divide by zero. if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || N->getOpcode() == ISD::FDIV) { - if ((RHSOp.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) || - (RHSOp.getOpcode() == ISD::ConstantFP && + if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) break; } @@ -11813,38 +13044,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } -/// Visit a binary vector operation, like FABS/FNEG. -SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { - assert(N->getValueType(0).isVector() && - "SimplifyVUnaryOp only works on vectors!"); - - SDValue N0 = N->getOperand(0); - - if (N0.getOpcode() != ISD::BUILD_VECTOR) - return SDValue(); - - // Operand is a BUILD_VECTOR node, see if we can constant fold it. - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { - SDValue Op = N0.getOperand(i); - if (Op.getOpcode() != ISD::UNDEF && - Op.getOpcode() != ISD::ConstantFP) - break; - EVT EltVT = Op.getValueType(); - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op); - if (FoldOp.getOpcode() != ISD::UNDEF && - FoldOp.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(FoldOp); - AddToWorklist(FoldOp.getNode()); - } - - if (Ops.size() != N0.getNumOperands()) - return SDValue(); - - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops); -} - SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); @@ -11881,6 +13080,38 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { + // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + // The select + setcc is redundant, because fsqrt returns NaN for X < -0. 
+  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+      SDValue Sqrt = RHS;
+      ISD::CondCode CC;
+      SDValue CmpLHS;
+      const ConstantFPSDNode *NegZero = nullptr;
+
+      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+        CmpLHS = TheSelect->getOperand(0);
+        NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+      } else {
+        // SELECT or VSELECT
+        SDValue Cmp = TheSelect->getOperand(0);
+        if (Cmp.getOpcode() == ISD::SETCC) {
+          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+          CmpLHS = Cmp.getOperand(0);
+          NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
+        }
+      }
+      if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
+          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+          CC == ISD::SETULT || CC == ISD::SETLT)) {
+        // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+        CombineTo(TheSelect, Sqrt);
+        return true;
+      }
+    }
+  }
   // Cannot simplify select with vector condition
   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
 
@@ -11902,6 +13133,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
     if (LHS.getOperand(0) != RHS.getOperand(0) ||
         // Do not let this transformation reduce the number of volatile loads.
         LLD->isVolatile() || RLD->isVolatile() ||
+        // FIXME: If either is a pre/post inc/dec load,
+        // we'd need to split out the address adjustment.
+        LLD->isIndexed() || RLD->isIndexed() ||
         // If this is an EXTLOAD, the VT's must match.
         LLD->getMemoryVT() != RLD->getMemoryVT() ||
         // If this is an EXTLOAD, the kind of extension must match.
@@ -12003,20 +13237,17 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
   EVT VT = N2.getValueType();
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
-  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
 
   // Determine if the condition we're dealing with is constant
   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                               N0, N1, CC, DL, false);
   if (SCC.getNode()) AddToWorklist(SCC.getNode());
-  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
 
-  // fold select_cc true, x, y -> x
-  if (SCCC && !SCCC->isNullValue())
-    return N2;
-  // fold select_cc false, x, y -> y
-  if (SCCC && SCCC->isNullValue())
-    return N3;
+  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+    // fold select_cc true, x, y -> x
+    // fold select_cc false, x, y -> y
+    return !SCCC->isNullValue() ? N2 : N3;
+  }
 
   // Check to see if we can simplify the select into an fabs node
   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
@@ -12069,9 +13300,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
 
       // Get the offsets to the 0 and 1 element of the array so that we can
       // select between them.
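The fsqrt fold added at the top of this hunk leans on IEEE-754 semantics: sqrt already returns NaN for any operand that compares less than -0.0, so the guarding compare and select are redundant. The scalar shape of the pattern being collapsed (a sketch; NaN payloads may differ, but both branches are NaN for negative inputs):

    #include <cmath>

    float guardedSqrt(float X) {
      // (select (setcc x, -0.0, olt), NaN, (fsqrt x)) == (fsqrt x)
      return X < -0.0f ? std::nanf("") : std::sqrt(X);
    }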
- SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Zero = DAG.getIntPtrConstant(0, DL); unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); - SDValue One = DAG.getIntPtrConstant(EltSize); + SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), @@ -12086,24 +13317,23 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, Alignment); - } } // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) - if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && - (N1C->isNullValue() || // (a < 0) ? b : 0 - (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 + if (isNullConstant(N3) && CC == ISD::SETLT && + (isNullConstant(N1) || // (a < 0) ? b : 0 + (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0 EVT XType = N0.getValueType(); EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. - if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCtV = N2C->getAPIntValue().logBase2(); - ShCtV = XType.getSizeInBits()-ShCtV-1; - SDValue ShCt = DAG.getConstant(ShCtV, + ShCtV = XType.getSizeInBits() - ShCtV - 1; + SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); @@ -12119,7 +13349,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, + SDLoc(N0), getShiftAmountTy(N0.getValueType()))); AddToWorklist(Shift.getNode()); @@ -12139,23 +13370,21 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && - N1C && N1C->isNullValue() && - N2C && N2C->isNullValue()) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. APInt AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. 
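The "gzip trick" above turns (x < 0) ? a : 0 into a branch-free and/sra pair: an arithmetic right shift by width-1 yields all ones exactly when x is negative, and that mask then selects a. The single-bit variant that follows uses the same all-ones-mask idea. A scalar sketch (assuming 32-bit operands and an arithmetic >> on signed values, which the SRA node guarantees):

    #include <cstdint>

    uint32_t selectIfNegative(int32_t X, uint32_t A) {
      uint32_t Mask = static_cast<uint32_t>(X >> 31); // 0 or 0xFFFFFFFF
      return Mask & A;                                // A if X < 0, else 0
    }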
SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth()-1, + DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), getShiftAmountTy(Shl.getValueType())); SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); @@ -12164,13 +13393,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } // fold select C, 16, 0 -> shl C, 4 - if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && + if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && TLI.getBooleanContents(N0.getValueType()) == TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. - if (NotExtCompare && N2C->getAPIntValue() == 1) + if (NotExtCompare && N2C->isOne()) return SDValue(); // Get a SetCC of the condition @@ -12198,13 +13427,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, AddToWorklist(SCC.getNode()); AddToWorklist(Temp.getNode()); - if (N2C->getAPIntValue() == 1) + if (N2C->isOne()) return Temp; // shl setcc result by log2 n2c return DAG.getNode( ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), + DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } } @@ -12212,7 +13441,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Check to see if this is the equivalent of setcc // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. - if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { + if (0 && isNullConstant(N3) && isOneConstant(N2)) { EVT XType = N0.getValueType(); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { @@ -12223,30 +13452,34 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) - if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && + if (isNullConstant(N1) && CC == ISD::SETEQ && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, XType))) { SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), + SDLoc(Ctlz), getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) - if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { - SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), - XType, DAG.getConstant(0, XType), N0); - SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); + if (isNullConstant(N1) && CC == ISD::SETGT) { + SDLoc DL(N0); + SDValue NegN0 = DAG.getNode(ISD::SUB, DL, + XType, DAG.getConstant(0, DL, XType), N0); + SDValue NotN0 = DAG.getNOT(DL, N0, XType); return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(XType))); } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) - if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { - SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + if (isAllOnesConstant(N1) && CC == ISD::SETGT) { + SDLoc DL(N0); + SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); + return DAG.getNode(ISD::XOR, DL, XType, Sign, 
DAG.getConstant(1, DL, + XType)); } } @@ -12269,11 +13502,12 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, EVT XType = N0.getValueType(); if (SubC && SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, + SDLoc DL(N0); + SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), + SDValue Add = DAG.getNode(ISD::ADD, DL, XType, N0, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -12303,7 +13537,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector<SDNode*> Built; @@ -12323,7 +13557,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector<SDNode *> Built; @@ -12344,7 +13578,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector<SDNode*> Built; @@ -12374,7 +13608,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { // does not require additional intermediate precision] EVT VT = Op.getValueType(); SDLoc DL(Op); - SDValue FPOne = DAG.getConstantFP(1.0, VT); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); AddToWorklist(Est.getNode()); @@ -12409,7 +13643,7 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue ThreeHalves = DAG.getConstantFP(1.5, VT); + SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. 
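The ThreeHalves constant introduced above feeds the one-constant Newton-Raphson refinement for reciprocal square root: Est' = Est * (1.5 - 0.5 * Arg * Est * Est), where 0.5 * Arg is rewritten as (1.5 * Arg - Arg) so that 1.5 is the only FP immediate the sequence materializes. A scalar sketch of the iteration:

    float refineRsqrt(float Arg, float Est, unsigned Iterations) {
      const float ThreeHalves = 1.5f;
      float HalfArg = ThreeHalves * Arg - Arg;  // 0.5 * Arg, reusing 1.5
      for (unsigned i = 0; i < Iterations; ++i)
        Est = Est * (ThreeHalves - HalfArg * Est * Est);
      return Est;
    }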
@@ -12445,8 +13679,8 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue MinusThree = DAG.getConstantFP(-3.0, VT); - SDValue MinusHalf = DAG.getConstantFP(-0.5, VT); + SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index c46539b..0351c33 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -44,6 +44,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -61,8 +62,8 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -424,7 +425,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the second operand is a constant and handle it appropriately. if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t Imm = CI->getZExtValue(); + uint64_t Imm = CI->getSExtValue(); // Transform "sdiv exact X, 8" -> "sra X, 3". if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && @@ -710,7 +711,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { CallingConv::ID CC = I->getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !I->getType()->isVoidTy(); - Value *Callee = I->getOperand(PatchPointOpers::TargetPos); + Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts(); // Get the real number of arguments participating in the call <numArgs> assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && @@ -756,23 +757,25 @@ bool FastISel::selectPatchpoint(const CallInst *I) { cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); - // Assume that the callee is a constant address or null pointer. - // FIXME: handle function symbols in the future. - uint64_t CalleeAddr; - if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) - CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); - else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { - if (C->getOpcode() == Instruction::IntToPtr) - CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); - else + // Add the call target. 
+ if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { + if (C->getOpcode() == Instruction::IntToPtr) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else llvm_unreachable("Unsupported ConstantExpr."); + } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) { + Ops.push_back(MachineOperand::CreateGA(GV, 0)); } else if (isa<ConstantPointerNull>(Callee)) - CalleeAddr = 0; + Ops.push_back(MachineOperand::CreateImm(0)); else llvm_unreachable("Unsupported callee address."); - Ops.push_back(MachineOperand::CreateImm(CalleeAddr)); - // Adjust <numArgs> to account for any arguments that have been passed on // the stack instead. unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size(); @@ -801,7 +804,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) { return false; // Push the register mask info. - Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC))); + Ops.push_back(MachineOperand::CreateRegMask( + TRI.getCallPreservedMask(*FuncInfo.MF, CC))); // Add scratch registers as implicit def and early clobber. const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); @@ -1077,12 +1081,17 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: return true; + case Intrinsic::eh_actions: { + unsigned ResultReg = getRegForValue(UndefValue::get(II->getType())); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { + assert(DI->getVariable() && "Missing variable"); + if (!FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } @@ -1122,6 +1131,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { false); if (Op) { + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (Op->isReg()) { Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1146,6 +1157,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgValueInst *DI = cast<DbgValueInst>(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
@@ -1580,7 +1593,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, bool SkipTargetIndependentISel) : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), - TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()), + TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()), TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), @@ -1662,6 +1675,7 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, if (ResultReg) return ResultReg; unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + bool IsImmKill = true; if (!MaterialReg) { // This is a bit ugly/slow, but failing here means falling out of // fast-isel, which would be very slow. @@ -1670,9 +1684,15 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); if (!MaterialReg) return 0; + // FIXME: If the materialized register here has no uses yet then this + // will be the first use and we should be able to mark it as killed. + // However, the local value area for materialising constant expressions + // grows down, not up, which means that any constant expressions we generate + // later which also use 'Imm' could be after this instruction and therefore + // after this kill. + IsImmKill = false; } - return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, - /*IsKill=*/true); + return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill); } unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 19aca6e..7b5b8c4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" @@ -31,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -78,12 +80,40 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) { return ExtendKind; } +namespace { +struct WinEHNumbering { + WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo), + CurrentBaseState(-1), NextState(0) {} + + WinEHFuncInfo &FuncInfo; + int CurrentBaseState; + int NextState; + + SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack; + SmallPtrSet<const Function *, 4> VisitedHandlers; + + int currentEHNumber() const { + return HandlerStack.empty() ? 
CurrentBaseState : HandlerStack.back()->getEHState(); + } + + void createUnwindMapEntry(int ToState, ActionHandler *AH); + void createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers); + void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, + ImmutableCallSite CS); + void popUnmatchedActions(int FirstMismatch); + void calculateStateNumbers(const Function &F); + void findActionRootLPads(const Function &F); +}; +} + void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SelectionDAG *DAG) { Fn = &fn; MF = &mf; TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); + MachineModuleInfo &MMI = MF->getMMI(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -133,16 +163,17 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, ImmutableCallSite CS(I); if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = - TLI->ParseConstraints(CS); + TLI->ParseConstraints(TRI, CS); for (size_t I = 0, E = Ops.size(); I != E; ++I) { TargetLowering::AsmOperandInfo &Op = Ops[I]; if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). TLI->ComputeConstraintToUse(Op, SDValue(), DAG); std::pair<unsigned, const TargetRegisterClass *> PhysReg = - TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, - Op.ConstraintVT); + TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, + Op.ConstraintVT); if (PhysReg.first == SP) MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); } @@ -176,13 +207,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // during the initial isel pass through the IR so that it is done // in a predictable order. if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { - MachineModuleInfo &MMI = MF->getMMI(); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (MMI.hasDebugInfo() && - DIVar && - !DI->getDebugLoc().isUnknown()) { + assert(DI->getVariable() && "Missing variable"); + assert(DI->getDebugLoc() && "Missing location"); + if (MMI.hasDebugInfo()) { // Don't handle byval struct arguments or VLAs, for example. // Non-byval arguments are handled here (they refer to the stack // temporary alloca at this point). @@ -249,9 +276,414 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Mark landing pad blocks. - for (BB = Fn->begin(); BB != EB; ++BB) - if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + SmallVector<const LandingPadInst *, 4> LPads; + for (BB = Fn->begin(); BB != EB; ++BB) { + if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + if (BB->isLandingPad()) + LPads.push_back(BB->getLandingPadInst()); + } + + // If this is an MSVC EH personality, we need to do a bit more work. 
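// For orientation, and not taken from this commit: the MSVC C++ personality
// labels ranges of code with EH states, where -1 means "no enclosing
// handler". CurrentBaseState therefore starts at -1, and currentEHNumber()
// reports the state of the innermost entry on HandlerStack. A hypothetical
// nesting and the states this numbering aims to produce:
//
//   try {            // invokes here carry state 0
//     try {          // invokes here carry state 1
//       mayThrow();
//     } catch (A &) { }
//   } catch (B &) { }
//
// Unwinding from state 1 proceeds to state 0, and from state 0 out of the
// frame (-1).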
+ EHPersonality Personality = EHPersonality::Unknown; + if (!LPads.empty()) + Personality = classifyEHPersonality(LPads.back()->getPersonalityFn()); + if (!isMSVCEHPersonality(Personality)) + return; + + WinEHFuncInfo *EHInfo = nullptr; + if (Personality == EHPersonality::MSVC_Win64SEH) { + addSEHHandlersForLPads(LPads); + } else if (Personality == EHPersonality::MSVC_CXX) { + const Function *WinEHParentFn = MMI.getWinEHParent(&fn); + EHInfo = &MMI.getWinEHFuncInfo(WinEHParentFn); + if (EHInfo->LandingPadStateMap.empty()) { + WinEHNumbering Num(*EHInfo); + Num.findActionRootLPads(*WinEHParentFn); + // The VisitedHandlers list is used by both findActionRootLPads and + // calculateStateNumbers, but both functions need to visit all handlers. + Num.VisitedHandlers.clear(); + Num.calculateStateNumbers(*WinEHParentFn); + // Pop everything on the handler stack. + // It may be necessary to call this more than once because a handler can + // be pushed on the stack as a result of clearing the stack. + while (!Num.HandlerStack.empty()) + Num.processCallSite(None, ImmutableCallSite()); + } + + // Copy the state numbers to LandingPadInfo for the current function, which + // could be a handler or the parent. + for (const LandingPadInst *LP : LPads) { + MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; + MMI.addWinEHState(LPadMBB, EHInfo->LandingPadStateMap[LP]); + } + } +} + +void FunctionLoweringInfo::addSEHHandlersForLPads( + ArrayRef<const LandingPadInst *> LPads) { + MachineModuleInfo &MMI = MF->getMMI(); + + // Iterate over all landing pads with llvm.eh.actions calls. + for (const LandingPadInst *LP : LPads) { + const IntrinsicInst *ActionsCall = + dyn_cast<IntrinsicInst>(LP->getNextNode()); + if (!ActionsCall || + ActionsCall->getIntrinsicID() != Intrinsic::eh_actions) + continue; + + // Parse the llvm.eh.actions call we found. + MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; + SmallVector<std::unique_ptr<ActionHandler>, 4> Actions; + parseEHActions(ActionsCall, Actions); + + // Iterate EH actions from most to least precedence, which means + // iterating in reverse. + for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) { + ActionHandler *Action = I->get(); + if (auto *CH = dyn_cast<CatchHandler>(Action)) { + const auto *Filter = + dyn_cast<Function>(CH->getSelector()->stripPointerCasts()); + assert((Filter || CH->getSelector()->isNullValue()) && + "expected function or catch-all"); + const auto *RecoverBA = + cast<BlockAddress>(CH->getHandlerBlockOrFunc()); + MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA); + } else { + assert(isa<CleanupHandler>(Action)); + const auto *Fini = cast<Function>(Action->getHandlerBlockOrFunc()); + MMI.addSEHCleanupHandler(LPadMBB, Fini); + } + } + } +} + +void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) { + WinEHUnwindMapEntry UME; + UME.ToState = ToState; + if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH)) + UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc()); + else + UME.Cleanup = nullptr; + FuncInfo.UnwindMap.push_back(UME); +} + +void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers) { + // See if we already have an entry for this set of handlers. + // This is using iterators rather than a range-based for loop because + // if we find the entry we're looking for we'll need the iterator to erase it. 
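// The scan below treats two try-block entries as equivalent when their
// handler arrays have the same length and the same outlined handlers in the
// same order. An equivalent formulation with std::equal (a sketch only; the
// code keeps the explicit loop so the iterator survives for the erase):
//
//   bool Same = Entry.HandlerArray.size() == Handlers.size() &&
//               std::equal(Handlers.begin(), Handlers.end(),
//                          Entry.HandlerArray.begin(),
//                          [](CatchHandler *CH, const WinEHHandlerType &HT) {
//                            return HT.Handler == CH->getHandlerBlockOrFunc();
//                          });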
+ int NumHandlers = Handlers.size(); + auto I = FuncInfo.TryBlockMap.begin(); + auto E = FuncInfo.TryBlockMap.end(); + for ( ; I != E; ++I) { + auto &Entry = *I; + if (Entry.HandlerArray.size() != (size_t)NumHandlers) + continue; + int N; + for (N = 0; N < NumHandlers; ++N) { + if (Entry.HandlerArray[N].Handler != Handlers[N]->getHandlerBlockOrFunc()) + break; // breaks out of inner loop + } + // If all the handlers match, this is what we were looking for. + if (N == NumHandlers) { + break; + } + } + + // If we found an existing entry for this set of handlers, extend the range + // but move the entry to the end of the map vector. The order of entries + // in the map is critical to the way that the runtime finds handlers. + // FIXME: Depending on what has happened with block ordering, this may + // incorrectly combine entries that should remain separate. + if (I != E) { + // Copy the existing entry. + WinEHTryBlockMapEntry Entry = *I; + Entry.TryLow = std::min(TryLow, Entry.TryLow); + Entry.TryHigh = std::max(TryHigh, Entry.TryHigh); + assert(Entry.TryLow <= Entry.TryHigh); + // Erase the old entry and add this one to the back. + FuncInfo.TryBlockMap.erase(I); + FuncInfo.TryBlockMap.push_back(Entry); + return; + } + + // If we didn't find an entry, create a new one. + WinEHTryBlockMapEntry TBME; + TBME.TryLow = TryLow; + TBME.TryHigh = TryHigh; + assert(TBME.TryLow <= TBME.TryHigh); + for (CatchHandler *CH : Handlers) { + WinEHHandlerType HT; + if (CH->getSelector()->isNullValue()) { + HT.Adjectives = 0x40; + HT.TypeDescriptor = nullptr; + } else { + auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts()); + // Selectors are always pointers to GlobalVariables with 'struct' type. + // The struct has two fields, adjectives and a type descriptor. + auto *CS = cast<ConstantStruct>(GV->getInitializer()); + HT.Adjectives = + cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue(); + HT.TypeDescriptor = + cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts()); + } + HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc()); + HT.CatchObjRecoverIdx = CH->getExceptionVarIndex(); + TBME.HandlerArray.push_back(HT); + } + FuncInfo.TryBlockMap.push_back(TBME); +} + +static void print_name(const Value *V) { +#ifndef NDEBUG + if (!V) { + DEBUG(dbgs() << "null"); + return; + } + + if (const auto *F = dyn_cast<Function>(V)) + DEBUG(dbgs() << F->getName()); + else + DEBUG(V->dump()); +#endif +} + +void WinEHNumbering::processCallSite( + MutableArrayRef<std::unique_ptr<ActionHandler>> Actions, + ImmutableCallSite CS) { + DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber() + << ") for: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); + + DEBUG(dbgs() << "HandlerStack: \n"); + for (int I = 0, E = HandlerStack.size(); I < E; ++I) { + DEBUG(dbgs() << " "); + print_name(HandlerStack[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "Actions: \n"); + for (int I = 0, E = Actions.size(); I < E; ++I) { + DEBUG(dbgs() << " "); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << '\n'); + } + int FirstMismatch = 0; + for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E; + ++FirstMismatch) { + if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() != + Actions[FirstMismatch]->getHandlerBlockOrFunc()) + break; + } + + // Remove unmatched actions from the stack and process their EH states. 
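// The prefix scan above computes the length of the common prefix of the
// handler stack and the incoming action list; std::mismatch expresses the
// same thing (a sketch; the explicit loop is what the code uses):
//
//   size_t Common = std::min(HandlerStack.size(), Actions.size());
//   auto Mis = std::mismatch(
//       HandlerStack.begin(), HandlerStack.begin() + Common, Actions.begin(),
//       [](const std::unique_ptr<ActionHandler> &L,
//          const std::unique_ptr<ActionHandler> &R) {
//         return L->getHandlerBlockOrFunc() == R->getHandlerBlockOrFunc();
//       });
//   int FirstMismatch = Mis.first - HandlerStack.begin();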
+ popUnmatchedActions(FirstMismatch); + + DEBUG(dbgs() << "Pushing actions for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); + + bool LastActionWasCatch = false; + const LandingPadInst *LastRootLPad = nullptr; + for (size_t I = FirstMismatch; I != Actions.size(); ++I) { + // We can reuse eh states when pushing two catches for the same invoke. + bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get()); + auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc()); + // Various conditions can lead to a handler being popped from the + // stack and re-pushed later. That shouldn't create a new state. + // FIXME: Can code optimization lead to re-used handlers? + if (FuncInfo.HandlerEnclosedState.count(Handler)) { + // If we already assigned the state enclosed by this handler re-use it. + Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]); + continue; + } + const LandingPadInst* RootLPad = FuncInfo.RootLPad[Handler]; + if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) { + DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber() << "\n"); + Actions[I]->setEHState(currentEHNumber()); + } else { + DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", "); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << ") with EH state " << NextState << "\n"); + createUnwindMapEntry(currentEHNumber(), Actions[I].get()); + DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n"); + Actions[I]->setEHState(NextState); + NextState++; + } + HandlerStack.push_back(std::move(Actions[I])); + LastActionWasCatch = CurrActionIsCatch; + LastRootLPad = RootLPad; + } + + // This is used to defer numbering states for a handler until after the + // last time it appears in an invoke action list. + if (CS.isInvoke()) { + for (int I = 0, E = HandlerStack.size(); I < E; ++I) { + auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc()); + if (FuncInfo.LastInvoke[Handler] != cast<InvokeInst>(CS.getInstruction())) + continue; + FuncInfo.LastInvokeVisited[Handler] = true; + DEBUG(dbgs() << "Last invoke of "); + print_name(Handler); + DEBUG(dbgs() << " has been visited.\n"); + } + } + + DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); +} + +void WinEHNumbering::popUnmatchedActions(int FirstMismatch) { + // Don't recurse while we are looping over the handler stack. Instead, defer + // the numbering of the catch handlers until we are done popping. 
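// The state-sharing rule in the push loop above: consecutive catch actions
// whose handlers hang off the same root landing pad guard the same invoke
// range, so only the first allocates a fresh state. Illustrative source:
//
//   try { mayThrow(); }
//   catch (A &) { }   // unwind map entry, new state N
//   catch (B &) { }   // same root landing pad, reuses state N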
+ SmallVector<CatchHandler *, 4> PoppedCatches; + for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) { + std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val(); + if (isa<CatchHandler>(Handler.get())) + PoppedCatches.push_back(cast<CatchHandler>(Handler.release())); + } + + int TryHigh = NextState - 1; + int LastTryLowIdx = 0; + for (int I = 0, E = PoppedCatches.size(); I != E; ++I) { + CatchHandler *CH = PoppedCatches[I]; + DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n"); + if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) { + int TryLow = CH->getEHState(); + auto Handlers = + makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1); + DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh); + for (size_t J = 0; J < Handlers.size(); ++J) { + DEBUG(dbgs() << ", "); + print_name(Handlers[J]->getHandlerBlockOrFunc()); + } + DEBUG(dbgs() << ")\n"); + createTryBlockMapEntry(TryLow, TryHigh, Handlers); + LastTryLowIdx = I + 1; + } + } + + for (CatchHandler *CH : PoppedCatches) { + if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) { + if (FuncInfo.LastInvokeVisited[F]) { + DEBUG(dbgs() << "Assigning base state " << NextState << " to "); + print_name(F); + DEBUG(dbgs() << '\n'); + FuncInfo.HandlerBaseState[F] = NextState; + DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() + << ", null)\n"); + createUnwindMapEntry(currentEHNumber(), nullptr); + ++NextState; + calculateStateNumbers(*F); + } + else { + DEBUG(dbgs() << "Deferring handling of "); + print_name(F); + DEBUG(dbgs() << " until last invoke visited.\n"); + } + } + delete CH; + } +} + +void WinEHNumbering::calculateStateNumbers(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) + return; // We've already visited this handler, don't renumber it. + + int OldBaseState = CurrentBaseState; + if (FuncInfo.HandlerBaseState.count(&F)) { + CurrentBaseState = FuncInfo.HandlerBaseState[&F]; + } + + size_t SavedHandlerStackSize = HandlerStack.size(); + + DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n'); + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + const auto *CI = dyn_cast<CallInst>(&I); + if (!CI || CI->doesNotThrow()) + continue; + processCallSite(None, CI); + } + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + if (ActionList.empty()) + continue; + processCallSite(ActionList, II); + ActionList.clear(); + FuncInfo.LandingPadStateMap[LPI] = currentEHNumber(); + DEBUG(dbgs() << "Assigning state " << currentEHNumber() + << " to landing pad at " << LPI->getParent()->getName() + << '\n'); + } + + // Pop any actions that were pushed on the stack for this function. + popUnmatchedActions(SavedHandlerStackSize); + + DEBUG(dbgs() << "Assigning max state " << NextState - 1 + << " to " << F.getName() << '\n'); + FuncInfo.CatchHandlerMaxState[&F] = NextState - 1; + + CurrentBaseState = OldBaseState; +} + +// This function follows the same basic traversal as calculateStateNumbers +// but it is necessary to identify the root landing pad associated +// with each action before we start assigning state numbers. 
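// Both this traversal and findActionRootLPads below guard against
// re-entering a handler with the usual set-insert idiom: insert() returns a
// {iterator, inserted} pair, so a false .second short-circuits the visit.
// Generic sketch:
//
//   llvm::SmallPtrSet<const llvm::Function *, 4> Visited;
//   bool shouldVisit(const llvm::Function &F) {
//     return Visited.insert(&F).second;  // false => already processed
//   }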
+void WinEHNumbering::findActionRootLPads(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) + return; // We've already visited this handler, don't revisit it. + + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + for (const BasicBlock &BB : F) { + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + if (ActionList.empty()) + continue; + for (int I = 0, E = ActionList.size(); I < E; ++I) { + if (auto *Handler + = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) { + FuncInfo.LastInvoke[Handler] = II; + // Don't replace the root landing pad if we previously saw this + // handler in a different function. + if (FuncInfo.RootLPad.count(Handler) && + FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F) + continue; + DEBUG(dbgs() << "Setting root lpad for "); + print_name(Handler); + DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n'); + FuncInfo.RootLPad[Handler] = LPI; + } + } + // Walk the actions again and look for nested handlers. This has to + // happen after all of the actions have been processed in the current + // function. + for (int I = 0, E = ActionList.size(); I < E; ++I) + if (auto *Handler + = dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) + findActionRootLPads(*Handler); + ActionList.clear(); + } } /// clear - Clear out all the function-specific state. This returns this @@ -274,6 +706,7 @@ void FunctionLoweringInfo::clear() { ByValArgFrameIndexMap.clear(); RegFixups.clear(); StatepointStackSlots.clear(); + StatepointRelocatedValues.clear(); PreferredExtendType.clear(); } @@ -460,8 +893,7 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { + for (auto i : post_order(T)) { if (i->isFloatingPointTy()) { MMI->setUsesVAFloatArgument(true); return; @@ -471,60 +903,6 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, } } -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. -void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, - MachineBasicBlock *MBB) { - // Inform the MachineModuleInfo of the personality for this landing pad. - const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1)); - assert(CE->getOpcode() == Instruction::BitCast && - isa<Function>(CE->getOperand(0)) && - "Personality should be a function"); - MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0))); - - // Gather all the type infos for this landing pad and pass them along to - // MachineModuleInfo. 
- std::vector<const GlobalValue *> TyInfo; - unsigned N = I.getNumArgOperands(); - - for (unsigned i = N - 1; i > 1; --i) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) { - unsigned FilterLength = CI->getZExtValue(); - unsigned FirstCatch = i + FilterLength + !FilterLength; - assert(FirstCatch <= N && "Invalid filter length"); - - if (FirstCatch < N) { - TyInfo.reserve(N - FirstCatch); - for (unsigned j = FirstCatch; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - if (!FilterLength) { - // Cleanup. - MMI->addCleanup(MBB); - } else { - // Filter. - TyInfo.reserve(FilterLength - 1); - for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addFilterTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - N = i; - } - } - - if (N > 2) { - TyInfo.reserve(N - 2); - for (unsigned j = 2; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a65f33e..7abc0c4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type); + Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! - Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type); + Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type); } } @@ -650,6 +650,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. @@ -951,6 +953,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // Remember to operand index of the group flags. SmallVector<unsigned, 8> GroupIdx; + // Remember registers that are part of early-clobber defs. + SmallVector<unsigned, 8> ECRegs; + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = @@ -979,6 +984,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + ECRegs.push_back(Reg); } break; case InlineAsm::Kind_RegUse: // Use of register. @@ -1004,6 +1010,19 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } } + // GCC inline assembly allows input operands to also be early-clobber + // output operands (so long as the operand is written only after it's + // used), but this does not match the semantics of our early-clobber flag. 
+ // If an early-clobber operand register is also an input operand register, + // then remove the early-clobber flag. + for (unsigned Reg : ECRegs) { + if (MIB->readsRegister(Reg, TRI)) { + MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI); + assert(MO && "No def operand for clobbered register?"); + MO->setIsEarlyClobber(false); + } + } + // Get the mdnode from the asm if it exists and add it to the instruction. SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e5473e3..7d98872 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -249,7 +249,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); if (!UseCP) { assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); - return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl, (VT == MVT::f64) ? MVT::i64 : MVT::i32); } @@ -331,7 +331,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, MachinePointerInfo(), StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS)); + SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy(AS)); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -385,7 +385,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, int IncrementSize = NumBits / 8; // Divide the stored value in two parts. - SDValue ShiftAmount = DAG.getConstant(NumBits, + SDValue ShiftAmount = DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType())); SDValue Lo = Val; SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); @@ -397,7 +397,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy(AS))); + DAG.getConstant(IncrementSize, dl, TLI.getPointerTy(AS))); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), @@ -448,7 +448,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Make sure the stack slot is also aligned for the register type. 
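// Most hunks from here through the rest of LegalizeDAG.cpp are mechanical:
// the SelectionDAG constant factories (getConstant, getConstantFP,
// getIntPtrConstant) now take a debug location, so each call site threads
// its local SDLoc through. Shape of the change (sketch):
//
//   SDValue Inc = DAG.getConstant(RegBytes, TLI.getPointerTy(AS));      // old
//   SDValue Inc = DAG.getConstant(RegBytes, dl, TLI.getPointerTy(AS));  // new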
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; SDValue StackPtr = StackBase; unsigned Offset = 0; @@ -528,7 +528,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), Alignment, LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), @@ -540,7 +540,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), Alignment, LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), @@ -549,7 +549,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // aggregate the two parts - SDValue ShiftAmount = DAG.getConstant(NumBits, + SDValue ShiftAmount = DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType())); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); @@ -596,7 +596,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); // Add the offset to the index. unsigned EltSize = EltVT.getSizeInBits()/8; - Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); + Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, + DAG.getConstant(EltSize, dl, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, @@ -655,7 +656,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { TLI.isTypeLegal(MVT::i32)) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), - MVT::i32); + SDLoc(CFP), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); } @@ -664,7 +665,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // If this target supports 64-bit registers, do a single 64-bit store. if (TLI.isTypeLegal(MVT::i64)) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - zextOrTrunc(64), MVT::i64); + zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); } @@ -673,15 +674,15 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. 
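// Worked example for the two-store split below: the f64 constant 1.0 has bit
// pattern 0x3FF0000000000000, so IntVal.trunc(32) gives Lo = 0x00000000 and
// IntVal.lshr(32).trunc(32) gives Hi = 0x3FF00000; the isBigEndian() swap
// keeps the memory image identical to a single 64-bit store.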
- const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); - SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32); - SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(4, Ptr.getValueType())); + DAG.getConstant(4, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U), @@ -731,7 +732,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) + if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } @@ -792,9 +793,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(RoundWidth, + DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), @@ -805,7 +807,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(ExtraWidth, + DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, @@ -814,7 +816,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -843,7 +846,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) + if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } @@ -1004,7 +1007,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, isInvariant, @@ -1017,7 +1021,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. 
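// Example of the odd-width loads being legalized in these hunks: on a
// little-endian target an extending i24 load splits into an i16 load at Ptr
// and an i8 load at Ptr + 2 (IncrementSize = RoundWidth / 8), reassembled as
//
//   result = (zext(i8 at Ptr + 2) << 16) | zext(i16 at Ptr)
//
// which is exactly the shift-into-place followed by the OR join.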
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth,
+ DAG.getConstant(RoundWidth, dl,
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
@@ -1033,7 +1037,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
@@ -1047,7 +1052,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Move the top bits to the right place.
Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth,
+ DAG.getConstant(ExtraWidth, dl,
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
@@ -1240,12 +1245,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
break;
case ISD::EXTRACT_ELEMENT:
case ISD::FLT_ROUNDS_:
- case ISD::SADDO:
- case ISD::SSUBO:
- case ISD::UADDO:
- case ISD::USUBO:
- case ISD::SMULO:
- case ISD::UMULO:
case ISD::FPOWI:
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
@@ -1437,18 +1436,32 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
unsigned EltSize =
Vec.getValueType().getVectorElementType().getSizeInBits()/8;
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
- DAG.getConstant(EltSize, Idx.getValueType()));
+ DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+ SDValue NewLoad;
+
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
- false, false, false, 0);
- return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
- MachinePointerInfo(),
- Vec.getValueType().getVectorElementType(),
- false, false, false, 0);
+ NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,
+ MachinePointerInfo(), false, false, false, 0);
+ else
+ NewLoad = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(), false, false, false, 0);
+
+ // Replace the chain going out of the store with the one out of the load.
+ DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
+
+ // We introduced a cycle though, so update the load's operands, making sure
+ // to use the original store's chain as an incoming chain.
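// Why the chain surgery above is needed: the element load must be ordered
// after the store that spilled Vec, so every user of the store's chain Ch is
// redirected to the load's output chain; that rewrite also reaches the
// load's own chain input, which would form a cycle, so the operand update
// that follows restores Ch as the load's incoming chain. Schematically:
//
//   store Vec --Ch--> [users]            becomes
//   store Vec --Ch--> load elt --chain--> [users]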
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(), + NewLoad->op_end()); + NewLoadOperands[0] = Ch; + NewLoad = + SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0); + return NewLoad; } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { @@ -1476,7 +1489,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { Vec.getValueType().getVectorElementType().getSizeInBits()/8; Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, Idx.getValueType())); + DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, @@ -1513,7 +1526,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; - SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); // If the destination vector element type is narrower than the source @@ -1575,7 +1588,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, - DAG.getConstant(ByteOffset, LoadPtr.getValueType())); + DAG.getConstant(ByteOffset, dl, + LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1585,13 +1599,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); if (BitShift) SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift, + DAG.getConstant(BitShift, dl, TLI.getShiftAmountTy(SignBit.getValueType()))); } } // Now get the sign bit proper, by seeing whether the value is negative. SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), - SignBit, DAG.getConstant(0, SignBit.getValueType()), + SignBit, + DAG.getConstant(0, dl, SignBit.getValueType()), ISD::SETLT); // Get the absolute value of the result. SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); @@ -1616,8 +1631,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. 
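// Worked example for the rounding below, assuming the stack grows down and
// Align is a power of two: with SP = 0x1000, Size = 24, Align = 16,
//
//   0x1000 - 24 = 0xFE8;   0xFE8 & -16 = 0xFE0   // 16-byte aligned
//
// The AND with -(uint64_t)Align clears the low log2(Align) bits, and the
// CALLSEQ_START/CALLSEQ_END pair with zero adjustment keeps other
// instructions off the stack pointer while it is being updated.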
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true), - SDLoc(Node)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); @@ -1628,12 +1642,11 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, VT)); + DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), SDValue(), - SDLoc(Node)); + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); Results.push_back(Tmp1); Results.push_back(Tmp2); @@ -2404,7 +2417,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); // word offset constant for Hi/Lo address computation - SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType()); + SDValue WordOff = DAG.getConstant(sizeof(int), dl, + StackSlot.getValueType()); // set up Hi and Lo (into buffer) address based on endian SDValue Hi = StackSlot; SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), @@ -2416,7 +2430,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0Mapped; if (isSigned) { // constant used to invert sign bit (signed to unsigned mapping) - SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32); + SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32); Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); } else { Op0Mapped = Op0; @@ -2426,7 +2440,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Op0Mapped, Lo, MachinePointerInfo(), false, false, 0); // initial hi portion of constructed double - SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); + SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); // store the hi of the constructed double - biased exponent SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo(), @@ -2438,7 +2452,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : BitsToDouble(0x4330000000000000ULL), - MVT::f64); + dl, MVT::f64); // subtract the bias SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); // final result @@ -2449,7 +2463,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Result = Sub; } else if (DestVT.bitsLT(MVT::f64)) { Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } else if (DestVT.bitsGT(MVT::f64)) { Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); } @@ -2465,15 +2479,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // TODO: Generalize this for use with other types. 
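// The magic constants in the i64 -> f64 path below implement the classic
// split-and-bias trick (compare compiler-rt's __floatundidf): as doubles,
// 0x4330000000000000 is 2^52 and 0x4530000000000000 is 2^84. OR'ing the low
// 32 input bits into 2^52's mantissa produces the exact double 2^52 + lo;
// OR'ing the high 32 bits into 2^84's mantissa produces 2^84 + hi * 2^32.
// Subtracting the third constant, 0x4530000000100000 = 2^84 + 2^52, and
// adding the low part recovers the value exactly:
//
//   ((2^84 + hi * 2^32) - (2^84 + 2^52)) + (2^52 + lo) = hi * 2^32 + lo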
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { SDValue TwoP52 = - DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64); + DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = - DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64); + DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl, + MVT::f64); SDValue TwoP84 = - DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64); + DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64); SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, - DAG.getConstant(32, MVT::i64)); + DAG.getConstant(32, dl, MVT::i64)); SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); @@ -2492,9 +2507,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); SDValue ShiftConst = - DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType())); + DAG.getConstant(1, dl, TLI.getShiftAmountTy(Op0.getValueType())); SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); - SDValue AndConst = DAG.getConstant(1, MVT::i64); + SDValue AndConst = DAG.getConstant(1, dl, MVT::i64); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); @@ -2506,47 +2521,52 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // thing most of the time. This would be a good candidate for a //pseudo-op, or, even better, for whole-function isel. SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); + Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); } // Otherwise, implement the fully general conversion. 
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, - DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); + DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, - DAG.getConstant(UINT64_C(0x800), MVT::i64)); + DAG.getConstant(UINT64_C(0x800), dl, MVT::i64)); SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, - DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); - SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); + DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64)); + SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, + DAG.getConstant(UINT64_C(0), dl, MVT::i64), + ISD::SETNE); SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); - SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), - Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), - ISD::SETUGE); + SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, + DAG.getConstant(UINT64_C(0x0020000000000000), dl, + MVT::i64), + ISD::SETUGE); SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, - DAG.getConstant(32, SHVT)); + DAG.getConstant(32, dl, SHVT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); SDValue TwoP32 = - DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64); + DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl, + MVT::f64); SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, Op0.getValueType()), + Op0, + DAG.getConstant(0, dl, Op0.getValueType()), ISD::SETLT); - SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4); + SDValue Zero = DAG.getIntPtrConstant(0, dl), + Four = DAG.getIntPtrConstant(4, dl); SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); @@ -2681,34 +2701,41 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: - Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); + Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); case MVT::i32: - Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT)); - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT)); + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 
= DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(0xFF0000, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); case MVT::i64: - Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT)); - Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT)); - Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); - Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT)); - Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT)); - Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT)); - Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT)); - Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT)); - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT)); + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, + DAG.getConstant(255ULL<<48, dl, VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, + DAG.getConstant(255ULL<<40, dl, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, + DAG.getConstant(255ULL<<32, dl, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, + DAG.getConstant(255ULL<<24, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(255ULL<<16, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, + DAG.getConstant(255ULL<<8 , dl, VT)); Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); @@ -2735,34 +2762,38 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT); - SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT); - SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT); - SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT); + SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), + dl, VT); + SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), + dl, VT); + SDValue Mask0F = 
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), + dl, VT); + SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), + dl, VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(1, ShVT)), + DAG.getConstant(1, dl, ShVT)), Mask55)); // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33), DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(2, ShVT)), + DAG.getConstant(2, dl, ShVT)), Mask33)); // v = (v + (v >> 4)) & 0x0F0F0F0F... Op = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, - DAG.getConstant(4, ShVT))), + DAG.getConstant(4, dl, ShVT))), Mask0F); // v = (v * 0x01010101...) >> (Len - 8) Op = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), - DAG.getConstant(Len - 8, ShVT)); + DAG.getConstant(Len - 8, dl, ShVT)); return Op; } @@ -2783,7 +2814,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, EVT ShVT = TLI.getShiftAmountTy(VT); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { - SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); + SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); } @@ -2802,12 +2833,12 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, - DAG.getConstant(1, VT))); + DAG.getConstant(1, dl, VT))); // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) return DAG.getNode(ISD::SUB, dl, VT, - DAG.getConstant(VT.getSizeInBits(), VT), + DAG.getConstant(VT.getSizeInBits(), dl, VT), DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); } @@ -2817,132 +2848,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case 
MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - case ISD::ATOMIC_LOAD_MAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_MIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break; - 
case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } @@ -2967,10 +2874,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::FRAMEADDR: case ISD::RETURNADDR: case ISD::FRAME_TO_ARGS_OFFSET: - Results.push_back(DAG.getConstant(0, Node->getValueType(0))); + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); break; case ISD::FLT_ROUNDS_: - Results.push_back(DAG.getConstant(1, Node->getValueType(0))); + Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); break; case ISD::EH_RETURN: case ISD::EH_LABEL: @@ -2984,7 +2891,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::EH_SJLJ_SETJMP: // If the target didn't expand this, just return 'zero' and preserve the // chain. - Results.push_back(DAG.getConstant(0, MVT::i32)); + Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; case ISD::ATOMIC_FENCE: { @@ -3005,7 +2912,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. - SDValue Zero = DAG.getConstant(0, Node->getValueType(0)); + SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, @@ -3081,10 +2988,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::UNDEF: { EVT VT = Node->getValueType(0); if (VT.isInteger()) - Results.push_back(DAG.getConstant(0, VT)); + Results.push_back(DAG.getConstant(0, dl, VT)); else { assert(VT.isFloatingPoint() && "Unknown value type!"); - Results.push_back(DAG.getConstantFP(0, VT)); + Results.push_back(DAG.getConstantFP(0, dl, VT)); } break; } @@ -3123,7 +3030,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarType().getSizeInBits() - ExtraVT.getScalarType().getSizeInBits(); - SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); + SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); @@ -3161,7 +3068,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); - Tmp1 = DAG.getConstantFP(apf, VT); + Tmp1 = DAG.getConstantFP(apf, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), Node->getOperand(0), Tmp1, ISD::SETLT); @@ -3170,7 +3077,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getNode(ISD::FSUB, dl, VT, Node->getOperand(0), Tmp1)); False = DAG.getNode(ISD::XOR, dl, NVT, False, - DAG.getConstant(x, NVT)); + DAG.getConstant(x, dl, NVT)); Tmp1 
= DAG.getSelect(dl, NVT, Tmp2, True, False); Results.push_back(Tmp1); break; @@ -3191,11 +3098,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, + DAG.getConstant(Align - 1, dl, VAList.getValueType())); VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, + DAG.getConstant(-(int64_t)Align, dl, VAList.getValueType())); } @@ -3203,6 +3110,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), + dl, VAList.getValueType())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, @@ -3317,11 +3225,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, - DAG.getConstant(Idx, TLI.getVectorIdxTy()))); + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, - DAG.getConstant(Idx - NumElems, + DAG.getConstant(Idx - NumElems, dl, TLI.getVectorIdxTy()))); } @@ -3336,7 +3244,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), - DAG.getConstant(OpTy.getSizeInBits()/2, + DAG.getConstant(OpTy.getSizeInBits()/2, dl, TLI.getShiftAmountTy(Node->getOperand(0).getValueType()))); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { @@ -3374,7 +3282,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; case ISD::FNEG: // Expand Y = FNEG(X) -> Y = SUB -0.0, X - Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, Node->getOperand(0)); Results.push_back(Tmp1); @@ -3383,7 +3291,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); - Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getConstantFP(0.0, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, ISD::SETUGT); Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); @@ -3391,6 +3299,26 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: { + // Expand Y = MAX(A, B) -> Y = (A > B) ? 
A : B + ISD::CondCode Pred; + switch (Node->getOpcode()) { + default: llvm_unreachable("How did we get here?"); + case ISD::SMAX: Pred = ISD::SETGT; break; + case ISD::SMIN: Pred = ISD::SETLT; break; + case ISD::UMAX: Pred = ISD::SETUGT; break; + case ISD::UMIN: Pred = ISD::SETULT; break; + } + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred); + Results.push_back(Tmp1); + break; + } + case ISD::FMINNUM: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, @@ -3519,6 +3447,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; + case ISD::FMAD: + llvm_unreachable("Illegal fmad should never be formed"); + case ISD::FADD: Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, @@ -3545,6 +3476,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::FP_TO_FP16: { + if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { + SDValue Op = Node->getOperand(0); + MVT SVT = Op.getSimpleValueType(); + if ((SVT == MVT::f64 || SVT == MVT::f80) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { + // Under fastmath, we can expand this node into a fround followed by + // a float-half conversion. + SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0, dl)); + Results.push_back( + DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); + break; + } + } + RTLIB::Libcall LC = RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); @@ -3579,8 +3525,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); - Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT)); + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, + VT)); + Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT)); Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); break; } @@ -3696,7 +3643,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); - SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), + SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), dl, TLI.getShiftAmountTy(HalfType)); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); @@ -3721,7 +3668,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT ResultType = Node->getValueType(1); EVT OType = getSetCCResultType(Node->getValueType(0)); - SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); // LHSSign -> LHS >= 0 // RHSSign -> RHS >= 0 @@ -3787,9 +3734,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, 
dl)); } else { // We can fall back to a libcall with an illegal type for the MUL if we // have a libcall big enough. @@ -3810,9 +3757,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // part. unsigned LoSize = VT.getSizeInBits(); SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy())); SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy())); // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. This all depends upon WideVT not @@ -3821,9 +3770,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); // Ret is a node with an illegal type. Because such things are not // generally permitted during this phase of legalization, make sure the // node has no more uses. The above EXTRACT_ELEMENT nodes should have been @@ -3833,14 +3782,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } if (isSigned) { - Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, + Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, dl, TLI.getShiftAmountTy(BottomHalf.getValueType())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); } else { TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, - DAG.getConstant(0, VT), ISD::SETNE); + DAG.getConstant(0, dl, VT), ISD::SETNE); } Results.push_back(BottomHalf); Results.push_back(TopHalf); @@ -3851,7 +3800,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, - DAG.getConstant(PairTy.getSizeInBits()/2, + DAG.getConstant(PairTy.getSizeInBits()/2, dl, TLI.getShiftAmountTy(PairTy))); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; @@ -3866,7 +3815,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); } else { Tmp1 = DAG.getSelectCC(dl, Tmp1, - DAG.getConstant(0, Tmp1.getValueType()), + DAG.getConstant(0, dl, Tmp1.getValueType()), Tmp2, Tmp3, ISD::SETNE); } Results.push_back(Tmp1); @@ -3882,8 +3831,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), - Index, DAG.getConstant(EntrySize, Index.getValueType())); + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EntrySize, dl, Index.getValueType())); SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, Table); @@ -3917,10 +3866,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // We test only the i1 bit. Skip the AND if UNDEF. Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? 
Tmp2 : DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, - DAG.getConstant(1, Tmp2.getValueType())); + DAG.getConstant(1, dl, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, DAG.getCondCode(ISD::SETNE), Tmp3, - DAG.getConstant(0, Tmp3.getValueType()), + DAG.getConstant(0, dl, Tmp3.getValueType()), Node->getOperand(2)); } Results.push_back(Tmp1); @@ -3962,7 +3911,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, - DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT), + DAG.getConstant(TrueValue, dl, VT), + DAG.getConstant(0, dl, VT), Tmp3); Results.push_back(Tmp1); break; @@ -4030,7 +3980,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); } else { - Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Tmp4, CC); @@ -4061,7 +4011,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); } else { - Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, Tmp3, Node->getOperand(4)); @@ -4085,12 +4035,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(0), DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + Node->getOperand(0), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), - Node->getOperand(1), DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + Node->getOperand(1), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -4125,6 +4075,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SETCC) { OVT = Node->getOperand(0).getSimpleValueType(); } + if (Node->getOpcode() == ISD::BR_CC) + OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3; @@ -4142,16 +4094,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { if (Node->getOpcode() == ISD::CTTZ) { // FIXME: This should set a bit in the zero extended value instead. 
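// [Editor's annotation, not part of the diff:] After the zero-extension
// above, a zero input makes the promoted CTTZ return NVT.getSizeInBits()
// (e.g. 32 for an i16 op promoted to i32); the setcc/select below maps
// that back to OVT.getSizeInBits() (16), the value the original-width
// CTTZ would have produced. ORing a single bit into the zero-extended
// value at position OVT.getSizeInBits(), as the FIXME suggests, would
// fold this fixup into one instruction.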
Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), - Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), + Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT), ISD::SETEQ); Tmp1 = DAG.getSelect(dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1); } else if (Node->getOpcode() == ISD::CTLZ || Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - - OVT.getSizeInBits(), NVT)); + OVT.getSizeInBits(), dl, NVT)); } Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; @@ -4160,7 +4112,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, - DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT))); Results.push_back(Tmp1); break; } @@ -4250,7 +4203,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); Results.push_back(Tmp1); break; } @@ -4280,27 +4233,74 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::BR_CC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(1))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); + Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), + Node->getOperand(0), Node->getOperand(1), + Tmp1, Tmp2, Node->getOperand(4))); + break; + } case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FDIV: case ISD::FREM: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FCOPYSIGN: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0))); + Tmp3, DAG.getIntPtrConstant(0, dl))); break; } - case ISD::FLOG2: - case ISD::FEXP2: + case ISD::FMA: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, + DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), + DAG.getIntPtrConstant(0, dl))); + break; + } + case ISD::FPOWI: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = Node->getOperand(1); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0, dl))); + break; + } + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FTRUNC: + case ISD::FNEG: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: case ISD::FLOG: - case ISD::FEXP: { + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FABS: + case ISD::FEXP: + case ISD::FEXP2: { Tmp1 = 
DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp2, DAG.getIntPtrConstant(0))); + Tmp2, DAG.getIntPtrConstant(0, dl))); break; } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b596715..37fdf44 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -131,7 +131,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), + return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } @@ -149,8 +149,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { // Mask = ~(1 << (Size-1)) APInt API = APInt::getAllOnesValue(Size); - API.clearBit(Size-1); - SDValue Mask = DAG.getConstant(API, NVT); + API.clearBit(Size - 1); + SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); } @@ -218,8 +218,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { unsigned RSize = RVT.getSizeInBits(); // First get the sign bit of the second operand. - SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), - DAG.getConstant(RSize - 1, + SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), + DAG.getConstant(RSize - 1, dl, TLI.getShiftAmountTy(RVT))); SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); @@ -227,21 +227,21 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); if (SizeDiff > 0) { SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, - DAG.getConstant(SizeDiff, + DAG.getConstant(SizeDiff, dl, TLI.getShiftAmountTy(SignBit.getValueType()))); SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); } else if (SizeDiff < 0) { SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, - DAG.getConstant(-SizeDiff, + DAG.getConstant(-SizeDiff, dl, TLI.getShiftAmountTy(SignBit.getValueType()))); } // Clear the sign bit of the first operand. - SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), - DAG.getConstant(LSize - 1, + SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), + DAG.getConstant(LSize - 1, dl, TLI.getShiftAmountTy(LVT))); - Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT)); LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); // Or the value with the sign bit.
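[Editor's note] The SoftenFloatRes_FCOPYSIGN lowering above is plain sign-bit arithmetic on the integer images of the two operands. A minimal standalone sketch of the equal-width case (hypothetical helper operating on 32-bit IEEE-754 bit patterns; not part of this commit):

    #include <cstdint>

    // Combine the magnitude bits of X with the sign bit of Y, mirroring the
    // SignBit/Mask construction above for LSize == RSize == 32.
    uint32_t softCopySign32(uint32_t X, uint32_t Y) {
      uint32_t SignBit = Y & (1u << 31); // sign of the second operand
      uint32_t Mask = (1u << 31) - 1;    // clears the sign of the first
      return (X & Mask) | SignBit;
    }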
@@ -386,8 +386,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X - SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), + SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, @@ -395,7 +396,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, 2, false, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -755,7 +756,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -794,7 +795,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -837,7 +838,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { if (ST->isTruncatingStore()) // Do an FP_ROUND followed by a non-truncating store. Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), - Val, DAG.getIntPtrConstant(0))); + Val, DAG.getIntPtrConstant(0, dl))); else Val = GetSoftenedFloat(Val); @@ -927,12 +928,13 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + SDLoc dl(N); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(integerPartWidth, C.getRawData()[1])), - NVT); + dl, NVT); Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(integerPartWidth, C.getRawData()[0])), - NVT); + dl, NVT); } void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, @@ -1136,9 +1138,10 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Hi = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), NVT, N->getOperand(0)); + SDLoc dl(N); + Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), - APInt(NVT.getSizeInBits(), 0)), NVT); + APInt(NVT.getSizeInBits(), 0)), dl, NVT); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, @@ -1262,7 +1265,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, // The low part is zero. Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), - APInt(NVT.getSizeInBits(), 0)), NVT); + APInt(NVT.getSizeInBits(), 0)), dl, NVT); // Modified the chain - switch anything that used the old chain to use the // new one. 
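[Editor's note] ExpandFloatRes_FP_EXTEND and ExpandFloatRes_LOAD above both materialize a zero low part because ppcf128 is a "double-double" format: the 128-bit value is the exact sum Hi + Lo of two f64 components, with Hi holding the value rounded to double precision. A hypothetical standalone model (not part of this commit):

    // A double that is merely widened to ppcf128 is already exactly
    // representable, so its low component is 0.0.
    struct PPCF128 { double Hi, Lo; };
    PPCF128 extendToPPCF128(double D) { return {D, 0.0}; }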
@@ -1287,7 +1290,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, MVT::i32, Src); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), - APInt(NVT.getSizeInBits(), 0)), NVT); + APInt(NVT.getSizeInBits(), 0)), dl, NVT); Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); } else { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1335,8 +1338,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), - MVT::ppcf128)); - Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, SrcVT), + dl, MVT::ppcf128)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT), Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); } @@ -1436,7 +1439,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1479,7 +1482,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, N->getOperand(0), DAG.getValueType(MVT::f64)); Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); } @@ -1499,7 +1502,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { "Logic only correct for ppcf128!"); const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); - SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); + SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. return DAG.getSelectCC(dl, N->getOperand(0), Tmp, @@ -1509,7 +1512,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { MVT::ppcf128, N->getOperand(0), Tmp)), - DAG.getConstant(0x80000000, MVT::i32)), + DAG.getConstant(0x80000000, dl, + MVT::i32)), DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, N->getOperand(0)), ISD::SETGE); @@ -1529,7 +1533,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
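// [Editor's annotation, not part of the diff:] The ppcf128 FP_TO_UINT
// expansion a few hunks above is the classic signed-conversion trick; a
// hypothetical standalone model of the selected expression:
//
//   uint32_t FPToUInt32(double X) {
//     const double TwoE31 = 2147483648.0;      // 2^31
//     if (X >= TwoE31)                         // above INT32_MAX
//       return (uint32_t)(int32_t)(X - TwoE31) + 0x80000000u;
//     return (uint32_t)(int32_t)X;             // fits in a signed int
//   }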
if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1579,3 +1583,420 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, ST->getMemoryVT(), ST->getMemOperand()); } + +//===----------------------------------------------------------------------===// +// Float Operand Promotion +//===----------------------------------------------------------------------===// +// + +static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f16) { + return ISD::FP16_TO_FP; + } else if (RetVT == MVT::f16) { + return ISD::FP_TO_FP16; + } + + report_fatal_error("Attempt at an invalid promotion-related conversion"); +} + +bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { + SDValue R = SDValue(); + + // Nodes that use a promotion-requiring floating point operand, but don't + // produce a promotion-requiring floating point result, need to be legalized + // to use the promoted float operand. Nodes that produce at least one + // promotion-requiring floating point result have their operands legalized as + // a part of PromoteFloatResult. + switch (N->getOpcode()) { + default: + llvm_unreachable("Do not know how to promote this operator's operand!"); + + case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break; + case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break; + case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break; + case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; + case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; + } + + if (R.getNode()) + ReplaceValueWith(SDValue(N, 0), R); + return false; +} + +SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) { + SDValue Op = N->getOperand(0); + EVT OpVT = Op->getValueType(0); + + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + assert (IVT == N->getValueType(0) && "Bitcast to type of different size"); + + SDValue Promoted = GetPromotedFloat(N->getOperand(0)); + EVT PromotedVT = Promoted->getValueType(0); + + // Convert the promoted float value to the desired IVT. + return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT, + Promoted); + } + +// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by +// PromoteFloatRes_FCOPYSIGN. +SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { + assert (OpNo == 1 && "Only Operand 1 must need promotion here"); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), Op1); +} + +// Convert the promoted float value to the desired integer type +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + EVT VT = N->getValueType(0); + + // Desired VT is the same as the promoted type. Use the promoted float directly. + if (VT == Op->getValueType(0)) + return Op; + + // Else, extend the promoted float value to the desired VT.
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op); +} + +// Promote the float operands used for comparison. The true- and false- +// operands have the same type as the result and are promoted, if needed, by +// PromoteFloatRes_SELECT_CC. +SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) { + SDValue LHS = GetPromotedFloat(N->getOperand(0)); + SDValue RHS = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), + LHS, RHS, N->getOperand(2), N->getOperand(3), + N->getOperand(4)); +} + +// Construct a SETCC that compares the promoted values and sets the conditional +// code. +SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + + return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode); + +} + +// Lower the promoted float down to the integer value of the same size and +// construct a STORE of the integer value. +SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + SDLoc DL(N); + + SDValue Promoted = GetPromotedFloat(Val); + EVT VT = ST->getOperand(1)->getValueType(0); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + SDValue NewVal; + NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT), DL, + IVT, Promoted); + + return DAG.getStore(ST->getChain(), DL, NewVal, ST->getBasePtr(), + ST->getMemOperand()); +} + +//===----------------------------------------------------------------------===// +// Float Result Promotion +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { + SDValue R = SDValue(); + + switch (N->getOpcode()) { + // These opcodes cannot appear if promotion of FP16 is done in the backend + // instead of in Clang + case ISD::FP16_TO_FP: + case ISD::FP_TO_FP16: + default: + llvm_unreachable("Do not know how to promote this operator's result!"); + + case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break; + case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break; + case ISD::EXTRACT_VECTOR_ELT: + R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break; + + // Unary FP Operations + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: R = PromoteFloatRes_UnaryOp(N); break; + + // Binary FP Operations + case ISD::FADD: + case ISD::FDIV: + case ISD::FMAXNUM: + case ISD::FMINNUM: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break; + + case ISD::FMA: // FMA is same as FMAD + case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break; + + case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break; + + case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; + case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break; + case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break; + + case
ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break; + + } + + if (R.getNode()) + SetPromotedFloat(SDValue(N, ResNo), R); +} + +// Bitcast from i16 to f16: convert the i16 to an f32 value instead. +// At this point, it is not possible to determine if the bitcast value is +// eventually stored to memory or promoted to f32 or promoted to a +// higher-precision floating point. Some of these cases are handled by the +// FP_EXTEND and STORE promotion handlers. +SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, + N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) { + ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Get the (bit-cast) APInt of the APFloat and build an integer constant + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(), DL, + IVT); + + // Convert the Constant to the desired FP type + // FIXME: We might be able to do the conversion during compilation and get rid + // of it from the object code + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, C); +} + +// If the Index operand is a constant, try to redirect the extract operation to +// the correct legalized vector. If not, bit-convert the input vector to an +// equivalent integer vector. Extract the element as a (bit-cast) integer +// value and convert it to the promoted type. +SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDLoc DL(N); + + // If the index is constant, try to extract the value from the legalized + // vector type.
+ if (isa<ConstantSDNode>(N->getOperand(1))) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT VecVT = Vec->getValueType(0); + EVT EltVT = VecVT.getVectorElementType(); + + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + switch (getTypeAction(VecVT)) { + default: break; + case TargetLowering::TypeScalarizeVector: { + SDValue Res = GetScalarizedVector(N->getOperand(0)); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + case TargetLowering::TypeWidenVector: { + Vec = GetWidenedVector(Vec); + SDValue Res = DAG.getNode(N->getOpcode(), DL, EltVT, Vec, Idx); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + case TargetLowering::TypeSplitVector: { + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + SDValue Res; + if (IdxVal < LoElts) + Res = DAG.getNode(N->getOpcode(), DL, EltVT, Lo, Idx); + else + Res = DAG.getNode(N->getOpcode(), DL, EltVT, Hi, + DAG.getConstant(IdxVal - LoElts, DL, + Idx.getValueType())); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + + } + } + + // Bit-convert the input vector to the equivalent integer vector + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + EVT IVT = NewOp.getValueType().getVectorElementType(); + + // Extract the element as a (bit-cast) integer value + SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IVT, + NewOp, N->getOperand(1)); + + // Convert the element to the desired FP type + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, NewVal); +} + +// FCOPYSIGN(X, Y) returns the value of X with the sign of Y. If the result +// needs promotion, so does the argument X. Note that Y, if needed, will be +// handled during operand promotion. +SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Unary operation where the result and the operand have PromoteFloat type +// action. Construct a new SDNode with the promoted float value of the old +// operand. +SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op = GetPromotedFloat(N->getOperand(0)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op); +} + +// Binary operations where the result and both operands have PromoteFloat type +// action. Construct a new SDNode with the promoted float values of the old +// operands.
+SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + SDValue Op2 = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2); +} + +// Promote the Float (first) operand and retain the Integer (second) operand +SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Explicit operation to reduce precision. Reduce the value to half precision +// and promote it back to the legal type. +SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { + SDLoc DL(N); + + SDValue Op = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT OpVT = Op->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + // Round promoted float to desired precision + SDValue Round = DAG.getNode(GetPromotionOpcode(OpVT, VT), DL, IVT, Op); + // Promote it back to the legal output type + return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + EVT VT = N->getValueType(0); + + // Load the value as an integer value with the same number of bits + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + IVT, SDLoc(N), L->getChain(), L->getBasePtr(), + L->getOffset(), L->getPointerInfo(), IVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment(), + L->getAAInfo()); + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); + + // Convert the integer value to the desired FP type + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL); +} + +// Construct a new SELECT node with the promoted true- and false- values. +SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(1)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0), + N->getOperand(0), TrueVal, FalseVal); +} + +// Construct a new SELECT_CC node with the promoted true- and false- values. +// The operands used for comparison are promoted by PromoteFloatOp_SELECT_CC. 
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(2)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(3)); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1), TrueVal, FalseVal, + N->getOperand(4)); +} + +// Construct a SDNode that transforms the SINT or UINT operand to the promoted +// float type. +SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a4e44cc..eeaebf78 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -71,6 +71,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break; case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; @@ -251,6 +255,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case TargetLowering::TypePromoteFloat: { + // Convert the promoted float by hand. + if (NOutVT.bitsEq(NInVT)) { + SDValue PromotedOp = GetPromotedFloat(InOp); + SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); + return DAG.getNode(ISD::AssertZext, dl, NOutVT, Trunc, + DAG.getValueType(OutVT)); + } + break; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: break; @@ -297,7 +311,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); + DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT))); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -345,7 +359,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Subtract off the extra leading bits in the bigger type. return DAG.getNode( ISD::SUB, dl, NVT, Op, - DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), + DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT)); } @@ -366,7 +380,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { // the top of the original type. 
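// [Editor's annotation, not part of the diff:] Concretely: when an i16
// CTTZ is promoted to i32, ORing in bit 16 makes a zero i16 input yield
// cttz == 16 (the original width) instead of 32, while nonzero inputs are
// unaffected because their lowest set bit lies below bit 16.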
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), OVT.getScalarSizeInBits()); - Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT)); } return DAG.getNode(N->getOpcode(), dl, NVT, Op); } @@ -723,9 +737,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, - DAG.getIntPtrConstant(SmallVT.getSizeInBits())); + DAG.getIntPtrConstant(SmallVT.getSizeInBits(), + DL)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, - DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); + DAG.getConstant(0, DL, Hi.getValueType()), + ISD::SETNE); } else { // Signed overflow occurred if the high part does not sign extend the low. SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), @@ -784,7 +800,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); // Shift it to the right position and "or" it in. Part = DAG.getNode(ISD::SHL, dl, NVT, Part, - DAG.getConstant(i * RegVT.getSizeInBits(), + DAG.getConstant(i*RegVT.getSizeInBits(), dl, TLI.getPointerTy())); Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); } @@ -852,6 +868,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; case ISD::SHL: case ISD::SRA: @@ -977,7 +994,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { SDLoc dl(N); Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, - DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(OVT.getSizeInBits(), dl, + TLI.getPointerTy())); return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); } @@ -1116,7 +1134,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ - assert(OpNo == 2 && "Only know how to promote the mask!"); SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); @@ -1127,7 +1144,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN if (!TLI.isTypeLegal(DataVT)) { if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { DataOp = GetPromotedInteger(DataOp); - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + if (!TLI.isTypeLegal(MaskVT)) + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); TruncateStore = true; } else { @@ -1147,7 +1165,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, MaskVT); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -1323,92 +1341,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - 
llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != 
RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } @@ -1417,12 +1351,19 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - assert(Amt && "Expected zero shifts to be already optimized away."); SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); + // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization + // split a vector shift, like this: <op1, op2> SHL <0, 2>. + if (!Amt) { + Lo = InL; + Hi = InH; + return; + } + EVT NVT = InL.getValueType(); unsigned VTBits = N->getValueType(0).getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); @@ -1430,13 +1371,13 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, if (N->getOpcode() == ISD::SHL) { if (Amt > VTBits) { - Lo = Hi = DAG.getConstant(0, NVT); + Lo = Hi = DAG.getConstant(0, DL, NVT); } else if (Amt > NVTBits) { - Lo = DAG.getConstant(0, NVT); + Lo = DAG.getConstant(0, DL, NVT); Hi = DAG.getNode(ISD::SHL, DL, - NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy)); + NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy)); } else if (Amt == NVTBits) { - Lo = DAG.getConstant(0, NVT); + Lo = DAG.getConstant(0, DL, NVT); Hi = InL; } else if (Amt == 1 && TLI.isOperationLegalOrCustom(ISD::ADDC, @@ -1448,34 +1389,34 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); } else { - Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)); Hi = DAG.getNode(ISD::OR, DL, NVT, DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(Amt, ShTy)), + DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(NVTBits-Amt, ShTy))); + DAG.getConstant(NVTBits - Amt, DL, ShTy))); } return; } if (N->getOpcode() == ISD::SRL) { if (Amt > VTBits) { - Lo = DAG.getConstant(0, NVT); - Hi = DAG.getConstant(0, NVT); + Lo = DAG.getConstant(0, DL, NVT); + Hi = DAG.getConstant(0, DL, NVT); } else if (Amt > NVTBits) { Lo = DAG.getNode(ISD::SRL, DL, - NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); - Hi = DAG.getConstant(0, NVT); + NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy)); + Hi = DAG.getConstant(0, DL, NVT); } else if (Amt == NVTBits) { Lo = InH; - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, DL, NVT); } else { Lo = DAG.getNode(ISD::OR, DL, NVT, DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(Amt, ShTy)), + DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + DAG.getConstant(NVTBits - Amt, DL, ShTy))); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } return; } @@ -1483,23 +1424,23 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); if (Amt > VTBits) { Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt > NVTBits) { Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(Amt-NVTBits, ShTy)); +
DAG.getConstant(Amt-NVTBits, DL, ShTy)); Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt == NVTBits) { Lo = InH; Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, DL, ShTy)); } else { Lo = DAG.getNode(ISD::OR, DL, NVT, DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(Amt, ShTy)), + DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + DAG.getConstant(NVTBits - Amt, DL, ShTy))); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } } @@ -1535,21 +1476,21 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { if (KnownOne.intersects(HighBitMask)) { // Mask out the high bit, which we know is set. Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, - DAG.getConstant(~HighBitMask, ShTy)); + DAG.getConstant(~HighBitMask, dl, ShTy)); switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); case ISD::SHL: - Lo = DAG.getConstant(0, NVT); // Low part is zero. + Lo = DAG.getConstant(0, dl, NVT); // Low part is zero. Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. return true; case ISD::SRL: - Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero. Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. return true; case ISD::SRA: Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, dl, ShTy)); Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. return true; } @@ -1562,7 +1503,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // shift if x is zero. We can use XOR here because x is known to be smaller // than 32. SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, dl, ShTy)); unsigned Op1, Op2; switch (N->getOpcode()) { @@ -1578,7 +1519,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // Use a little trick to get the bits that move from Lo to Hi. First // shift by one bit. - SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy)); // Then compute the remaining shift with amount-1. 
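// [Editor's annotation, not part of the diff:] Sh1 and Sh2 together
// compute InL shifted by (NVTBits - Amt): a shift by 1 followed by a
// shift by Amt2 == NVTBits - 1 - Amt. Splitting it this way keeps every
// shift amount strictly below NVTBits, so the sequence stays well defined
// even when Amt == 0, where a single shift by NVTBits would be undefined
// (the hazard the FIXME comments removed in the next hunk also note).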
SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); @@ -1609,11 +1550,14 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); - SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy); SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy), Amt, NVBitsNode, ISD::SETULT); + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy), + Amt, DAG.getConstant(0, dl, ShTy), + ISD::SETEQ); SDValue LoS, HiS, LoL, HiL; switch (N->getOpcode()) { @@ -1623,16 +1567,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); HiS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), - // FIXME: If Amt is zero, the following shift generates an undefined result - // on some architectures. DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); // Long: ShAmt >= NVTBits - LoL = DAG.getConstant(0, NVT); // Lo part is zero. + LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero. HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); - Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); + Hi = DAG.getSelect(dl, NVT, isZero, InH, + DAG.getSelect(dl, NVT, isShort, HiS, HiL)); return true; case ISD::SRL: // Short: ShAmt < NVTBits @@ -1644,10 +1587,11 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); // Long: ShAmt >= NVTBits - HiL = DAG.getConstant(0, NVT); // Hi part is zero. + HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero. LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: @@ -1655,16 +1599,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); LoS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), - // FIXME: If Amt is zero, the following shift generates an undefined result - // on some architectures. DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); // Long: ShAmt >= NVTBits HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. - DAG.getConstant(NVTBits-1, ShTy)); + DAG.getConstant(NVTBits - 1, dl, ShTy)); LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; } @@ -1706,18 +1649,50 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, return; } + bool hasOVF = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
+ ISD::UADDO : ISD::USUBO, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (hasOVF) { + SDVTList VTList = DAG.getVTList(NVT, NVT); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + int RevOpc; + if (N->getOpcode() == ISD::ADD) { + RevOpc = ISD::SUB; + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + } else { + RevOpc = ISD::ADD; + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + } + SDValue OVF = Lo.getValue(1); + + switch (BoolType) { + case TargetLoweringBase::UndefinedBooleanContent: + OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); + // Fallthrough + case TargetLoweringBase::ZeroOrOneBooleanContent: + Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); + } + return; + } + if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], ISD::SETULT); SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); + DAG.getConstant(1, dl, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); @@ -1726,8 +1701,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1824,7 +1799,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(NVTBits-1, TLI.getPointerTy())); + DAG.getConstant(NVTBits - 1, dl, TLI.getPointerTy())); } } @@ -1844,7 +1819,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, } else { Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part must be zero, make it explicit. 
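(The SETULT-based fallback above, used when UADDO/USUBO are not legal, is ordinary carry propagation. A rough scalar equivalent in plain C++; the function name is made up.)

#include <cstdint>

// Double-width add from halves: the wrapping low-half sum is below an addend
// exactly when it wrapped, which recovers the carry into the high half.
uint64_t add64_from_halves(uint32_t AL, uint32_t AH, uint32_t BL, uint32_t BH) {
  uint32_t Lo = AL + BL;              // may wrap
  uint32_t Carry = (Lo < AL) ? 1 : 0; // the ISD::SETULT against an addend
  uint32_t Hi = AH + BH + Carry;
  return ((uint64_t)Hi << 32) | Lo;
}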
- Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, dl, NVT); } } @@ -1864,8 +1839,9 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, const APInt &Cst = Constant->getAPIntValue(); bool IsTarget = Constant->isTargetOpcode(); bool IsOpaque = Constant->isOpaque(); - Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT, IsTarget, IsOpaque); - Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT, IsTarget, + SDLoc dl(N); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque); } @@ -1877,15 +1853,16 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, EVT NVT = Lo.getValueType(); SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, - DAG.getConstant(0, NVT), ISD::SETNE); + DAG.getConstant(0, dl, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, DAG.getNode(ISD::ADD, dl, NVT, LoLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); - Hi = DAG.getConstant(0, NVT); + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); } void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, @@ -1896,7 +1873,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, EVT NVT = Lo.getValueType(); Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, dl, NVT); } void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, @@ -1907,22 +1884,27 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, EVT NVT = Lo.getValueType(); SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, - DAG.getConstant(0, NVT), ISD::SETNE); + DAG.getConstant(0, dl, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, DAG.getNode(ISD::ADD, dl, NVT, HiLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); - Hi = DAG.getConstant(0, NVT); + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, @@ -1934,7 +1916,11 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, @@ -1980,10 +1966,10 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // lo part. 
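(The CTLZ/CTTZ expansions above compose the count from the two halves. Scalar form of the CTLZ case, a sketch using the GCC/Clang builtin.)

#include <cstdint>

// ctlz of a 64-bit value from 32-bit halves: count in the nonzero high half,
// or 32 plus the low half's count; CTTZ is the mirror image starting from Lo.
unsigned ctlz64_from_halves(uint32_t Lo, uint32_t Hi) {
  if (Hi != 0)
    return __builtin_clz(Hi);
  return 32 + (Lo != 0 ? __builtin_clz(Lo) : 32);
}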
unsigned LoSize = Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); } else if (ExtType == ISD::ZEXTLOAD) { // The high part is just a zero. - Hi = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, dl, NVT); } else { assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); // The high part is undefined. @@ -2002,7 +1988,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, isInvariant, @@ -2029,7 +2015,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -2046,12 +2032,12 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Transfer low bits from the bottom of Hi to the top of Lo. Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(ExcessBits, + DAG.getConstant(ExcessBits, dl, TLI.getPointerTy()))); // Move high bits to the right position in Hi. Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, TLI.getPointerTy())); } } @@ -2127,7 +2113,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) // EVT OType = Node->getValueType(1); - SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); @@ -2148,6 +2134,13 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2160,7 +2153,6 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } @@ -2276,7 +2268,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, // The high part is obtained by SRA'ing all but one of the bits of low part. unsigned LoSize = NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize-1, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); } else { // For example, extension of an i48 to an i64. 
The operand type necessarily // promotes to the result type, so will end up being expanded too. @@ -2310,7 +2302,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { // The high part gets the sign extension from the lo-part. This handles // things like sextinreg V:i64 from i8. Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, - DAG.getConstant(Hi.getValueType().getSizeInBits()-1, + DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl, TLI.getPointerTy())); } else { // For example, extension of an i48 to an i64. Leave the low part alone, @@ -2327,6 +2319,13 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2339,7 +2338,6 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } @@ -2350,7 +2348,8 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), N->getOperand(0), - DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(NVT.getSizeInBits(), dl, + TLI.getPointerTy())); Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } @@ -2392,14 +2391,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), - RHS, DAG.getConstant(0, VT), ISD::SETEQ); + RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ); SDValue NotZero = DAG.getSelect(dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + DAG.getConstant(1, dl, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, ISD::SETNE); Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, - DAG.getConstant(0, N->getValueType(1)), + DAG.getConstant(0, dl, N->getValueType(1)), Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; @@ -2422,7 +2421,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp = DAG.CreateStackTemporary(PtrVT); // Temporary for the overflow value, default it to zero. SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, - DAG.getConstant(0, PtrVT), Temp, + DAG.getConstant(0, dl, PtrVT), Temp, MachinePointerInfo(), false, false, 0); TargetLowering::ArgListTy Args; @@ -2457,7 +2456,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, - DAG.getConstant(0, PtrVT), + DAG.getConstant(0, dl, PtrVT), ISD::SETNE); // Use the overflow from the libcall everywhere. 
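(The divide-back test in the XMULO expansion above is the standard unsigned-multiply overflow check. A scalar sketch; the name is illustrative, and RHS == 0 is special-cased exactly as the selects above do.)

#include <cstdint>

// Unsigned multiply with overflow detection via division: after the wrapping
// multiply, Product / RHS != LHS iff the true product did not fit. RHS == 0
// can never overflow and must not reach the UDIV.
bool umul_overflows(uint32_t LHS, uint32_t RHS, uint32_t &Product) {
  Product = LHS * RHS; // wraps on overflow
  if (RHS == 0)
    return false;
  return Product / RHS != LHS;
}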
ReplaceValueWith(SDValue(N, 1), Ofl); @@ -2467,6 +2466,13 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2479,7 +2485,6 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } @@ -2487,6 +2492,13 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2499,7 +2511,6 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } @@ -2511,7 +2522,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, if (Op.getValueType().bitsLE(NVT)) { // The low part is zero extension of the input (degenerates to a copy). Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero. } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. @@ -2536,7 +2547,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDLoc dl(N); EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); - SDValue Zero = DAG.getConstant(0, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), @@ -2637,7 +2648,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi); NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS); - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType()); return; } @@ -2726,7 +2737,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
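(The XOR/OR rewrite in IntegerExpandSetCCOperands above rests on a simple identity; scalar form as a sketch.)

#include <cstdint>

// Expanded equality compare: the halves are all equal exactly when the OR of
// their XORs is zero, so a double-width SETEQ becomes one compare against 0.
bool eq64_from_halves(uint32_t LL, uint32_t LH, uint32_t RL, uint32_t RH) {
  return ((LL ^ RL) | (LH ^ RH)) == 0;
}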
if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2744,7 +2755,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (!NewRHS.getNode()) { - NewRHS = DAG.getConstant(0, NewLHS.getValueType()); + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2839,7 +2850,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, @@ -2861,11 +2872,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, TLI.getPointerTy())); Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, DAG.getNode(ISD::SRL, dl, NVT, Lo, - DAG.getConstant(ExcessBits, + DAG.getConstant(ExcessBits, dl, TLI.getPointerTy()))); } @@ -2875,7 +2886,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -2931,7 +2942,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { GetExpandedInteger(Op, Lo, Hi); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Hi.getValueType()), - Hi, DAG.getConstant(0, Hi.getValueType()), + Hi, + DAG.getConstant(0, dl, Hi.getValueType()), ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. @@ -2940,8 +2952,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { TLI.getPointerTy()); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. - SDValue Zero = DAG.getIntPtrConstant(0); - SDValue Four = DAG.getIntPtrConstant(4); + SDValue Zero = DAG.getIntPtrConstant(0, dl); + SDValue Four = DAG.getIntPtrConstant(4, dl); if (TLI.isBigEndian()) std::swap(Zero, Four); SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); @@ -2999,7 +3011,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { // Extract the element from the original vector. 
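(The constant-pool select in ExpandIntOp_UINT_TO_FP above picks between 0 and a 2^64 fudge factor depending on the sign bit of Hi. The scalar idea, as a sketch rather than the actual DAG lowering.)

#include <cstdint>

// uint64 -> double via a signed conversion plus a conditional 2^64 fixup,
// mirroring the select between offsets 0 and 4 into the {0, FF} pair.
double uint64_to_double_fixup(uint64_t X) {
  double D = (double)(int64_t)X;   // signed int-to-fp
  if ((int64_t)X < 0)              // Hi was compared SETLT 0 above
    D += 18446744073709551616.0;   // add 2^64 back
  return D;
}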
SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), - BaseIdx, DAG.getConstant(i, BaseIdx.getValueType())); + BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getVectorElementType(), N->getOperand(0), Index); @@ -3017,17 +3029,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); SDLoc dl(N); - unsigned NumElts = VT.getVectorNumElements(); - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - NewMask.push_back(SV->getMaskElt(i)); - } + ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements()); SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue V1 = GetPromotedInteger(N->getOperand(1)); EVT OutVT = V0.getValueType(); - return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); + return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask); } @@ -3097,7 +3105,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getConstant(j, + InElemTy, Op, DAG.getConstant(j, dl, TLI.getVectorIdxTy())); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } @@ -3135,6 +3143,16 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); } +SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDLoc dl(N); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + MVT InVT = V0.getValueType().getSimpleVT(); + MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1)); + return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext); +} + SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); unsigned NumElems = N->getNumOperands(); @@ -3153,7 +3171,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, - Incoming, DAG.getConstant(i, TLI.getVectorIdxTy())); + Incoming, DAG.getConstant(i, dl, TLI.getVectorIdxTy())); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index ebf6b28..9c29769 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -259,6 +259,10 @@ bool DAGTypeLegalizer::run() { WidenVectorResult(N, i); Changed = true; goto NodeDone; + case TargetLowering::TypePromoteFloat: + PromoteFloatResult(N, i); + Changed = true; + goto NodeDone; } } @@ -308,6 +312,10 @@ ScanOperands: NeedsReanalyzing = WidenVectorOperand(N, i); Changed = true; break; + case TargetLowering::TypePromoteFloat: + NeedsReanalyzing = PromoteFloatOperand(N, i); + Changed = true; + break; } break; } @@ -753,6 +761,17 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { OpEntry = Result; } +void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted float"); + AnalyzeNewValue(Result); + + SDValue 
&OpEntry = PromotedFloats[Op]; + assert(!OpEntry.getNode() && "Node is already promoted!"); + OpEntry = Result; +} + void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // Note that in some cases vector operation operands may be greater than // the vector element type. For example BUILD_VECTOR of type <1 x i1> with @@ -978,9 +997,9 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDLoc dl(Pair); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); } SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, @@ -993,7 +1012,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, - DAG.getConstant(EltSize, Index.getValueType())); + DAG.getConstant(EltSize, dl, Index.getValueType())); return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); } @@ -1010,7 +1029,8 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, - DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(LVT.getSizeInBits(), dlHi, + TLI.getPointerTy())); return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } @@ -1096,7 +1116,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, - DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy())); + DAG.getConstant(LoVT.getSizeInBits(), dl, + TLI.getPointerTy())); Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index cef3fc9..2f27789 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -93,6 +93,11 @@ private: /// the same size, this map indicates the converted value to use. SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; + /// PromotedFloats - For floating point nodes that have a smaller precision + /// than the smallest supported precision, this map indicates what promoted + /// value to use. + SmallDenseMap<SDValue, SDValue, 8> PromotedFloats; + /// ExpandedFloats - For float nodes that need to be expanded this map /// indicates which operands are the expanded version of the input. 
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; @@ -273,6 +278,7 @@ private: SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); @@ -499,6 +505,44 @@ private: void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, SDLoc dl); + + //===--------------------------------------------------------------------===// + // Float promotion support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + SDValue GetPromotedFloat(SDValue Op) { + SDValue &PromotedOp = PromotedFloats[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedFloat(SDValue Op, SDValue Result); + + void PromoteFloatResult(SDNode *N, unsigned ResNo); + SDValue PromoteFloatRes_BITCAST(SDNode *N); + SDValue PromoteFloatRes_BinOp(SDNode *N); + SDValue PromoteFloatRes_ConstantFP(SDNode *N); + SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N); + SDValue PromoteFloatRes_FMAD(SDNode *N); + SDValue PromoteFloatRes_FPOWI(SDNode *N); + SDValue PromoteFloatRes_FP_ROUND(SDNode *N); + SDValue PromoteFloatRes_LOAD(SDNode *N); + SDValue PromoteFloatRes_SELECT(SDNode *N); + SDValue PromoteFloatRes_SELECT_CC(SDNode *N); + SDValue PromoteFloatRes_UnaryOp(SDNode *N); + SDValue PromoteFloatRes_UNDEF(SDNode *N); + SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); + + bool PromoteFloatOperand(SDNode *N, unsigned ResNo); + SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); + //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// @@ -582,6 +626,7 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -593,14 +638,16 @@ private: bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); + SDValue SplitVecOp_TruncateHelper(SDNode *N); SDValue SplitVecOp_BITCAST(SDNode *N); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue 
SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); - SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 38829b6..330c31c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -50,6 +50,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; + case TargetLowering::TypePromoteFloat: + llvm_unreachable("Bitcast of a promotion-needing float should never need " + "expansion"); case TargetLowering::TypeSoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); @@ -117,7 +120,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getConstant(i, + CastInOp, DAG.getConstant(i, dl, TLI.getVectorIdxTy()))); // Build Lo, Hi pair by pairing extracted elements if needed. @@ -170,7 +173,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, + DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the second half from the stack slot. @@ -235,7 +238,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - DAG.getConstant(1, Idx.getValueType())); + DAG.getConstant(1, dl, Idx.getValueType())); Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); if (TLI.isBigEndian()) @@ -267,7 +270,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, @@ -436,7 +439,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - DAG.getConstant(1, Idx.getValueType())); + DAG.getConstant(1, dl, Idx.getValueType())); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type.
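(ExpandOp_INSERT_VECTOR_ELT above turns one insert of an illegal wide element into two inserts of its halves at 2*Idx and 2*Idx+1. In array form, assuming little-endian half order; names are made up.)

#include <cstdint>

// INSERT_VECTOR_ELT of an i64 into v2i64, performed on the bitcast v4i32:
// write the Lo half at 2*Idx and the Hi half at 2*Idx + 1.
void insert_i64_as_two_i32(uint32_t Vec[4], unsigned Idx, uint64_t Elt) {
  Vec[2 * Idx]     = (uint32_t)Elt;         // Lo
  Vec[2 * Idx + 1] = (uint32_t)(Elt >> 32); // Hi, at Idx + 1
}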
@@ -485,7 +488,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { isVolatile, isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3a8c276..c06227b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -196,6 +196,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); + bool HasVectorValue = false; if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); @@ -207,6 +208,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { + if (Lowered == Result) + return TranslateLegalizeResults(Op, Lowered); Changed = true; if (Lowered->getNumValues() != Op->getNumValues()) { // This expanded to something other than the load. Assume the @@ -232,16 +235,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); - case TargetLowering::Custom: - Changed = true; - return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); + case TargetLowering::Custom: { + SDValue Lowered = TLI.LowerOperation(Result, DAG); + Changed = Lowered != Result; + return TranslateLegalizeResults(Op, Lowered); + } case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandStore(Op)); } - } + } else if (Op.getOpcode() == ISD::MSCATTER) + HasVectorValue = true; - bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); J != E; ++J) @@ -317,6 +322,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -326,6 +335,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UINT_TO_FP: QueryType = Node->getOperand(0).getValueType(); break; + case ISD::MSCATTER: + QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType(); + break; } switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { @@ -375,8 +387,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) { // There are currently two cases of vector promotion: // 1) Bitcasting a vector of integers to a different type to a vector of the - // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64. - // 2) Extending a vector of floats to a vector of the same number oflarger + // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. + // 2) Extending a vector of floats to a vector of the same number of larger // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. 
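(A scalar analogue of promotion case (2), as a sketch: doing the arithmetic once in the wider type and rounding back is safe for a single add because the wide format carries enough extra precision, the same property that lets AArch64 run v4f16 FADD as v4f32.)

// Compute in the promoted type, then the FP_ROUND step narrows back.
// For one addition this matches native narrow arithmetic.
float fadd_promoted(float A, float B) {
  double Wide = (double)A + (double)B; // op in the promoted type
  return (float)Wide;                  // FP_ROUND back to the result type
}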
MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && @@ -403,7 +415,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) { if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) - return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); else return DAG.getNode(ISD::BITCAST, dl, VT, Op); } @@ -512,7 +524,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment(), + LD->isInvariant(), + MinAlign(LD->getAlignment(), Offset), LD->getAAInfo()); } else { EVT LoadVT = WideVT; @@ -524,13 +537,15 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment(), LD->getAAInfo()); + MinAlign(LD->getAlignment(), Offset), + LD->getAAInfo()); } RemainingBytes -= LoadBytes; Offset += LoadBytes; BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(LoadBytes, BasePTR.getValueType())); + DAG.getConstant(LoadBytes, dl, + BasePTR.getValueType())); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -538,7 +553,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { // Extract bits, pack and extend/trunc them into destination type. unsigned SrcEltBits = SrcEltVT.getSizeInBits(); - SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT); + SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT); unsigned BitOffset = 0; unsigned WideIdx = 0; @@ -548,7 +563,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue Lo, Hi, ShAmt; if (BitOffset < WideBits) { - ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT)); + ShAmt = DAG.getConstant(BitOffset, dl, TLI.getShiftAmountTy(WideVT)); Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); } @@ -558,7 +573,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { WideIdx++; BitOffset -= WideBits; if (BitOffset > 0) { - ShAmt = DAG.getConstant(SrcEltBits - BitOffset, + ShAmt = DAG.getConstant(SrcEltBits - BitOffset, dl, TLI.getShiftAmountTy(WideVT)); Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); @@ -577,7 +592,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); break; case ISD::SEXTLOAD: - ShAmt = DAG.getConstant(WideBits - SrcEltBits, + ShAmt = DAG.getConstant(WideBits - SrcEltBits, dl, TLI.getShiftAmountTy(WideVT)); Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); @@ -595,10 +610,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment(), LD->getAAInfo()); + MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, BasePTR.getValueType())); + DAG.getConstant(Stride, dl, BasePTR.getValueType())); 
Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -649,15 +664,16 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); + RegSclVT, Value, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment, AAInfo); + isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride), + AAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, BasePTR.getValueType())); + DAG.getConstant(Stride, dl, BasePTR.getValueType())); Stores.push_back(Store); } @@ -727,8 +743,9 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { EVT BitTy = MaskTy.getScalarType(); Mask = DAG.getSelect(DL, BitTy, Mask, - DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), - DAG.getConstant(0, BitTy)); + DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, + BitTy), + DAG.getConstant(0, DL, BitTy)); // Broadcast the mask so that the entire vector is all-one or all zero. SmallVector<SDValue, 8> Ops(NumElem, Mask); @@ -741,7 +758,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); + APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy); SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); @@ -763,7 +780,7 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { unsigned BW = VT.getScalarType().getSizeInBits(); unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); - SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); + SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); Op = Op.getOperand(0); Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); @@ -810,7 +827,7 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // without full scalarization than the sign extension does. unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); - SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT); + SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); return DAG.getNode(ISD::SRA, DL, VT, DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), ShiftAmount); @@ -829,7 +846,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { // Build up a zero vector to blend into this one. 
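(The mask-and-blend lowering in ExpandSELECT/ExpandVSELECT above reduces a select to bitwise ops once the condition is widened to all-ones or all-zeros. Scalar sketch, illustrative name.)

#include <cstdint>

// select(Cond, A, B) as (A & Mask) | (B & ~Mask) with Mask all-ones when the
// condition is true; the XOR with AllOnes above is how ~Mask is formed.
uint32_t select_blend(bool Cond, uint32_t A, uint32_t B) {
  uint32_t Mask = Cond ? 0xFFFFFFFFu : 0u;
  return (A & Mask) | (B & ~Mask);
}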
EVT SrcScalarVT = SrcVT.getScalarType(); - SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT); + SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT); SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); @@ -910,7 +927,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT); + APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); @@ -933,16 +950,16 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); unsigned BW = SVT.getSizeInBits(); - SDValue HalfWord = DAG.getConstant(BW/2, VT); + SDValue HalfWord = DAG.getConstant(BW/2, DL, VT); // Constants to clear the upper part of the word. // Notice that we can also use SHL+SHR, but using a constant is slightly // faster on x86. uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; - SDValue HalfWordMask = DAG.getConstant(HWMask, VT); + SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType()); + SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); // Clear upper part of LO, lower HI SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -961,8 +978,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { - SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + SDLoc DL(Op); + SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); + return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); @@ -978,16 +996,16 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), EltVT), - DAG.getConstant(0, EltVT)); + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 63671f7..445e882 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -252,7 +252,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { } else { EVT VT = OpVT.getVectorElementType(); Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, - DAG.getConstant(0, TLI.getVectorIdxTy())); 
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } @@ -308,7 +308,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); // Vector read from all ones, scalar expects a single 1 so mask. Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, - Cond, DAG.getConstant(1, CondVT)); + Cond, DAG.getConstant(1, SDLoc(N), CondVT)); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: assert(VecBool == TargetLowering::UndefinedBooleanContent || @@ -385,9 +385,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { } else { EVT VT = OpVT.getVectorElementType(); LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } // Turn it into a scalar SETCC. @@ -600,6 +600,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::MLOAD: SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); break; + case ISD::MGATHER: + SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi); + break; case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; @@ -668,6 +671,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UREM: case ISD::SREM: case ISD::FREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: @@ -723,6 +730,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: + case TargetLowering::TypePromoteFloat: case TargetLowering::TypeSoftenFloat: case TargetLowering::TypeScalarizeVector: case TargetLowering::TypeWidenVector: @@ -810,7 +818,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); } @@ -844,7 +852,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, StackPtr.getValueType())); + DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -891,7 +899,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo.getValueType(), Lo, Elt, Idx); else Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getConstant(IdxVal - LoNumElts, + DAG.getConstant(IdxVal - LoNumElts, dl, TLI.getVectorIdxTy())); return; } @@ -923,7 +931,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. 
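(The ExpandUINT_TO_FLOAT path above converts each half-word exactly and recombines with the HalfWord/HWMask/TWOHW constants. The scalar shape, as a sketch.)

#include <cstdint>

// Unsigned 64-bit to double via half-words: both halves convert exactly,
// and hi * 2^32 + lo reassembles the value with a single final rounding.
double uint64_to_double_by_halves(uint64_t X) {
  uint32_t Hi = (uint32_t)(X >> 32);
  uint32_t Lo = (uint32_t)(X & 0xFFFFFFFFu);
  return (double)Hi * 4294967296.0 + (double)Lo; // 4294967296 == 2^32
}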
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, StackPtr.getValueType())); + DAG.getConstant(IncrementSize, dl, + StackPtr.getValueType())); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -966,7 +975,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, @@ -1021,7 +1030,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -1043,6 +1052,54 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, } +void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Mask = MGT->getMask(); + unsigned Alignment = MGT->getOriginalAlignment(); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + + SDValue IndexHi, IndexLo; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, + MMO); + + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, + MMO); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MGT, 1), Ch); +} + + void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -1236,8 +1293,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Extract the vector element by hand. SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getConstant(Idx, - TLI.getVectorIdxTy()))); + Inputs[Input], + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy()))); } // Construct the Lo/Hi output using a BUILD_VECTOR. 
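(The split loads above address the high half at the base pointer plus the low half's store size, IncrementSize = LoMemVT bits / 8. In plain C++ for a v8i32 split into two v4i32 halves; purely illustrative.)

#include <cstdint>
#include <cstring>

// Hi half loads from the base pointer advanced by the low half's byte size.
void load_v8i32_halves(const uint32_t *Ptr, uint32_t Lo[4], uint32_t Hi[4]) {
  std::memcpy(Lo, Ptr, 16);     // low half at the base pointer
  std::memcpy(Hi, Ptr + 4, 16); // high half, 16 bytes further on
}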
@@ -1293,7 +1351,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; - case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; + case ISD::TRUNCATE: + Res = SplitVecOp_TruncateHelper(N); + break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); @@ -1301,21 +1361,37 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::MSTORE: Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); break; + case ISD::MSCATTER: + Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); + break; + case ISD::MGATHER: + Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo); + break; case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N); + else + Res = SplitVecOp_UnaryOp(N); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - case ISD::FTRUNC: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N); + else + Res = SplitVecOp_UnaryOp(N); + break; + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: + case ISD::FTRUNC: Res = SplitVecOp_UnaryOp(N); break; } @@ -1420,7 +1496,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); } else { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, - DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); + DAG.getConstant(IdxVal - LoElts, dl, + Idx.getValueType())); } } @@ -1441,7 +1518,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (IdxVal < LoElts) return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); return SDValue(DAG.UpdateNodeOperands(N, Hi, - DAG.getConstant(IdxVal - LoElts, + DAG.getConstant(IdxVal - LoElts, SDLoc(N), Idx.getValueType())), 0); } @@ -1462,6 +1539,68 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { MachinePointerInfo(), EltVT, false, false, false, 0); } +SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, + unsigned OpNo) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue Mask = MGT->getMask(); + unsigned Alignment = MGT->getOriginalAlignment(); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + + SDValue IndexHi, IndexLo; + if (Index.getNode()) + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + else + IndexLo = IndexHi = Index; + + MachineMemOperand *MMO = DAG.getMachineFunction(). 
+ getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, + OpsLo, MMO); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), + MGT->getRanges()); + + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, + OpsHi, MMO); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MGT, 1), Ch); + + SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo, + Hi); + ReplaceValueWith(SDValue(MGT, 0), Res); + return SDValue(); +} + SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); @@ -1497,7 +1636,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), @@ -1507,11 +1646,64 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, N->isTruncatingStore()); - // Build a factor node to remember that this store is independent of the // other one. return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} +SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Index = N->getIndex(); + SDValue Data = N->getValue(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + GetSplitVector(Data, DataLo, DataHi); + SDValue MaskLo, MaskHi; + GetSplitVector(Mask, MaskLo, MaskHi); + + SDValue PtrLo, PtrHi; + if (Ptr.getValueType().isVector()) // the gather/scatter form takes a vector of pointers + std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL); + else + PtrLo = PtrHi = Ptr; + + SDValue IndexHi, IndexLo; + if (Index.getNode()) + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + else + IndexLo = IndexHi = Index; + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo}; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO); + + MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi}; + Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); + + // Build a factor node to remember that this store is independent of the + // other one. + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1544,7 +1736,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, @@ -1573,7 +1765,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getConstant(i, TLI.getVectorIdxTy()))); + Op, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); } } @@ -1581,7 +1773,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); } -SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { +SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // The result type is legal, but the input type is illegal. If splitting // ends up with the result type of each half still being legal, just // do that. If, however, that would result in an illegal result type, @@ -1603,6 +1795,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); unsigned NumElements = OutVT.getVectorNumElements(); + bool IsFloat = OutVT.isFloatingPoint(); + // Widening should have already made sure this is a power-two vector // if we're trying to split it at all. assert() that's true, just in case. assert(!(NumElements & 1) && "Splitting vector, but not in half!"); @@ -1621,11 +1815,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { SDValue InLoVec, InHiVec; std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. - EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); + EVT HalfElementVT = IsFloat ? + EVT::getFloatingPointVT(InElementSize/2) : + EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); - SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); // Concatenate them to get the full intermediate truncation result. EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -1634,7 +1830,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. 
- return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); + return IsFloat ? + DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, + DAG.getTargetConstant(0, DL, TLI.getPointerTy())) : + DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { @@ -1865,9 +2064,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); Idx += NumElts; CurNumElts -= NumElts; @@ -1880,11 +2081,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + InOp1, + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getConstant(Idx, - TLI.getVectorIdxTy())); + InOp2, + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); } @@ -1922,8 +2125,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getConstant(i, - TLI.getVectorIdxTy())); + ConcatOps[OpIdx], + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -2009,9 +2212,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } if (InVTNumElts % WidenNumElts == 0) { - SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, - InOp, DAG.getConstant(0, - TLI.getVectorIdxTy())); + SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); // Extract the input and convert the shorten input vector. 
if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -2026,7 +2228,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, DL, TLI.getVectorIdxTy())); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -2114,6 +2316,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypePromoteFloat: case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: case TargetLowering::TypeScalarizeVector: @@ -2252,7 +2455,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, TLI.getVectorIdxTy())); + DAG.getConstant(j, dl, TLI.getVectorIdxTy())); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -2310,7 +2513,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { // Extract the input and convert the shorten input vector. InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2326,7 +2529,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, dl, TLI.getVectorIdxTy())); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2369,7 +2572,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned i; for (i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy())); + DAG.getConstant(IdxVal + i, dl, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -2432,7 +2635,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, MaskVT); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -2553,6 +2756,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); + + // The input and output types often differ here, and it could be that while + // we'd prefer to widen the result type, the input operands have been split. + // In this case, we also need to split the result of this node as well. 
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + SDValue SplitVSetCC = SplitVecOp_VSETCC(N); + SDValue Res = ModifyToType(SplitVSetCC, WidenVT); + return Res; + } + InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -2662,10 +2875,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); else InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); break; } } @@ -2710,7 +2923,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(Opcode, dl, EltVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy()))); + DAG.getConstant(i, dl, + TLI.getVectorIdxTy()))); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -2731,7 +2945,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); } } @@ -2759,7 +2973,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, TLI.getVectorIdxTy())); + DAG.getConstant(j, dl, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -2814,7 +3028,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, MaskVT); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -2849,8 +3063,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SVT.getVectorElementType(), N->getValueType(0).getVectorNumElements()); SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, DAG.getConstant(0, - TLI.getVectorIdxTy())); + ResVT, WideSETCC, + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2888,7 +3102,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); + if ((Action == TargetLowering::TypeLegal || + Action == TargetLowering::TypePromoteInteger) && + (WidenWidth % MemVTWidth) == 0 && isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2944,7 +3161,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, LdTy = NewLdTy; } VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getConstant(Idx++, TLI.getVectorIdxTy())); + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } @@ -3015,7 +3232,7 @@ SDValue 
DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); + DAG.getConstant(Increment, dl, BasePtr.getValueType())); SDValue L; if (LdWidth < NewVTWidth) { @@ -3141,7 +3358,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Offset, + DAG.getConstant(Offset, dl, BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, @@ -3192,7 +3409,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, unsigned NumVTElts = NewVT.getVectorNumElements(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, + TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3201,7 +3419,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, Offset += Increment; Idx += NumVTElts; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); + DAG.getConstant(Increment, dl, + BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store @@ -3212,7 +3431,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getConstant(Idx++, TLI.getVectorIdxTy())); + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3220,7 +3439,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); + DAG.getConstant(Increment, dl, + BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type Idx = Idx * NewVTWidth / ValEltWidth; @@ -3258,7 +3478,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align, @@ -3266,10 +3486,11 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getConstant(Offset, - BasePtr.getValueType())); + BasePtr, + DAG.getConstant(Offset, dl, + BasePtr.getValueType())); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, 
ValOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, @@ -3306,7 +3527,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -3315,7 +3536,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index db38b76..6303422 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -47,7 +47,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) TRI = STI.getRegisterInfo(); TLI = IS->TLI; TII = STI.getInstrInfo(); - ResourcesModel = TII->CreateTargetScheduleState(STI); + ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now // do not let it procede. assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); @@ -637,17 +637,3 @@ void ResourcePriorityQueue::remove(SUnit *SU) { Queue.pop_back(); } - - -#ifdef NDEBUG -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} -#else -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { - ResourcePriorityQueue q = *this; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(DAG); - } -} -#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index bce69d7..c27f8de 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -35,7 +35,6 @@ public: FRAMEIX = 2 // value is contents of a stack location }; private: - enum DbgValueKind kind; union { struct { SDNode *Node; // valid for expressions @@ -46,17 +45,18 @@ private: } u; MDNode *Var; MDNode *Expr; - bool IsIndirect; uint64_t Offset; DebugLoc DL; unsigned Order; - bool Invalid; + enum DbgValueKind kind; + bool IsIndirect; + bool Invalid = false; + public: // Constructor for non-constants. SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), IsIndirect(indir), Offset(off), DL(dl), Order(O), - Invalid(false) { + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(indir) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; @@ -65,8 +65,7 @@ public: // Constructor for constants. 
SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { kind = CONST; u.Const = C; } @@ -74,8 +73,7 @@ public: // Constructor for frame indices. SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { kind = FRAMEIX; u.FrameIx = FI; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 5222de1..fd0fa31 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -173,7 +173,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGRRList() { + ~ScheduleDAGRRList() override { delete HazardRec; delete AvailableQueue; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index f2b18fc..3853ada 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -137,13 +137,9 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, } // Helper for AddGlue to clone node operands. -static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, - SmallVectorImpl<EVT> &VTs, +static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs, SDValue ExtraOper = SDValue()) { - SmallVector<SDValue, 8> Ops; - for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) - Ops.push_back(N->getOperand(I)); - + SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end()); if (ExtraOper.getNode()) Ops.push_back(ExtraOper); @@ -165,7 +161,6 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, } static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { - SmallVector<EVT, 4> VTs; SDNode *GlueDestNode = Glue.getNode(); // Don't add glue from a node to itself. @@ -179,9 +174,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { // Don't add glue to something that already has a glue value. if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false; - for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) - VTs.push_back(N->getValueType(I)); - + SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end()); if (AddGlue) VTs.push_back(MVT::Glue); @@ -197,11 +190,8 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { !N->hasAnyUseOfValue(N->getNumValues() - 1)) && "expected an unused glue value"); - SmallVector<EVT, 4> VTs; - for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I) - VTs.push_back(N->getValueType(I)); - - CloneNodeWithValues(N, DAG, VTs); + CloneNodeWithValues(N, DAG, + makeArrayRef(N->value_begin(), N->getNumValues() - 1)); } /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. 
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2cd1f4b..6351fa2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -44,7 +44,7 @@ namespace llvm { explicit ScheduleDAGSDNodes(MachineFunction &mf); - virtual ~ScheduleDAGSDNodes() {} + ~ScheduleDAGSDNodes() override {} /// Run - perform scheduling. /// diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 418b58e..eee4a4b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -76,7 +76,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGVLIW() { + ~ScheduleDAGVLIW() override { delete HazardRec; delete AvailableQueue; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f75d5f4..efd4bd9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -49,6 +49,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cmath> +#include <utility> using namespace llvm; @@ -196,6 +197,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { return true; } +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantFPSDNode or undef. +bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantFPSDNode>(Op)) + return false; + } + return true; +} + /// isScalarToVector - Return true if the specified node is a /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low /// element is not an undef. 
@@ -499,8 +516,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::SUB: case ISD::SHL: { const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N); - AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(), - BinNode->hasNoSignedWrap(), BinNode->isExact()); + AddBinaryNodeIDCustom( + ID, N->getOpcode(), BinNode->Flags.hasNoUnsignedWrap(), + BinNode->Flags.hasNoSignedWrap(), BinNode->Flags.hasExact()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -860,7 +878,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); return Node; } @@ -878,7 +896,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); return Node; } @@ -895,7 +913,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); return Node; } @@ -947,9 +965,9 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, if (isBinOpWithFlags(Opcode)) { BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); - FN->setHasNoUnsignedWrap(nuw); - FN->setHasNoSignedWrap(nsw); - FN->setIsExact(exact); + FN->Flags.setNoUnsignedWrap(nuw); + FN->Flags.setNoSignedWrap(nsw); + FN->Flags.setExact(exact); return FN; } @@ -959,6 +977,40 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, return N; } +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + default: break; + case ISD::Constant: + case ISD::ConstantFP: + llvm_unreachable("Querying for Constant and ConstantFP nodes requires " + "debug location. Use another overload."); + } + } + return N; +} + +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + DebugLoc DL, void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + default: break; // Process only regular (non-target) constant nodes. + case ISD::Constant: + case ISD::ConstantFP: + // Erase debug location from the node if the node is used at several + // different places so as not to propagate one location to all uses, as + // that leads to incorrect debug info.
+ if (N->getDebugLoc() != DL) + N->setDebugLoc(DebugLoc()); + break; + } + } + return N; +} + void SelectionDAG::clear() { allnodes_clear(); OperandAllocator.Reset(); @@ -1014,7 +1066,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); return getNode(ISD::AND, DL, Op.getValueType(), Op, - getConstant(Imm, Op.getValueType())); + getConstant(Imm, DL, Op.getValueType())); } SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { @@ -1052,7 +1104,7 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = - getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } @@ -1062,31 +1114,33 @@ SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { switch (TLI->getBooleanContents(VT)) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: - TrueValue = getConstant(1, VT); + TrueValue = getConstant(1, DL, VT); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: - TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); break; } return getNode(ISD::XOR, DL, VT, Val, TrueValue); } -SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) { +SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT, + bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); - return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO); + return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO) +SDValue SelectionDAG::getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isT, + bool isO) { - return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO); + return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, - bool isO) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, + bool isT, bool isO) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); @@ -1125,7 +1179,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, SmallVector<SDValue, 2> EltParts; for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) - .trunc(ViaEltSizeInBits), + .trunc(ViaEltSizeInBits), DL, ViaEltVT, isT, isO)); } @@ -1160,12 +1214,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, ID.AddBoolean(isO); void *IP = nullptr; SDNode *N = nullptr; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); + N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, DL.getDebugLoc(), + EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1179,16 +1234,17 @@ SDValue 
SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, return Result; } -SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TLI->getPointerTy(), isTarget); +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) { + return getConstant(Val, DL, TLI->getPointerTy(), isTarget); } - -SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { - return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); +SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT, + bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); } -SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT, + bool isTarget){ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); EVT EltVT = VT.getScalarType(); @@ -1202,12 +1258,13 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ ID.AddPointer(&V); void *IP = nullptr; SDNode *N = nullptr; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); + N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, DL.getDebugLoc(), + EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1216,25 +1273,25 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ if (VT.isVector()) { SmallVector<SDValue, 8> Ops; Ops.assign(VT.getVectorNumElements(), Result); - // FIXME SDLoc info might be appropriate here Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); } return Result; } -SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { +SDValue SelectionDAG::getConstantFP(double Val, SDLoc DL, EVT VT, + bool isTarget) { EVT EltVT = VT.getScalarType(); if (EltVT==MVT::f32) - return getConstantFP(APFloat((float)Val), VT, isTarget); + return getConstantFP(APFloat((float)Val), DL, VT, isTarget); else if (EltVT==MVT::f64) - return getConstantFP(APFloat(Val), VT, isTarget); + return getConstantFP(APFloat(Val), DL, VT, isTarget); else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 || EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(apf, VT, isTarget); + return getConstantFP(apf, DL, VT, isTarget); } else llvm_unreachable("Unsupported type in getConstantFP"); } @@ -1264,7 +1321,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, ID.AddInteger(TargetFlags); ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), @@ -1281,7 +1338,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(FI); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); @@ -1300,7 +1357,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, ID.AddInteger(JTI); ID.AddInteger(TargetFlags); 
void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, @@ -1326,7 +1383,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, ID.AddPointer(C); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, @@ -1353,7 +1410,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, C->addSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, @@ -1371,7 +1428,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, @@ -1386,7 +1443,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); ID.AddPointer(MBB); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); @@ -1446,13 +1503,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { // N2 to point at N1. static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { std::swap(N1, N2); - int NElts = M.size(); - for (int i = 0; i != NElts; ++i) { - if (M[i] >= NElts) - M[i] -= NElts; - else if (M[i] >= 0) - M[i] += NElts; - } + ShuffleVectorSDNode::commuteMask(M); } SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, @@ -1484,6 +1535,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) commuteShuffle(N1, N2, MaskVec); + // If shuffling a splat, try to blend the splat instead. We do this here so + // that even when this arises during lowering we don't have to re-handle it. + auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + if (!Splat) + return; + + for (int i = 0; i < (int)NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts)) + continue; + + // If this input comes from undef, mark it as such. + if (UndefElements[MaskVec[i] - Offset]) { + MaskVec[i] = -1; + continue; + } + + // If we can blend a non-undef lane, use that instead. + if (!UndefElements[i]) + MaskVec[i] = i + Offset; + } + }; + if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) + BlendSplat(N1BV, 0); + if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) + BlendSplat(N2BV, NElts); + // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef bool AllLHS = true, AllRHS = true; @@ -1513,9 +1592,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, return getUNDEF(VT); // If Identity shuffle return that node. 
- bool Identity = true; + bool Identity = true, AllSame = true; for (unsigned i = 0; i != NElts; ++i) { if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] != MaskVec[0]) AllSame = false; } if (Identity && NElts) return N1; @@ -1537,18 +1617,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (Splat && Splat.getOpcode() == ISD::UNDEF) return getUNDEF(VT); + bool SameNumElts = + V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); + // We only have a splat which can skip shuffles if there is a splatted // value and no undef lanes rearranged by the shuffle. if (Splat && UndefElements.none()) { // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the // number of elements match or the value splatted is a zero constant. - if (V.getValueType().getVectorNumElements() == - VT.getVectorNumElements()) + if (SameNumElts) return N1; if (auto *C = dyn_cast<ConstantSDNode>(Splat)) if (C->isNullValue()) return N1; } + + // If the shuffle itself creates a splat, build the vector directly. + if (AllSame && SameNumElts) { + const SDValue &Splatted = BV->getOperand(MaskVec[0]); + SmallVector<SDValue, 8> Ops(NElts, Splatted); + + EVT BuildVT = BV->getValueType(0); + SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (BuildVT != VT) + NewBV = getNode(ISD::BITCAST, dl, VT, NewBV); + return NewBV; + } } } @@ -1559,7 +1656,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, ID.AddInteger(MaskVec[i]); void* IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); // Allocate the mask array for the node out of the BumpPtrAllocator, since @@ -1579,19 +1676,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { MVT VT = SV.getSimpleValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - SmallVector<int, 8> MaskVec; - - for (unsigned i = 0; i != NumElems; ++i) { - int Idx = SV.getMaskElt(i); - if (Idx >= 0) { - if (Idx < (int)NumElems) - Idx += NumElems; - else - Idx -= NumElems; - } - MaskVec.push_back(Idx); - } + SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); + ShuffleVectorSDNode::commuteMask(MaskVec); SDValue Op0 = SV.getOperand(0); SDValue Op1 = SV.getOperand(1); @@ -1612,7 +1698,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); void* IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), @@ -1628,7 +1714,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); ID.AddInteger(RegNo); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); @@ -1642,7 +1728,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); ID.AddPointer(RegMask); void *IP = nullptr; - if (SDNode *E = 
CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); @@ -1657,7 +1743,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); ID.AddPointer(Label); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), @@ -1680,7 +1766,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, @@ -1699,7 +1785,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { ID.AddPointer(V); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) SrcValueSDNode(V); @@ -1715,7 +1801,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { ID.AddPointer(MD); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); @@ -1734,7 +1820,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, ID.AddInteger(DestAS); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), @@ -1791,13 +1877,14 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return getConstant(0, VT); + case ISD::SETFALSE2: return getConstant(0, dl, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(N1->getValueType(0)); return getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? 
-1ULL : 1, dl, + VT); } case ISD::SETOEQ: @@ -1821,16 +1908,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); - case ISD::SETEQ: return getConstant(C1 == C2, VT); - case ISD::SETNE: return getConstant(C1 != C2, VT); - case ISD::SETULT: return getConstant(C1.ult(C2), VT); - case ISD::SETUGT: return getConstant(C1.ugt(C2), VT); - case ISD::SETULE: return getConstant(C1.ule(C2), VT); - case ISD::SETUGE: return getConstant(C1.uge(C2), VT); - case ISD::SETLT: return getConstant(C1.slt(C2), VT); - case ISD::SETGT: return getConstant(C1.sgt(C2), VT); - case ISD::SETLE: return getConstant(C1.sle(C2), VT); - case ISD::SETGE: return getConstant(C1.sge(C2), VT); + case ISD::SETEQ: return getConstant(C1 == C2, dl, VT); + case ISD::SETNE: return getConstant(C1 != C2, dl, VT); + case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); + case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); + case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); + case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); + case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT); + case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT); + case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT); + case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT); } } } @@ -1842,41 +1929,41 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through - case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpLessThan, VT); + R==APFloat::cmpLessThan, dl, VT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through - case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through - case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || - R==APFloat::cmpEqual, VT); + R==APFloat::cmpEqual, dl, VT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); // fall through case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual, VT); - case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, VT); - case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, VT); + R==APFloat::cmpEqual, dl, VT); + case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT); case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpEqual, VT); - case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); + R==APFloat::cmpEqual, dl, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT); case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan, VT); + R==APFloat::cmpLessThan, dl, VT); case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpUnordered, VT); - case ISD::SETULE: return 
getConstant(R!=APFloat::cmpGreaterThan, VT); - case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); + R==APFloat::cmpUnordered, dl, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT); } } else { // Ensure that the constant occurs on the RHS. @@ -2323,6 +2410,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); break; } + case ISD::EXTRACT_ELEMENT: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + const unsigned Index = + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + const unsigned BitWidth = Op.getValueType().getSizeInBits(); + + // Remove low part of known bits mask + KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); + KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth); + + // Remove high part of known bit mask + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + break; + } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { @@ -2522,6 +2624,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. break; + case ISD::EXTRACT_ELEMENT: { + const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); + const int BitWidth = Op.getValueType().getSizeInBits(); + const int Items = + Op.getOperand(0).getValueType().getSizeInBits() / BitWidth; + + // Get reverse index (starting from 1), Op1 value indexes elements from + // little end. Sign starts at big end. + const int rIndex = Items - 1 - + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + + // If the sign portion ends in our element the subtraction gives correct + // result. Otherwise it gives either a negative or a > bitwidth result. + return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); + } } // If we are looking at the loaded value of the SDNode.
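
The EXTRACT_ELEMENT case added to ComputeNumSignBits above boils down to a single clamp: count how far the value's run of sign bits reaches down into the extracted element. A standalone sketch of that arithmetic with a worked check (the function and the driver are illustrative, not part of the patch):

#include <algorithm>
#include <cassert>

// Given KnownSign sign bits in a value made of Items elements of BitWidth
// bits each, element Index (counted from the little end) keeps whatever
// portion of the sign run reaches into it, clamped to [0, BitWidth] as in
// the patch.
static int signBitsOfElement(int KnownSign, int BitWidth, int Items,
                             int Index) {
  // Reverse the index so it counts elements down from the big (sign) end.
  int rIndex = Items - 1 - Index;
  return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}

int main() {
  // A 128-bit value with 70 known sign bits, split into two 64-bit halves:
  // the high half is entirely sign material, while the low half only sees
  // the 6 sign bits that spill past the high half.
  assert(signBitsOfElement(70, 64, 2, /*Index=*/1) == 64);
  assert(signBitsOfElement(70, 64, 2, /*Index=*/0) == 6);
  return 0;
}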
@@ -2643,7 +2760,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), None); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), @@ -2666,12 +2783,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, switch (Opcode) { default: break; case ISD::SIGN_EXTEND: - return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT, + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: - return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT, + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { @@ -2680,29 +2797,29 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); - return getConstantFP(apf, VT); + return getConstantFP(apf, DL, VT); } case ISD::BITCAST: if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) - return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT); + return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); + return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); + return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); break; case ISD::BSWAP: - return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(), + return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::CTPOP: - return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(), + return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(), + return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(), + return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); } } @@ -2713,26 +2830,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, switch (Opcode) { case ISD::FNEG: V.changeSign(); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); case ISD::FABS: V.clearSign(); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); case ISD::FCEIL: { APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); break; } case ISD::FTRUNC: { APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); break; } case ISD::FFLOOR: { APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); break; } case ISD::FP_EXTEND: { @@ -2741,7 +2858,7 @@ SDValue 
SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME need to be more flexible about rounding mode. (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: { @@ -2755,20 +2872,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual break; APInt api(VT.getSizeInBits(), x); - return getConstant(api, VT); + return getConstant(api, DL, VT); } case ISD::BITCAST: if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) - return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT); + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) - return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT); else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) - return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); break; } } - // Constant fold unary operations with a vector integer operand. + // Constant fold unary operations with a vector integer or float operand. if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) { if (BV->isConstant()) { switch (Opcode) { @@ -2776,18 +2893,55 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME: Entirely reasonable to perform folding of other unary // operations here as the need arises. break; + case ISD::FNEG: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { + EVT SVT = VT.getScalarType(); + EVT InVT = BV->getValueType(0); + EVT InSVT = InVT.getScalarType(); + + // Find legal integer scalar type for constant promotion and + // ensure that its scalar size is at least as large as source. + EVT LegalSVT = SVT; + if (SVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT); + if (LegalSVT.bitsLT(SVT)) break; + } + + // Let the above scalar folding handle the folding of each element. SmallVector<SDValue, 8> Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue OpN = BV->getOperand(i); - // Let the above scalar folding handle the conversion of each - // element. - OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(), - OpN); + EVT OpVT = OpN.getValueType(); + + // Build vector (integer) scalar operands may need implicit + // truncation - do this before constant folding. + if (OpVT.isInteger() && OpVT.bitsGT(InSVT)) + OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN); + + OpN = getNode(Opcode, DL, SVT, OpN); + + // Legalize the (integer) scalar constant if necessary. 
+ if (LegalSVT != SVT) + OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN); + + if (OpN.getOpcode() != ISD::UNDEF && + OpN.getOpcode() != ISD::Constant && + OpN.getOpcode() != ISD::ConstantFP) + break; Ops.push_back(OpN); } - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + if (Ops.size() == VT.getVectorNumElements()) + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + break; } } } @@ -2825,7 +2979,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) // sext(undef) = 0, because the top bits will all be the same. - return getConstant(0, VT); + return getConstant(0, DL, VT); break; case ISD::ZERO_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2842,7 +2996,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) // zext(undef) = 0, because the top bits will be zero. - return getConstant(0, VT); + return getConstant(0, DL, VT); break; case ISD::ANY_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2941,7 +3095,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), @@ -2956,7 +3110,54 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return SDValue(N, 0); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, +static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { + switch (Opcode) { + case ISD::ADD: return std::make_pair(C1 + C2, true); + case ISD::SUB: return std::make_pair(C1 - C2, true); + case ISD::MUL: return std::make_pair(C1 * C2, true); + case ISD::AND: return std::make_pair(C1 & C2, true); + case ISD::OR: return std::make_pair(C1 | C2, true); + case ISD::XOR: return std::make_pair(C1 ^ C2, true); + case ISD::SHL: return std::make_pair(C1 << C2, true); + case ISD::SRL: return std::make_pair(C1.lshr(C2), true); + case ISD::SRA: return std::make_pair(C1.ashr(C2), true); + case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); + case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::UDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.udiv(C2), true); + case ISD::UREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.urem(C2), true); + case ISD::SDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.sdiv(C2), true); + case ISD::SREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.srem(C2), true); + } + return std::make_pair(APInt(1, 0), false); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, + const ConstantSDNode *Cst1, + const ConstantSDNode *Cst2) { + if (Cst1->isOpaque() || Cst2->isOpaque()) + return SDValue(); + + std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), + Cst2->getAPIntValue()); + if (!Folded.second) + return SDValue(); + return getConstant(Folded.first, DL, VT); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, SDNode *Cst1, SDNode *Cst2) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so @@ -2964,116 +3165,59 @@ SDValue 
SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); - SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs; - SmallVector<SDValue, 4> Outputs; - EVT SVT = VT.getScalarType(); + // Handle the case of two scalars. + if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { + if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { + if (SDValue Folded = + FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2)) { + if (!VT.isVector()) + return Folded; + SmallVector<SDValue, 4> Outputs; + // We may have a vector type but a scalar result. Create a splat. + Outputs.resize(VT.getVectorNumElements(), Outputs.back()); + // Build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); + } else { + return SDValue(); + } + } + } - ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); - ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); - if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque())) + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. + BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); + BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); + if (!BV1 || !BV2) return SDValue(); - if (Scalar1 && Scalar2) - // Scalar instruction. - Inputs.push_back(std::make_pair(Scalar1, Scalar2)); - else { - // For vectors extract each constant element into Inputs so we can constant - // fold them individually. - BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); - BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); - if (!BV1 || !BV2) - return SDValue(); - - assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); - - for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { - ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); - ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); - if (!V1 || !V2) // Not a constant, bail. - return SDValue(); + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); - if (V1->isOpaque() || V2->isOpaque()) - return SDValue(); - - // Avoid BUILD_VECTOR nodes that perform implicit truncation. - // FIXME: This is valid and could be handled by truncating the APInts. - if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) - return SDValue(); + EVT SVT = VT.getScalarType(); + SmallVector<SDValue, 4> Outputs; + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); - Inputs.push_back(std::make_pair(V1, V2)); - } - } + if (V1->isOpaque() || V2->isOpaque()) + return SDValue(); - // We have a number of constant values, constant fold them element by element. - for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { - const APInt &C1 = Inputs[I].first->getAPIntValue(); - const APInt &C2 = Inputs[I].second->getAPIntValue(); + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. 
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); - switch (Opcode) { - case ISD::ADD: - Outputs.push_back(getConstant(C1 + C2, SVT)); - break; - case ISD::SUB: - Outputs.push_back(getConstant(C1 - C2, SVT)); - break; - case ISD::MUL: - Outputs.push_back(getConstant(C1 * C2, SVT)); - break; - case ISD::UDIV: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.udiv(C2), SVT)); - break; - case ISD::UREM: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.urem(C2), SVT)); - break; - case ISD::SDIV: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); - break; - case ISD::SREM: - if (!C2.getBoolValue()) - return SDValue(); - Outputs.push_back(getConstant(C1.srem(C2), SVT)); - break; - case ISD::AND: - Outputs.push_back(getConstant(C1 & C2, SVT)); - break; - case ISD::OR: - Outputs.push_back(getConstant(C1 | C2, SVT)); - break; - case ISD::XOR: - Outputs.push_back(getConstant(C1 ^ C2, SVT)); - break; - case ISD::SHL: - Outputs.push_back(getConstant(C1 << C2, SVT)); - break; - case ISD::SRL: - Outputs.push_back(getConstant(C1.lshr(C2), SVT)); - break; - case ISD::SRA: - Outputs.push_back(getConstant(C1.ashr(C2), SVT)); - break; - case ISD::ROTL: - Outputs.push_back(getConstant(C1.rotl(C2), SVT)); - break; - case ISD::ROTR: - Outputs.push_back(getConstant(C1.rotr(C2), SVT)); - break; - default: + // Fold one vector element. + std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(), + V2->getAPIntValue()); + if (!Folded.second) return SDValue(); - } + Outputs.push_back(getConstant(Folded.first, DL, SVT)); } - assert((Scalar1 && Scalar2) || (VT.getVectorNumElements() == Outputs.size() && - "Expected a scalar or vector!")); - - // Handle the scalar case first. - if (!VT.isVector()) - return Outputs.back(); + assert(VT.getVectorNumElements() == Outputs.size() && + "Vector size mismatch!"); // We may have a vector type but a scalar result. Create a splat. Outputs.resize(VT.getVectorNumElements(), Outputs.back()); @@ -3109,6 +3253,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + for (SDValue Op : Elts) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT.bitsGT(VT.getScalarType())) + for (SDValue &Op : Elts) + Op = TLI->isZExtFree(Op.getValueType(), SVT) + ? 
getZExtOrTrunc(Op, DL, SVT) + : getSExtOrTrunc(Op, DL, SVT); + return getNode(ISD::BUILD_VECTOR, DL, VT, Elts); } break; @@ -3273,12 +3429,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(EVT.bitsLE(VT) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending + auto SignExtendInReg = [&](APInt Val) { + unsigned FromBits = EVT.getScalarType().getSizeInBits(); + Val <<= Val.getBitWidth() - FromBits; + Val = Val.ashr(Val.getBitWidth() - FromBits); + return getConstant(Val, DL, VT.getScalarType()); + }; + if (N1C) { APInt Val = N1C->getAPIntValue(); - unsigned FromBits = EVT.getScalarType().getSizeInBits(); - Val <<= Val.getBitWidth()-FromBits; - Val = Val.ashr(Val.getBitWidth()-FromBits); - return getConstant(Val, VT); + return SignExtendInReg(Val); + } + if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { + SmallVector<SDValue, 8> Ops; + for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + SDValue Op = N1.getOperand(i); + if (Op.getValueType() != VT.getScalarType()) break; + if (Op.getOpcode() == ISD::UNDEF) { + Ops.push_back(Op); + continue; + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode())) { + APInt Val = C->getAPIntValue(); + Ops.push_back(SignExtendInReg(Val)); + continue; + } + break; + } + if (Ops.size() == VT.getVectorNumElements()) + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } break; } @@ -3287,6 +3466,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) return getUNDEF(VT); + // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF + if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + return getUNDEF(VT); + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is // expanding copies of large vectors from registers. if (N2C && @@ -3296,7 +3479,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, N1.getOperand(0).getValueType().getVectorNumElements(); return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(N2C->getZExtValue() / Factor), - getConstant(N2C->getZExtValue() % Factor, + getConstant(N2C->getZExtValue() % Factor, DL, N2.getValueType())); } @@ -3353,7 +3536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, unsigned ElementSize = VT.getSizeInBits(); unsigned Shift = ElementSize * N2C->getZExtValue(); APInt ShiftedVal = C->getAPIntValue().lshr(Shift); - return getConstant(ShiftedVal.trunc(ElementSize), VT); + return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); } break; case ISD::EXTRACT_SUBVECTOR: { @@ -3384,7 +3567,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // Perform trivial constant folding. if (SDValue SV = - FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode())) + FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) return SV; // Canonicalize constant to RHS if commutative. 
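As an illustration of the SignExtendInReg lambda introduced in the SIGN_EXTEND_INREG hunk above: it folds the node on constants with the classic shift-pair idiom, moving the FromBits-wide field to the top of the word and arithmetic-shifting it back down so the field's sign bit fills every higher bit. A minimal sketch of the same idiom, assuming plain 64-bit arithmetic instead of APInt (the function name is illustrative, not from the patch):

#include <cassert>
#include <cstdint>

// Sign-extend the low FromBits of Val to a full 64-bit value.
// Mirrors the Val <<= W - FromBits; Val = Val.ashr(W - FromBits)
// pair in the SignExtendInReg lambda.
int64_t signExtendInReg(uint64_t Val, unsigned FromBits) {
  assert(FromBits >= 1 && FromBits <= 64 && "invalid field width");
  unsigned Shift = 64 - FromBits;
  return (int64_t)(Val << Shift) >> Shift;
}

For example, signExtendInReg(0xFF, 8) yields -1 and signExtendInReg(0x7F, 8) yields 127, matching the constants the fold would produce.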
@@ -3409,35 +3592,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::FADD: s = V1.add(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || s != APFloat::opInvalidOp) - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); break; case ISD::FSUB: s = V1.subtract(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || s!=APFloat::opInvalidOp) - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); break; case ISD::FMUL: s = V1.multiply(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || s!=APFloat::opInvalidOp) - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); break; case ISD::FDIV: s = V1.divide(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); } break; case ISD::FREM : s = V1.mod(V2, APFloat::rmNearestTiesToEven); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); } break; case ISD::FCOPYSIGN: V1.copySign(V2); - return getConstantFP(V1, VT); + return getConstantFP(V1, DL, VT); default: break; } } @@ -3449,7 +3632,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // FIXME need to be more flexible about rounding mode. (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(V, VT); + return getConstantFP(V, DL, VT); } } @@ -3474,7 +3657,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::SRL: case ISD::SHL: if (!VT.isVector()) - return getConstant(0, VT); // fold op(undef, arg2) -> 0 + return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 // For vectors, we can't easily build an all zero vector, just return // the LHS. return N2; @@ -3489,7 +3672,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). - return getConstant(0, VT); + return getConstant(0, DL, VT); // fallthrough case ISD::ADD: case ISD::ADDC: @@ -3513,13 +3696,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::SRL: case ISD::SHL: if (!VT.isVector()) - return getConstant(0, VT); // fold op(arg1, undef) -> 0 + return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 // For vectors, we can't easily build an all zero vector, just return // the LHS. return N1; case ISD::OR: if (!VT.isVector()) - return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); // For vectors, we can't easily build an all one vector, just return // the LHS. 
return N1; @@ -3539,14 +3722,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (BinOpHasFlags) AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); CSEMap.InsertNode(N, IP); } else { - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } @@ -3569,8 +3751,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const APFloat &V3 = N3CFP->getValueAPF(); APFloat::opStatus s = V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) - return getConstantFP(V1, VT); + if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp) + return getConstantFP(V1, DL, VT); } break; } @@ -3643,7 +3825,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), @@ -3705,16 +3887,32 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, assert(C->getAPIntValue().getBitWidth() == 8); APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); if (VT.isInteger()) - return DAG.getConstant(Val, VT); - return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); + return DAG.getConstant(Val, dl, VT); + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl, + VT); } - Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); + assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); + EVT IntVT = VT.getScalarType(); + if (!IntVT.isInteger()) + IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); if (NumBits > 8) { // Use a multiplication with 0x010101... to extend the input to the // required length. APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); - Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); + Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, + DAG.getConstant(Magic, dl, IntVT)); + } + + if (VT != Value.getValueType() && !VT.isInteger()) + Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value); + if (VT != Value.getValueType()) { + assert(VT.getVectorElementType() == Value.getValueType() && + "value type should be one vector element here"); + SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value); + Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps); } return Value; @@ -3728,15 +3926,16 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, dl, VT); else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, dl, VT); else if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? 
MVT::i32 : MVT::i64; return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(), - EltVT, NumElts))); + DAG.getConstant(0, dl, + EVT::getVectorVT(*DAG.getContext(), + EltVT, NumElts))); } else llvm_unreachable("Expected type!"); } @@ -3759,7 +3958,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, // of a load, then it is cost effective to turn the load into the immediate. Type *Ty = VT.getTypeForEVT(*DAG.getContext()); if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) - return DAG.getConstant(Val, VT); + return DAG.getConstant(Val, dl, VT); return SDValue(nullptr, 0); } @@ -3769,7 +3968,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, SelectionDAG &DAG) { EVT VT = Base.getValueType(); return DAG.getNode(ISD::ADD, dl, - VT, Base, DAG.getConstant(Offset, VT)); + VT, Base, DAG.getConstant(Offset, dl, VT)); } /// isMemSrcFromString - Returns true if memcpy source is a string constant. @@ -3918,9 +4117,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = - MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4033,8 +4230,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4128,8 +4324,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4198,7 +4393,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4219,11 +4414,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memcpy with target-specific // code. If the target chooses to do this, this is the next best. 
- SDValue Result = - TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, - isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemcpy( + *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } // If we really need inline code and the target declined to provide it, // use a (potentially long) sequence of loads and stores. @@ -4254,15 +4451,16 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4285,10 +4483,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = TSI->EmitTargetCodeForMemmove( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. @@ -4307,15 +4507,16 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4337,10 +4538,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, - Size, Align, isVol, DstPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemset( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + if (Result.getNode()) + return Result; + } // Emit a library call. 
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); @@ -4362,7 +4565,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4379,7 +4583,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4584,7 +4788,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4685,10 +4889,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); assert(VT.isVector() == MemVT.isVector() && - "Cannot use trunc store to convert to or from a vector!"); + "Cannot use an ext load to convert to or from a vector!"); assert((!VT.isVector() || VT.getVectorNumElements() == MemVT.getVectorNumElements()) && - "Cannot use trunc store to change the number of vector elements!"); + "Cannot use an ext load to change the number of vector elements!"); } bool Indexed = AM != ISD::UNINDEXED; @@ -4706,7 +4910,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4814,7 +5018,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4883,7 +5087,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4909,7 +5113,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) return SDValue(E, 0); SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), @@ -4938,7 +5142,7 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue 
Chain, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4965,7 +5169,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } @@ -4977,11 +5181,60 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, return SDValue(N, 0); } +SDValue +SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, + MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + MaskedGatherSDNode *N = + new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), + Ops, VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = + new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(), + Ops, VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue SV, unsigned Align) { - SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); } @@ -5041,7 +5294,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), @@ -5096,7 +5349,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); if (NumOps == 1) { @@ -5340,17 +5593,9 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { assert(N->getNumOperands() == NumOps && "Update with wrong 
number of operands"); - // Check to see if there is no change. - bool AnyChange = false; - for (unsigned i = 0; i != NumOps; ++i) { - if (Ops[i] != N->getOperand(i)) { - AnyChange = true; - break; - } - } - - // No operands changed, just return the input node. - if (!AnyChange) return N; + // If no operands changed just return the input node. + if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) + return N; // See if the modified node already exists. void *InsertPos = nullptr; @@ -5498,8 +5743,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, /// For IROrder, we keep the smaller of the two SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { DebugLoc NLoc = N->getDebugLoc(); - if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && - (OLoc.getDebugLoc() != NLoc)) { + if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); } unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); @@ -5531,7 +5775,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops); - if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *ON = FindNodeOrInsertPos(ID, N->getDebugLoc(), IP)) return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } @@ -5737,7 +5981,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, OpsArray); IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); } } @@ -5769,7 +6013,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand) { - SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, Operand, SRIdxVal); return SDValue(Subreg, 0); @@ -5780,7 +6024,7 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand, SDValue Subreg) { - SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, Operand, Subreg, SRIdxVal); return SDValue(Result, 0); @@ -5797,7 +6041,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, if (isBinOpWithFlags(Opcode)) AddBinaryNodeIDCustom(ID, nuw, nsw, exact); void *IP = nullptr; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) return E; } return nullptr; @@ -5809,21 +6053,28 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool IsIndirect, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); } /// Constant SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t Off, 
DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O); } /// FrameIndex SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O); } namespace { @@ -6489,7 +6740,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, TLI->getVectorIdxTy())); + getConstant(i, dl, TLI->getVectorIdxTy())); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -6595,8 +6846,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI->getDataLayout()); + llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, + *TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6652,9 +6903,10 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, "More vector elements requested than available!"); SDValue Lo, Hi; Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, - getConstant(0, TLI->getVectorIdxTy())); + getConstant(0, DL, TLI->getVectorIdxTy())); Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, - getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); + getConstant(LoVT.getVectorNumElements(), DL, + TLI->getVectorIdxTy())); return std::make_pair(Lo, Hi); } @@ -6670,7 +6922,7 @@ void SelectionDAG::ExtractVectorElements(SDValue Op, SDLoc SL(Op); for (unsigned i = Start, e = Start + Count; i != e; ++i) { Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, - Op, getConstant(i, IdxTy))); + Op, getConstant(i, SL, IdxTy))); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fe9e442..85303d2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -20,8 +20,8 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -57,7 +58,6 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include 
"llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSelectionDAGInfo.h" @@ -161,7 +161,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), + DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, TLI.getPointerTy())); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); @@ -208,7 +208,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getTargetConstant(1, TLI.getPointerTy())); + DAG.getTargetConstant(1, DL, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -301,7 +301,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, TLI.getVectorIdxTy())); + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -425,7 +425,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, - DAG.getIntPtrConstant(RoundBits)); + DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) @@ -452,9 +452,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, SDValue &Part1 = Parts[i+StepSize/2]; Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(1)); + ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, - ThisVT, Part0, DAG.getIntPtrConstant(0)); + ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); @@ -493,7 +493,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getConstant(i, + ElementVT, Val, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), @@ -520,7 +520,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); + PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -550,12 +551,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), + DAG.getConstant(i * (NumElements / NumIntermediates), DL, TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, - DAG.getConstant(i, TLI.getVectorIdxTy())); + DAG.getConstant(i, DL, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -576,93 +577,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, } } -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need be promoted or synthesized from one or more registers. - /// - SmallVector<EVT, 4> ValueVTs; +RegsForValue::RegsForValue() {} - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function, however when with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector<MVT, 4> RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. - /// - SmallVector<unsigned, 4> Regs; - - RegsForValue() {} - - RegsForValue(const SmallVector<unsigned, 4> ®s, - MVT regvt, EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - - RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, Type *Ty) { - ComputeValueVTs(tli, Ty, ValueVTs); - - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - MVT RegisterVT = tli.getRegisterType(Context, ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(Reg + i); - RegVTs.push_back(RegisterVT); - Reg += NumRegs; - } - } +RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, + EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - /// append - Add the specified values to this one. 
- void append(const RegsForValue &RHS) { - ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); - RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); - Regs.append(RHS.Regs.begin(), RHS.Regs.end()); - } +RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVTs value. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V = nullptr) const; - - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the - /// specified value into the registers specified by this object. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - void - getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, - SDValue *Flag, const Value *V, - ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; - - /// AddInlineAsmOperands - Add this value to the specified inlineasm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector<SDValue> &Ops) const; - }; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + MVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from @@ -721,7 +654,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // The current value is a zero. // Explicitly express that as it would be easier for // optimizations to kick in. - Parts[i] = DAG.getConstant(0, RegisterVT); + Parts[i] = DAG.getConstant(0, dl, RegisterVT); continue; } @@ -823,7 +756,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, /// operand list. This adds the code marker and includes the number of /// values added into it. 
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, + unsigned MatchingIdx, SDLoc dl, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -843,7 +776,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); } - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); @@ -869,7 +802,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getSubtarget().getDataLayout(); + DL = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -997,14 +930,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - MDNode *Variable = DI->getVariable(); - MDNode *Expr = DI->getExpression(); + DILocalVariable *Variable = DI->getVariable(); + DIExpression *Expr = DI->getExpression(); + assert(Variable->isValidLocationForIntrinsic(dl) && + "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), IsIndirect, Offset, dl, DbgSDNodeOrder); @@ -1016,6 +951,24 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, } } +/// getCopyFromRegs - If there was virtual register allocated for the value V +/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. +SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { + DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + SDValue Result; + + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, + Ty); + SDValue Chain = DAG.getEntryNode(); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + resolveDanglingDebugInfo(V, Result); + } + + return Result; +} + /// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important @@ -1026,15 +979,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // If there's a virtual register allocated and initialized for this // value, use it. - DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); - if (It != FuncInfo.ValueMap.end()) { - unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, - V->getType()); - SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); - resolveDanglingDebugInfo(V, N); - return N; + SDValue copyFromReg = getCopyFromRegs(V, V->getType()); + if (copyFromReg.getNode()) { + return copyFromReg; } // Otherwise create a new SDValue and remember it. 
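For context on the InferPtrAlignment hunk a few hunks above: once computeKnownBits proves the low bits of a global's address are zero, countTrailingOnes on the known-zero mask gives log2 of the provable alignment, capped at 1 << 31. A small self-contained sketch of that arithmetic, using plain integers instead of APInt (names are illustrative, not LLVM's):

#include <cstdint>
#include <iostream>

// Derive an alignment from a mask of address bits known to be zero:
// the run of trailing known-zero bits is log2 of the alignment.
uint32_t inferAlignment(uint64_t KnownZeroMask) {
  unsigned AlignBits = 0;
  while (AlignBits < 64 && (KnownZeroMask & (1ULL << AlignBits)))
    ++AlignBits;                        // countTrailingOnes equivalent
  if (AlignBits == 0)
    return 0;                           // nothing provable
  if (AlignBits > 31)
    AlignBits = 31;                     // same 1 << min(31U, AlignBits) cap
  return 1U << AlignBits;
}

int main() {
  std::cout << inferAlignment(0xF) << "\n"; // low 4 bits zero -> prints 16
}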
@@ -1044,6 +991,12 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return Val; } +// Return true if SDValue exists for the given Value +bool SelectionDAGBuilder::findValue(const Value *V) const { + return (NodeMap.find(V) != NodeMap.end()) || + (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); +} + /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { @@ -1067,18 +1020,18 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) - return DAG.getConstant(*CI, VT); + return DAG.getConstant(*CI, getCurSDLoc(), VT); if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); if (isa<ConstantPointerNull>(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, TLI.getPointerTy(AS)); + return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) - return DAG.getConstantFP(*CFP, VT); + return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) return DAG.getUNDEF(VT); @@ -1138,9 +1091,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<UndefValue>(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) - Constants[i] = DAG.getConstantFP(0, EltVT); + Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Constants[i] = DAG.getConstant(0, EltVT); + Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); } return DAG.getMergeValues(Constants, getCurSDLoc()); @@ -1164,9 +1117,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { SDValue Op; if (EltVT.isFloatingPoint()) - Op = DAG.getConstantFP(0, EltVT); + Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else - Op = DAG.getConstant(0, EltVT); + Op = DAG.getConstant(0, getCurSDLoc(), EltVT); Ops.assign(NumElements, Op); } @@ -1223,7 +1176,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, - DAG.getIntPtrConstant(Offsets[i])); + DAG.getIntPtrConstant(Offsets[i], + getCurSDLoc())); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -1573,19 +1527,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = BrMBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - if (I.isUnconditional()) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch or optimizations are switched off, // emit the branch. 
- if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) + if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1674,7 +1622,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, Cond = CondLHS; else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && CB.CC == ISD::SETEQ) { - SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); @@ -1682,19 +1630,19 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); - const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { - Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, - VT, CmpOp, DAG.getConstant(Low, VT)); + VT, CmpOp, DAG.getConstant(Low, dl, VT)); Cond = DAG.getSetCC(dl, MVT::i1, SUB, - DAG.getConstant(High-Low, VT), ISD::SETULE); + DAG.getConstant(High-Low, dl, VT), ISD::SETULE); } } @@ -1705,18 +1653,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, if (CB.TrueBB != CB.FalseBB) addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. - if (CB.TrueBB == NextBlock) { + if (CB.TrueBB == NextBlock(SwitchBB)) { std::swap(CB.TrueBB, CB.FalseBB); - SDValue True = DAG.getConstant(1, Cond.getValueType()); + SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } @@ -1752,13 +1693,15 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(JTH.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(JTH.First, dl, VT)); // The SDNode we just created, which holds the value being switched on minus // the smallest case value, needs to be copied to a virtual register so it @@ -1766,10 +1709,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy()); unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1777,24 +1720,18 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // for the switch statement if the value being switched on exceeds the largest // case in the switch. SDValue CMP = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), + ISD::SETUGT); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); - if (JT.MBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, + // Avoid emitting unnecessary branches to the next block. + if (JT.MBB != NextBlock(SwitchBB)) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); DAG.setRoot(BrCond); @@ -1824,6 +1761,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); SDValue Guard; + SDLoc dl = getCurSDLoc(); // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the // guard value from the virtual register holding the value. Otherwise, emit a @@ -1831,34 +1769,34 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, unsigned GuardReg = SPD.getGuardReg(); if (GuardReg && TLI.useLoadStackGuardNode()) - Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, + Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg, PtrTy); else - Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(FI), true, false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); SDValue Cmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), Sub.getValueType()), - Sub, DAG.getConstant(0, VT), ISD::SETNE); + Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. 
- SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, StackSlot.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. - SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + SDValue Br = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(SPD.getSuccessMBB())); @@ -1886,18 +1824,20 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, - DAG.getConstant(B.First, VT)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(B.First, dl, VT)); // Check range const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue RangeCmp = - DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); + DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; @@ -1914,32 +1854,25 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } if (UsePtrType) { VT = TLI.getPointerTy(); - Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); + Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } B.RegVT = VT.getSimpleVT(); B.Reg = FuncInfo.CreateReg(B.RegVT); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), - B.Reg, Sub); - - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); MachineBasicBlock* MBB = B.Cases[0].ThisBB; addSuccessorWithWeight(SwitchBB, B.Default); addSuccessorWithWeight(SwitchBB, MBB); - SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); - if (MBB != NextBlock) - BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, + // Avoid emitting unnecessary branches to the next block. + if (MBB != NextBlock(SwitchBB)) + BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, DAG.getBasicBlock(MBB)); DAG.setRoot(BrRange); @@ -1952,34 +1885,33 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); MVT VT = BB.RegVT; - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), - Reg, VT); + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); SDValue Cmp; - unsigned PopCount = CountPopulation_64(B.Mask); + unsigned PopCount = countPopulation(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. 
Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC( - getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); + dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, - DAG.getConstant(1, VT), ShiftOp); + SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, + DAG.getConstant(1, dl, VT), ShiftOp); // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), - VT, SwitchVal, DAG.getConstant(B.Mask, VT)); - Cmp = DAG.getSetCC(getCurSDLoc(), - TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, - DAG.getConstant(0, VT), ISD::SETNE); + SDValue AndOp = DAG.getNode(ISD::AND, dl, + VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); + Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, + DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -1987,19 +1919,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); - SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - if (NextMBB != NextBlock) - BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, + // Avoid emitting unnecessary branches to the next block. + if (NextMBB != NextBlock(SwitchBB)) + BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); @@ -2027,13 +1953,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I, LandingPad); break; + case Intrinsic::experimental_gc_statepoint: + LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + break; } } else LowerCallTo(&I, getValue(Callee), false, LandingPad); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. - CopyToExportRegsIfNeeded(&I); + // We already took care of the exported value for the statepoint instruction + // during call to the LowerStatepoint. + if (!isStatepoint(I)) { + CopyToExportRegsIfNeeded(&I); + } // Update successor info addSuccessorWithWeight(InvokeMBB, Return); @@ -2065,622 +1998,86 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { return; SmallVector<EVT, 2> ValueVTs; + SDLoc dl = getCurSDLoc(); ComputeValueVTs(TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. 
The physregs have already been // copied into virtual registers. SDValue Ops[2]; - Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[0]); + if (FuncInfo.ExceptionPointerVirtReg) { + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), dl, + FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + dl, ValueVTs[0]); + } else { + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy()); + } Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DAG.getCopyFromReg(DAG.getEntryNode(), dl, FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[1]); + dl, ValueVTs[1]); // Merge into one. - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } -/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for -/// small case ranges). -bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - // Size is the number of Cases represented by this range. - size_t Size = CR.Range.second - CR.Range.first; - if (Size > 3) - return false; - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = CR.CaseBB; - - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - BranchProbabilityInfo *BPI = FuncInfo.BPI; - // If any two of the cases has the same destination, and if one value - // is the same as the other, but has one bit unset that the other has set, - // use bit manipulation to do two compares at once. For example: - // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" - // TODO: This could be extended to merge any 2 cases in switches with 3 cases. - // TODO: Handle cases where CR.CaseBB != SwitchBB. - if (Size == 2 && CR.CaseBB == SwitchBB) { - Case &Small = *CR.Range.first; - Case &Big = *(CR.Range.second-1); - - if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { - const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); - const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); - - // Check that there is only one bit different. - if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && - (SmallValue | BigValue) == BigValue) { - // Isolate the common bit. - APInt CommonBit = BigValue & ~SmallValue; - assert((SmallValue | CommonBit) == BigValue && - CommonBit.countPopulation() == 1 && "Not a common bit?"); - - SDValue CondLHS = getValue(SV); - EVT VT = CondLHS.getValueType(); - SDLoc DL = getCurSDLoc(); - - SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, - DAG.getConstant(CommonBit, VT)); - SDValue Cond = DAG.getSetCC(DL, MVT::i1, - Or, DAG.getConstant(BigValue, VT), - ISD::SETEQ); - - // Update successor info. - // Both Small and Big will jump to Small.BB, so we sum up the weights. - addSuccessorWithWeight(SwitchBB, Small.BB, - Small.ExtraWeight + Big.ExtraWeight); - addSuccessorWithWeight(SwitchBB, Default, - // The default destination is the first successor in IR. - BPI ? 
BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); - - // Insert the true branch. - SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, - getControlRoot(), Cond, - DAG.getBasicBlock(Small.BB)); - - // Insert the false branch. - BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, - DAG.getBasicBlock(Default)); - - DAG.setRoot(BrCond); - return true; - } - } - } - - // Order cases by weight so the most likely case will be checked first. - uint32_t UnhandledWeights = 0; - if (BPI) { - for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { - uint32_t IWeight = I->ExtraWeight; - UnhandledWeights += IWeight; - for (CaseItr J = CR.Range.first; J < I; ++J) { - uint32_t JWeight = J->ExtraWeight; - if (IWeight > JWeight) - std::swap(*I, *J); - } - } - } - // Rearrange the case blocks so that the last one falls through if possible. - Case &BackCase = *(CR.Range.second-1); - if (Size > 1 && - NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { - // The last case block won't fall through into 'NextBlock' if we emit the - // branches in this order. See if rearranging a case value would help. - // We start at the bottom as it's the case with the least weight. - for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) - if (I->BB == NextBlock) { - std::swap(*I, BackCase); - break; - } - } - - // Create a CaseBlock record representing a conditional branch to - // the Case's target mbb if the value being switched on SV is equal - // to C. - MachineBasicBlock *CurBlock = CR.CaseBB; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - MachineBasicBlock *FallThrough; - if (I != E-1) { - FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); - CurMF->insert(BBI, FallThrough); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } else { - // If the last case doesn't match, go to the default block. - FallThrough = Default; - } - - const Value *RHS, *LHS, *MHS; - ISD::CondCode CC; - if (I->High == I->Low) { - // This is just small small case range :) containing exactly 1 case - CC = ISD::SETEQ; - LHS = SV; RHS = I->High; MHS = nullptr; - } else { - CC = ISD::SETLE; - LHS = I->Low; MHS = SV; RHS = I->High; - } - - // The false weight should be sum of all un-handled cases. - UnhandledWeights -= I->ExtraWeight; - CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, - /* me */ CurBlock, - /* trueweight */ I->ExtraWeight, - /* falseweight */ UnhandledWeights); - - // If emitting the first comparison, just call visitSwitchCase to emit the - // code into the current block. Otherwise, push the CaseBlock onto the - // vector to be later processed by SDISel, and insert the node's MBB - // before the next MBB. 
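Aside: the removed handleSmallSwitchRange documents its two-cases-in-one-compare rewrite ("if (X == 6 || X == 4)" -> "if ((X|2) == 6)"). A self-contained sketch of that rewrite under its stated precondition (matchesEither is a hypothetical name):

    #include <cassert>
    #include <cstdint>

    // Valid when Big has exactly one bit set that Small lacks, i.e.
    // popcount(Big) == popcount(Small) + 1 and (Small | Big) == Big.
    bool matchesEither(uint64_t X, uint64_t Small, uint64_t Big) {
      uint64_t CommonBit = Big & ~Small;  // the single differing bit
      assert((Small | CommonBit) == Big && "not a common-bit pair");
      return (X | CommonBit) == Big;      // ISD::OR + ISD::SETEQ
    }

Setting the common bit maps both case values onto Big and no other value onto it, so one compare replaces two.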
- if (CurBlock == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); - - CurBlock = FallThrough; - } - - return true; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); -} - -static APInt ComputeRange(const APInt &First, const APInt &Last) { - uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); - return (LastExt - FirstExt + 1ULL); -} - -/// handleJTSwitchCase - Emit jumptable for current switch case range -bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, - CaseRecVector &WorkList, - const Value *SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); - - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) - TSize += I->size(); +unsigned +SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, + MachineBasicBlock *LPadBB) { + SDValue Chain = getControlRoot(); + SDLoc dl = getCurSDLoc(); + // Get the typeid that we will dispatch on later. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) - return false; - - APInt Range = ComputeRange(First, Last); - // The density is TSize / Range. Require at least 40%. - // It should not be possible for IntTSize to saturate for sane code, but make - // sure we handle Range saturation correctly. - uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10); - uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10); - if (IntTSize * 10 < IntRange * 4) - return false; - - DEBUG(dbgs() << "Lowering jump table\n" - << "First entry: " << First << ". Last entry: " << Last << '\n' - << "Range: " << Range << ". Size: " << TSize << ".\n\n"); - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); + SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy()); + Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel); + + // Branch to the main landing pad block. + MachineBasicBlock *ClauseMBB = FuncInfo.MBB; + ClauseMBB->addSuccessor(LPadBB); + DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain, + DAG.getBasicBlock(LPadBB))); + return VReg; +} + +void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { +#ifndef NDEBUG + for (const CaseCluster &CC : Clusters) + assert(CC.Low == CC.High && "Input clusters must be single-case"); +#endif - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - // Create a new basic block to hold the code for loading the address - // of the jump table, and jumping to it. Update successor information; - // we will either branch to the default case for the switch, or the jump - // table. 
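Aside: the density gate in the removed handleJTSwitchCase is kept in integer arithmetic. Stripped of the UINT64_MAX/10 saturation the real code applies, it amounts to (denseEnough is a hypothetical helper):

    #include <cstdint>

    // Build a jump table only if the cases cover at least 40% of the
    // value span: TSize / Range >= 0.4, i.e. TSize * 10 >= Range * 4.
    bool denseEnough(uint64_t TSize, uint64_t Range) {
      return TSize * 10 >= Range * 4;
    }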
- MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, JumpTableBB); - - addSuccessorWithWeight(CR.CaseBB, Default); - addSuccessorWithWeight(CR.CaseBB, JumpTableBB); - - // Build a vector of destination BBs, corresponding to each target - // of the jump table. If the value of the jump table slot corresponds to - // a case statement, push the case's BB onto the vector, otherwise, push - // the default BB. - std::vector<MachineBasicBlock*> DestBBs; - APInt TEI = First; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { - const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); - const APInt &High = cast<ConstantInt>(I->High)->getValue(); - - if (Low.sle(TEI) && TEI.sle(High)) { - DestBBs.push_back(I->BB); - if (TEI==High) - ++I; + std::sort(Clusters.begin(), Clusters.end(), + [](const CaseCluster &a, const CaseCluster &b) { + return a.Low->getValue().slt(b.Low->getValue()); + }); + + // Merge adjacent clusters with the same destination. + const unsigned N = Clusters.size(); + unsigned DstIndex = 0; + for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) { + CaseCluster &CC = Clusters[SrcIndex]; + const ConstantInt *CaseVal = CC.Low; + MachineBasicBlock *Succ = CC.MBB; + + if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ && + (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) { + // If this case has the same successor and is a neighbour, merge it into + // the previous cluster. + Clusters[DstIndex - 1].High = CaseVal; + Clusters[DstIndex - 1].Weight += CC.Weight; + assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); } else { - DestBBs.push_back(Default); + std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], + sizeof(Clusters[SrcIndex])); } } - - // Calculate weight for each unique destination in CR. - DenseMap<MachineBasicBlock*, uint32_t> DestWeights; - if (FuncInfo.BPI) - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(I->BB); - if (Itr != DestWeights.end()) - Itr->second += I->ExtraWeight; - else - DestWeights[I->BB] = I->ExtraWeight; - } - - // Update successor info. Add one edge to each unique successor. - BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); - for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), - E = DestBBs.end(); I != E; ++I) { - if (!SuccsHandled[(*I)->getNumber()]) { - SuccsHandled[(*I)->getNumber()] = true; - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(*I); - addSuccessorWithWeight(JumpTableBB, *I, - Itr != DestWeights.end() ? Itr->second : 0); - } - } - - // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); - unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) - ->createJumpTableIndex(DestBBs); - - // Set the jump table information so that we can codegen it as a second - // MachineBasicBlock - JumpTable JT(-1U, JTI, JumpTableBB, Default); - JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); - if (CR.CaseBB == SwitchBB) - visitJumpTableHeader(JT, JTH, SwitchBB); - - JTCases.push_back(JumpTableBlock(JTH, JT)); - return true; -} - -/// handleBTSplitSwitchCase - emit comparison and split binary search tree into -/// 2 subtrees. 
-bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* SwitchBB) { - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - // Size is the number of Cases represented by this range. - unsigned Size = CR.Range.second - CR.Range.first; - - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); - double FMetric = 0; - CaseItr Pivot = CR.Range.first + Size/2; - - // Select optimal pivot, maximizing sum density of LHS and RHS. This will - // (heuristically) allow us to emit JumpTable's later. - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) - TSize += I->size(); - - APInt LSize = FrontCase.size(); - APInt RSize = TSize-LSize; - DEBUG(dbgs() << "Selecting best pivot: \n" - << "First: " << First << ", Last: " << Last <<'\n' - << "LSize: " << LSize << ", RSize: " << RSize << '\n'); - for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; - J!=E; ++I, ++J) { - const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); - const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue(); - APInt Range = ComputeRange(LEnd, RBegin); - assert((Range - 2ULL).isNonNegative() && - "Invalid case distance"); - // Use volatile double here to avoid excess precision issues on some hosts, - // e.g. that use 80-bit X87 registers. - volatile double LDensity = - (double)LSize.roundToDouble() / - (LEnd - First + 1ULL).roundToDouble(); - volatile double RDensity = - (double)RSize.roundToDouble() / - (Last - RBegin + 1ULL).roundToDouble(); - volatile double Metric = Range.logBase2()*(LDensity+RDensity); - // Should always split in some non-trivial place - DEBUG(dbgs() <<"=>Step\n" - << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' - << "LDensity: " << LDensity - << ", RDensity: " << RDensity << '\n' - << "Metric: " << Metric << '\n'); - if (FMetric < Metric) { - Pivot = J; - FMetric = Metric; - DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); - } - - LSize += J->size(); - RSize -= J->size(); - } - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (areJTsAllowed(TLI)) { - // If our case is dense we *really* should handle it earlier! - assert((FMetric > 0) && "Should handle dense range earlier!"); - } else { - Pivot = CR.Range.first + Size/2; - } - - CaseRange LHSR(CR.Range.first, Pivot); - CaseRange RHSR(Pivot, CR.Range.second); - const Constant *C = Pivot->Low; - MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; - - // We know that we branch to the LHS if the Value being switched on is - // less than the Pivot value, C. We use this to optimize our binary - // tree a bit, by recognizing that if SV is greater than or equal to the - // LHS's Case Value, and that Case Value is exactly one less than the - // Pivot's Value, then we can branch directly to the LHS's Target, - // rather than creating a leaf node for it. 
- if ((LHSR.second - LHSR.first) == 1 && - LHSR.first->High == CR.GE && - cast<ConstantInt>(C)->getValue() == - (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { - TrueBB = LHSR.first->BB; - } else { - TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, TrueBB); - WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - // Similar to the optimization above, if the Value being switched on is - // known to be less than the Constant CR.LT, and the current Case Value - // is CR.LT - 1, then we can branch directly to the target block for - // the current Case Value, rather than emitting a RHS leaf node for it. - if ((RHSR.second - RHSR.first) == 1 && CR.LT && - cast<ConstantInt>(RHSR.first->Low)->getValue() == - (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { - FalseBB = RHSR.first->BB; - } else { - FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, FalseBB); - WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - // Create a CaseBlock record representing a conditional branch to - // the LHS node if the value being switched on SV is less than C. - // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB); - - if (CR.CaseBB == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); - - return true; -} - -/// handleBitTestsSwitchCase - if current case range has few destination and -/// range span less, than machine word bitwidth, encode case range into series -/// of masks and emit bit tests with these masks. -bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, - MachineBasicBlock* SwitchBB) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(); - unsigned IntPtrBits = PTy.getSizeInBits(); - - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, PTy)) - return false; - - size_t numCmps = 0; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - // Single case counts one, case range - two. - numCmps += (I->Low == I->High ? 1 : 2); - } - - // Count unique destinations - SmallSet<MachineBasicBlock*, 4> Dests; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - Dests.insert(I->BB); - if (Dests.size() > 3) - // Don't bother the code below, if there are too much unique destinations - return false; - } - DEBUG(dbgs() << "Total number of unique destinations: " - << Dests.size() << '\n' - << "Total number of comparisons: " << numCmps << '\n'); - - // Compute span of values. 
- const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); - APInt cmpRange = maxValue - minValue; - - DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' - << "Low bound: " << minValue << '\n' - << "High bound: " << maxValue << '\n'); - - if (cmpRange.uge(IntPtrBits) || - (!(Dests.size() == 1 && numCmps >= 3) && - !(Dests.size() == 2 && numCmps >= 5) && - !(Dests.size() >= 3 && numCmps >= 6))) - return false; - - DEBUG(dbgs() << "Emitting bit tests\n"); - APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); - - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. - if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { - cmpRange = maxValue; - } else { - lowBound = minValue; - } - - CaseBitsVector CasesBits; - unsigned i, count = 0; - - for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { - MachineBasicBlock* Dest = I->BB; - for (i = 0; i < count; ++i) - if (Dest == CasesBits[i].BB) - break; - - if (i == count) { - assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); - count++; - } - - const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue(); - const APInt& highValue = cast<ConstantInt>(I->High)->getValue(); - - uint64_t lo = (lowValue - lowBound).getZExtValue(); - uint64_t hi = (highValue - lowBound).getZExtValue(); - CasesBits[i].ExtraWeight += I->ExtraWeight; - - for (uint64_t j = lo; j <= hi; j++) { - CasesBits[i].Mask |= 1ULL << j; - CasesBits[i].Bits++; - } - - } - std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); - - BitTestInfo BTC; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - DEBUG(dbgs() << "Cases:\n"); - for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { - DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask - << ", Bits: " << CasesBits[i].Bits - << ", BB: " << CasesBits[i].BB << '\n'); - - MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, CaseBB); - BTC.push_back(BitTestCase(CasesBits[i].Mask, - CaseBB, - CasesBits[i].BB, CasesBits[i].ExtraWeight)); - - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } - - BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, MVT::Other, (CR.CaseBB == SwitchBB), - CR.CaseBB, Default, std::move(BTC)); - - if (CR.CaseBB == SwitchBB) - visitBitTestHeader(BTB, SwitchBB); - - BitTestCases.push_back(std::move(BTB)); - - return true; -} - -/// Clusterify - Transform simple list of Cases into list of CaseRange's -void SelectionDAGBuilder::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { - BranchProbabilityInfo *BPI = FuncInfo.BPI; - // Start with "simple" cases. - for (SwitchInst::ConstCaseIt i : SI.cases()) { - const BasicBlock *SuccBB = i.getCaseSuccessor(); - MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - - uint32_t ExtraWeight = - BPI ? 
BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; - - Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), - SMBB, ExtraWeight)); - } - std::sort(Cases.begin(), Cases.end(), CaseCmp()); - - // Merge case into clusters - if (Cases.size() >= 2) - // Must recompute end() each iteration because it may be - // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); - J != Cases.end(); ) { - const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); - const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); - MachineBasicBlock* nextBB = J->BB; - MachineBasicBlock* currentBB = I->BB; - - // If the two neighboring cases go to the same destination, merge them - // into a single case. - if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { - I->High = J->High; - I->ExtraWeight += J->ExtraWeight; - J = Cases.erase(J); - } else { - I = J++; - } - } - - DEBUG({ - size_t numCmps = 0; - for (auto &I : Cases) - // A range counts double, since it requires two compares. - numCmps += I.Low != I.High ? 2 : 1; - - dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << '\n'; - }); + Clusters.resize(DstIndex); } void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, @@ -2696,96 +2093,6 @@ void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, BitTestCases[i].Parent = Last; } -void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBB; - - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - if (SwitchMBB + 1 != FuncInfo.MF->end()) - NextBlock = SwitchMBB + 1; - - - // Create a vector of Cases, sorted so that we can efficiently create a binary - // search tree from them. - CaseVector Cases; - Clusterify(Cases, SI); - - // Get the default destination MBB. - MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; - - if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && - !Cases.empty()) { - // Replace an unreachable default destination with the most popular case - // destination. - DenseMap<const BasicBlock *, unsigned> Popularity; - unsigned MaxPop = 0; - const BasicBlock *MaxBB = nullptr; - for (auto I : SI.cases()) { - const BasicBlock *BB = I.getCaseSuccessor(); - if (++Popularity[BB] > MaxPop) { - MaxPop = Popularity[BB]; - MaxBB = BB; - } - } - - // Set new default. - assert(MaxPop > 0); - assert(MaxBB); - Default = FuncInfo.MBBMap[MaxBB]; - - // Remove cases that were pointing to the destination that is now the default. - Cases.erase(std::remove_if(Cases.begin(), Cases.end(), - [&](const Case &C) { return C.BB == Default; }), - Cases.end()); - } - - // If there is only the default destination, go there directly. - if (Cases.empty()) { - // Update machine-CFG edges. - SwitchMBB->addSuccessor(Default); - - // If this is not a fall-through branch, emit the branch. - if (Default != NextBlock) { - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, - getControlRoot(), DAG.getBasicBlock(Default))); - } - return; - } - - // Get the Value to be switched on. 
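Aside: the new sortAndRangeify, whose merge loop ends above, performs the same neighbor merging this removed Clusterify did. Modeled on plain data (Cluster and mergeAdjacent are hypothetical simplifications of CaseClusterVector):

    #include <vector>

    struct Cluster { long Low, High; int Succ; unsigned Weight; };

    // Clusters arrive sorted by Low; fold one into its predecessor when
    // it is numerically adjacent and branches to the same successor.
    void mergeAdjacent(std::vector<Cluster> &Clusters) {
      unsigned Dst = 0;
      for (unsigned Src = 0; Src < Clusters.size(); ++Src) {
        Cluster &CC = Clusters[Src];
        if (Dst != 0 && Clusters[Dst - 1].Succ == CC.Succ &&
            Clusters[Dst - 1].High + 1 == CC.Low) {
          Clusters[Dst - 1].High = CC.High;       // widen the range
          Clusters[Dst - 1].Weight += CC.Weight;  // accumulate weight
        } else {
          Clusters[Dst++] = CC;
        }
      }
      Clusters.resize(Dst);
    }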
- const Value *SV = SI.getCondition(); - - // Push the initial CaseRec onto the worklist - CaseRecVector WorkList; - WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, - CaseRange(Cases.begin(),Cases.end()))); - - while (!WorkList.empty()) { - // Grab a record representing a case range to process off the worklist - CaseRec CR = WorkList.back(); - WorkList.pop_back(); - - if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // If the range has few cases (two or less) emit a series of specific - // tests. - if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // If the switch has more than N blocks, and is at least 40% dense, and the - // target supports indirect branches, then emit a jump table rather than - // lowering the switch to a binary tree of conditional branches. - // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). - if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; - - // Emit binary tree. We need to pick a pivot, and push left and right ranges - // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. - handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB); - } -} - void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; @@ -2950,19 +2257,47 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<SDValue, 4> Values(NumValues); SDValue Cond = getValue(I.getOperand(0)); - SDValue TrueVal = getValue(I.getOperand(1)); - SDValue FalseVal = getValue(I.getOperand(2)); + SDValue LHSVal = getValue(I.getOperand(1)); + SDValue RHSVal = getValue(I.getOperand(2)); + auto BaseOps = {Cond}; ISD::NodeType OpCode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; - for (unsigned i = 0; i != NumValues; ++i) + // Min/max matching is only viable if all output VTs are the same. 
+ if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { + Value *LHS, *RHS; + SelectPatternFlavor SPF = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPF) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + default: break; + } + + EVT VT = ValueVTs[0]; + LLVMContext &Ctx = *DAG.getContext(); + auto &TLI = DAG.getTargetLoweringInfo(); + while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + VT = TLI.getTypeToTransformTo(Ctx, VT); + + if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT)) { + OpCode = Opc; + LHSVal = getValue(LHS); + RHSVal = getValue(RHS); + BaseOps = {}; + } + } + + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode(OpCode, getCurSDLoc(), - TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), - Cond, - SDValue(TrueVal.getNode(), - TrueVal.getResNo() + i), - SDValue(FalseVal.getNode(), - FalseVal.getResNo() + i)); + LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), + Ops); + } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ValueVTs), Values)); @@ -2994,10 +2329,11 @@ void SelectionDAGBuilder::visitSExt(const User &I) { void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, - DAG.getTargetConstant(0, TLI.getPointerTy()))); + setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, + DAG.getTargetConstant(0, dl, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I) { @@ -3053,19 +2389,20 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, dl, DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that // is not what we are looking for. Only recognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) - setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, /*isOpaque*/true)); else setValue(&I, N); // noop cast. @@ -3243,10 +2580,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Input == 0 ?
Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); - else + else { + SDLoc dl = getCurSDLoc(); Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, - DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); + ISD::EXTRACT_SUBVECTOR, dl, VT, Src, + DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy())); + } } // Calculate new mask. @@ -3273,6 +2612,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // to insert and build vector. EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(); + SDLoc dl = getCurSDLoc(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3284,14 +2624,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - EltVT, Src, DAG.getConstant(Idx, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3383,6 +2723,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Type *Ty = Op0->getType()->getScalarType(); unsigned AS = Ty->getPointerAddressSpace(); SDValue N = getValue(Op0); + SDLoc dl = getCurSDLoc(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3392,8 +2733,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, - DAG.getConstant(Offset, N.getValueType())); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, + DAG.getConstant(Offset, dl, N.getValueType())); } Ty = StTy->getElementType(Field); @@ -3408,8 +2749,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, PtrTy); - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); + SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -3418,24 +2759,24 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If the index is smaller or larger than intptr_t, truncate or extend // it. - IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); + IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. 
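Aside: the GEP index scaling the hunks above implement, as one scalar function (assumes 64-bit pointers; scaleIndex is a hypothetical name, and __builtin_ctzll is the GCC/Clang count-trailing-zeros builtin standing in for logBase2):

    #include <cstdint>

    // Advance pointer N by Idx elements of ElementSize bytes, using a
    // shift when the element size is a power of two, as the builder does.
    uint64_t scaleIndex(uint64_t N, int64_t Idx, uint64_t ElementSize) {
      if (ElementSize == 1)
        return N + uint64_t(Idx);                        // no scaling
      if ((ElementSize & (ElementSize - 1)) == 0)        // power of two
        return N + (uint64_t(Idx) << __builtin_ctzll(ElementSize));
      return N + uint64_t(Idx) * ElementSize;            // ISD::MUL path
    }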
if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), + IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN, - DAG.getConstant(Amt, IdxN.getValueType())); + DAG.getConstant(Amt, dl, IdxN.getValueType())); } else { - SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), + SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale); } } - N = DAG.getNode(ISD::ADD, getCurSDLoc(), + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN); } } @@ -3449,6 +2790,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. + SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); @@ -3460,11 +2802,11 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { EVT IntPtr = TLI.getPointerTy(); if (AllocSize.getValueType() != IntPtr) - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); + AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); - AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, + AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, - DAG.getConstant(TySize, IntPtr)); + DAG.getConstant(TySize, dl, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -3476,18 +2818,19 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. - AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign-1)); + DAG.getIntPtrConstant(StackAlign - 1, dl)); // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), + AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), + dl)); - SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -3535,8 +2878,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + SDLoc dl = getCurSDLoc(); + if (isVolatile) - Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), @@ -3552,15 +2897,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). 
if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), + SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, AAInfo, Ranges); @@ -3570,7 +2915,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); @@ -3578,7 +2923,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PendingLoads.push_back(Chain); } - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values)); } @@ -3610,6 +2955,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); + SDLoc dl = getCurSDLoc(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -3618,21 +2964,21 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue St = DAG.getStore(Root, getCurSDLoc(), + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, + DAG.getConstant(Offsets[i], dl, PtrVT)); + SDValue St = DAG.getStore(Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), isVolatile, isNonTemporal, Alignment, AAInfo); Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } @@ -3664,6 +3010,94 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } +// Gather/scatter receive a vector of pointers. 
+// This vector of pointers may be represented as a base pointer + vector of +// indices; it depends on the GEP and the instruction preceding the GEP +// that calculates the indices +static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, + SelectionDAGBuilder* SDB) { + + assert (Ptr->getType()->isVectorTy() && "Unexpected pointer type"); + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + if (!Gep || Gep->getNumOperands() > 2) + return false; + ShuffleVectorInst *ShuffleInst = + dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand()); + if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() || + cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() != + Instruction::InsertElement) + return false; + + Ptr = cast<InsertElementInst>(ShuffleInst->getOperand(0))->getOperand(1); + + SelectionDAG& DAG = SDB->DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Check if the Ptr is inside the current basic block + // If not, look for the shuffle instruction + if (SDB->findValue(Ptr)) + Base = SDB->getValue(Ptr); + else if (SDB->findValue(ShuffleInst)) { + SDValue ShuffleNode = SDB->getValue(ShuffleInst); + SDLoc sdl = ShuffleNode; + Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, + ShuffleNode.getValueType().getScalarType(), ShuffleNode, + DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); + SDB->setValue(Ptr, Base); + } + else + return false; + + Value *IndexVal = Gep->getOperand(1); + if (SDB->findValue(IndexVal)) { + Index = SDB->getValue(IndexVal); + + if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { + IndexVal = Sext->getOperand(0); + if (SDB->findValue(IndexVal)) + Index = SDB->getValue(IndexVal); + } + return true; + } + return false; +} + +void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask) + Value *Ptr = I.getArgOperand(1); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + + Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; + SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, + Ops, MMO); + DAG.setRoot(Scatter); + setValue(&I, Scatter); +} + void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDLoc sdl = getCurSDLoc(); @@ -3705,6 +3139,59 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { setValue(&I, Load); } +void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) + Value *Ptr = I.getArgOperand(0); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue Root = DAG.getRoot(); + SDValue Base; + SDValue Index; + Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool ConstantMemory = false; + if (UniformBase && AA->pointsToConstantMemory( + AliasAnalysis::Location(BasePtr, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Index = getValue(Ptr); + } + SDValue Ops[] = { Root, Src0, Mask, Base, Index }; + SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, + Ops, MMO); + + SDValue OutChain = Gather.getValue(1); + if (!ConstantMemory) + PendingLoads.push_back(OutChain); + setValue(&I, Gather); +} + void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); @@ -3769,8 +3256,8 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3859,7 +3346,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + TLI.getPointerTy())); // Add all operands of the call to the operand list. 
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3919,9 +3407,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x007fffff, MVT::i32)); + DAG.getConstant(0x007fffff, dl, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, - DAG.getConstant(0x3f800000, MVT::i32)); + DAG.getConstant(0x3f800000, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } @@ -3934,21 +3422,108 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x7f800000, MVT::i32)); + DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, TLI.getPointerTy())); + DAG.getConstant(23, dl, TLI.getPointerTy())); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, - DAG.getConstant(127, MVT::i32)); + DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. static SDValue -getF32Constant(SelectionDAG &DAG, unsigned Flt) { - return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), +getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, MVT::f32); } +static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, + SelectionDAG &DAG) { + // IntegerPartOfX = (int32_t)t0; + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = t0 - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode( + ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy())); + + SDValue TwoToFractionalPartOfX; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e, dl)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd, dl)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + //
TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d, dl)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14, dl)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234, dl)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000, dl)); + } + + // Add the exponent into the result in integer domain. + SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); +} + /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, @@ -3960,92 +3535,10 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // final result: // // #define LOG2OFe 1.4426950f - // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + // t0 = Op * LOG2OFe SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFracPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // 0.000107046256 error, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - 
getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. - SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + getF32Constant(DAG, 0x3fb8aa3b, dl)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -4063,7 +3556,7 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218)); + getF32Constant(DAG, 0x3f317218, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. 
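To make the refactor above concrete: the new getLimitedPrecisionExp2 helper computes 2^t0 by splitting t0 into integer and fractional parts, approximating 2^f with a short minimax polynomial, and then adding the integer part straight into the exponent bits of the result. A minimal scalar sketch of that math (not LLVM code; the constant names and coefficients are taken from the comments in the diff, everything else is illustrative):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

static float limited_precision_exp(float x) {
  const float LOG2OFE = 1.4426950f;  // 0x3fb8aa3b: log2(e), so exp(x) = 2^t0
  float t0 = x * LOG2OFE;
  int32_t n = (int32_t)t0;           // IntegerPartOfX   (FP_TO_SINT)
  float f = t0 - (float)n;           // FractionalPartOfX, in (-1, 1)

  // 2^f ~= 0.997535578f + (0.735607626f + 0.252464424f * f) * f
  // (the 6-bit-accuracy branch; the 12- and 18-bit branches use longer
  // polynomials of the same shape).
  float two_to_f = 0.997535578f + (0.735607626f + 0.252464424f * f) * f;

  // "Add the exponent into the result in integer domain": bits += n << 23
  // scales by 2^n directly in the IEEE-754 exponent field.
  uint32_t bits;
  std::memcpy(&bits, &two_to_f, sizeof bits);
  bits += (uint32_t)n << 23;
  float result;
  std::memcpy(&result, &bits, sizeof result);
  return result;
}

int main() {
  for (float x : {-2.0f, 0.0f, 1.0f, 3.5f})
    std::printf("exp(%g) ~= %g\n", x, limited_precision_exp(x));
}
```

With the helper factored out, expandExp and (further down) expandPow reduce to a single multiply by log2(e) or log2(10) followed by this sequence, which is exactly what the shortened bodies in the diff now do.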
@@ -4079,12 +3572,12 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0034276066, which is better than 8 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbe74c456)); + getF32Constant(DAG, 0xbe74c456, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3fb3a2b1)); + getF32Constant(DAG, 0x3fb3a2b1, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f949a29)); + getF32Constant(DAG, 0x3f949a29, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4096,18 +3589,18 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.000061011436, which is 14 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbd67b6d6)); + getF32Constant(DAG, 0xbd67b6d6, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ee4f4b8)); + getF32Constant(DAG, 0x3ee4f4b8, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fbc278b)); + getF32Constant(DAG, 0x3fbc278b, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40348e95)); + getF32Constant(DAG, 0x40348e95, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fdef31a)); + getF32Constant(DAG, 0x3fdef31a, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4121,24 +3614,24 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000023660568, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbc91e5ac)); + getF32Constant(DAG, 0xbc91e5ac, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e4350aa)); + getF32Constant(DAG, 0x3e4350aa, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f60d3e3)); + getF32Constant(DAG, 0x3f60d3e3, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x4011cdf0)); + getF32Constant(DAG, 0x4011cdf0, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x406cfd1c)); + getF32Constant(DAG, 0x406cfd1c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x408797cb)); + getF32Constant(DAG, 0x408797cb, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4006dcab)); + getF32Constant(DAG, 0x4006dcab, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); @@ -4173,12 +3666,12 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0049451742, which is more than 7 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbeb08fe0)); + getF32Constant(DAG, 0xbeb08fe0, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x40019463)); + getF32Constant(DAG, 0x40019463, dl)); 
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fd6633d)); + getF32Constant(DAG, 0x3fd6633d, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4190,18 +3683,18 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000876136000, which is better than 13 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbda7262e)); + getF32Constant(DAG, 0xbda7262e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f25280b)); + getF32Constant(DAG, 0x3f25280b, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x4007b923)); + getF32Constant(DAG, 0x4007b923, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40823e2f)); + getF32Constant(DAG, 0x40823e2f, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x4020d29c)); + getF32Constant(DAG, 0x4020d29c, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4216,24 +3709,24 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000018516, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbcd2769e)); + getF32Constant(DAG, 0xbcd2769e, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e8ce0b9)); + getF32Constant(DAG, 0x3e8ce0b9, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fa22ae7)); + getF32Constant(DAG, 0x3fa22ae7, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x40525723)); + getF32Constant(DAG, 0x40525723, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x40aaf200)); + getF32Constant(DAG, 0x40aaf200, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x40c39dad)); + getF32Constant(DAG, 0x40c39dad, dl)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4042902c)); + getF32Constant(DAG, 0x4042902c, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); @@ -4254,7 +3747,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // Scale the exponent by log10(2) [0.30102999f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3e9a209a)); + getF32Constant(DAG, 0x3e9a209a, dl)); // Get the significand and build it into a floating-point number with // exponent of 1. 
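The log, log2, and log10 expansions in these hunks all rest on the GetSignificand/GetExponent pair shown at the top of this section: they split a float into m * 2^e so that only log(m) on [1, 2) needs a polynomial, while the exponent contributes a simple scaled add. A small sketch of that decomposition (plain C++ with a hypothetical helper name):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Rewrite a positive float x as x = m * 2^e, with m in [1, 2), using only
// integer bit operations on the IEEE-754 word.
static void split_float(float x, float &m, int32_t &e) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  // GetExponent: (bits & 0x7f800000) >> 23, then remove the bias of 127.
  e = (int32_t)((bits & 0x7f800000u) >> 23) - 127;
  // GetSignificand: keep the 23 mantissa bits and OR in 0x3f800000 (1.0f),
  // forcing an unbiased exponent of zero.
  uint32_t mbits = (bits & 0x007fffffu) | 0x3f800000u;
  std::memcpy(&m, &mbits, sizeof m);
}

int main() {
  float m;
  int32_t e;
  split_float(10.0f, m, e);
  // log2(x) = e + log2(m); ln and log10 just scale e by ln(2) or log10(2),
  // exactly as the LogOfExponent computations in the diff do.
  std::printf("10 = %g * 2^%d; log2(10) ~= %g\n", m, e, e + std::log2(m));
}
```

The per-precision branches then differ only in which polynomial replaces std::log2(m) here.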
@@ -4270,12 +3763,12 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0014886165, which is 6 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0xbdd49a13)); + getF32Constant(DAG, 0xbdd49a13, dl)); SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3f1c0789)); + getF32Constant(DAG, 0x3f1c0789, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f011300)); + getF32Constant(DAG, 0x3f011300, dl)); } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -4286,15 +3779,15 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.00019228036, which is better than 12 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3d431f31)); + getF32Constant(DAG, 0x3d431f31, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3ea21fb2)); + getF32Constant(DAG, 0x3ea21fb2, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f6ae232)); + getF32Constant(DAG, 0x3f6ae232, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f25f7c3)); + getF32Constant(DAG, 0x3f25f7c3, dl)); } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4307,21 +3800,21 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // error 0.0000037995730, which is better than 18 bits SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3c5d51ce)); + getF32Constant(DAG, 0x3c5d51ce, dl)); SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, - getF32Constant(DAG, 0x3e00685a)); + getF32Constant(DAG, 0x3e00685a, dl)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3efb6798)); + getF32Constant(DAG, 0x3efb6798, dl)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f88d192)); + getF32Constant(DAG, 0x3f88d192, dl)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fc4316c)); + getF32Constant(DAG, 0x3fc4316c, dl)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3f57ce70)); + getF32Constant(DAG, 0x3f57ce70, dl)); } return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); @@ -4336,91 +3829,8 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && - LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For 
floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. - SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, - TwoToFractionalPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); - } + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) + return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. 
return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); @@ -4444,90 +3854,10 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, // final result: // // #define LOG2OF10 3.3219281f - // IntegerPartOfX = (int32_t)(x * LOG2OF10); + // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, - getF32Constant(DAG, 0x40549a78)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // twoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); - return 
DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + getF32Constant(DAG, 0x40549a78, dl)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -4549,14 +3879,13 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // powi(x, 0) -> 1.0 if (Val == 0) - return DAG.getConstantFP(1.0, LHS.getValueType()); + return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || + if (!F->hasFnAttribute(Attribute::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. - CountPopulation_32(Val)+Log2_32(Val) < 7) { + countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has @@ -4579,7 +3908,7 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // If the original was negative, invert the result, producing 1/(x*x*x). if (RHSC->getSExtValue() < 0) Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), - DAG.getConstantFP(1.0, LHS.getValueType()), Res); + DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); return Res; } } @@ -4609,11 +3938,9 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. -bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, - MDNode *Variable, - MDNode *Expr, int64_t Offset, - bool IsIndirect, - const SDValue &N) { +bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( + const Value *V, DILocalVariable *Variable, DIExpression *Expr, + DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4622,8 +3949,9 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. - DIVariable DV(Variable); - if (DV.isInlinedFnArgument(MF.getFunction())) + // + // FIXME: Should we be checking DL->inlinedAt() to determine this? 
+ if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; Optional<MachineOperand> Op; @@ -4664,13 +3992,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, if (!Op) return false; + assert(Variable->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, Op->getReg(), Offset, Variable, Expr)); + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(Offset) .addMetadata(Variable) @@ -4715,16 +4045,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); EVT VT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); + Res = DAG.getNode(ISD::READ_REGISTER, sdl, + DAG.getVTList(VT, MVT::Other), Chain, RegName); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); return nullptr; } case Intrinsic::write_register: { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); - SDValue Chain = getValue(RegValue).getOperand(0); + SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, @@ -4736,6 +4070,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4750,12 +4085,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4768,11 +4107,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0))); + updateDAGForMaybeTailCall(MS); return nullptr; } case Intrinsic::memmove: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4787,20 +4129,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MM); return nullptr; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); - DIVariable DIVar(Variable); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!Address || !DIVar) { + assert(Variable && "Missing variable"); + if (!Address) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } @@ -4821,9 +4163,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = - (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || - isa<Argument>(Address)); + bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || + isa<Argument>(Address); const AllocaInst *AI = dyn_cast<AllocaInst>(Address); @@ -4836,7 +4177,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); return nullptr; } } else if (AI) @@ -4853,7 +4195,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. 
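The new isTC flag threaded through the memcpy/memset/memmove cases feeds updateDAGForMaybeTailCall, defined near the end of this patch: the lowering helper hands back either a new root chain or a null node meaning the operation was folded into a tail call. A toy model of that contract, with hypothetical types standing in for the real SelectionDAG classes:

```cpp
#include <cstdio>

struct Node {};  // stand-in for an SDNode

// A non-null result becomes the new DAG root; a null result signals that the
// call was emitted as a tail call, so the builder must not also generate a
// separate return.
struct Builder {
  Node *Root = nullptr;
  bool HasTailCall = false;

  void updateForMaybeTailCall(Node *MaybeTC) {
    if (MaybeTC != nullptr)
      Root = MaybeTC;
    else
      HasTailCall = true;
  }
};

int main() {
  Builder B;
  Node MemcpyNode;
  B.updateForMaybeTailCall(&MemcpyNode);  // ordinary lowering
  B.updateForMaybeTailCall(nullptr);      // lowered as a tail call
  std::printf("HasTailCall=%d\n", B.HasTailCall);
}
```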
@@ -4876,14 +4218,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); - DIVariable DIVar(DI.getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgValueInst should be either null or a DIVariable."); - if (!DIVar) - return nullptr; + assert(DI.getVariable() && "Missing variable"); - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4904,7 +4242,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; - if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, + if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, IsIndirect, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), IsIndirect, Offset, dl, SDNodeOrder); @@ -4943,7 +4281,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); - Res = DAG.getConstant(TypeID, MVT::i32); + Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; } @@ -4969,7 +4307,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { CfaArg.getValueType()), CfaArg); SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), - DAG.getConstant(0, TLI.getPointerTy())); + DAG.getConstant(0, sdl, TLI.getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -5008,9 +4346,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::masked_gather: + visitMaskedGather(I); + return nullptr; case Intrinsic::masked_load: visitMaskedLoad(I); return nullptr; + case Intrinsic::masked_scatter: + visitMaskedScatter(I); + return nullptr; case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; @@ -5063,44 +4407,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // We must do this early because v2i32 is not a legal type. 
SDValue ShOps[2]; ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); + ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI.getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, MVT::i32), + DAG.getConstant(NewIntrinsic, sdl, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); return nullptr; } - case Intrinsic::x86_avx_vinsertf128_pd_256: - case Intrinsic::x86_avx_vinsertf128_ps_256: - case Intrinsic::x86_avx_vinsertf128_si_256: - case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI.getValueType(I.getType()); - EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * - ElVT.getVectorNumElements(); - Res = - DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } - case Intrinsic::x86_avx_vextractf128_pd_256: - case Intrinsic::x86_avx_vextractf128_ps_256: - case Intrinsic::x86_avx_vextractf128_si_256: - case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI.getValueType(I.getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * - DestVT.getVectorNumElements(); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -5238,7 +4554,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, getValue(I.getArgOperand(0)), - DAG.getTargetConstant(0, MVT::i32)))); + DAG.getTargetConstant(0, sdl, + MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, @@ -5366,9 +4683,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); if (CI->isZero()) - Res = DAG.getConstant(-1ULL, Ty); + Res = DAG.getConstant(-1ULL, sdl, Ty); else - Res = DAG.getConstant(0, Ty); + Res = DAG.getConstant(0, sdl, Ty); setValue(&I, Res); return nullptr; @@ -5498,7 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5547,6 +4864,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); + case Intrinsic::eh_actions: + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return nullptr; case Intrinsic::donothing: // ignore return nullptr; @@ -5565,7 +4885,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::experimental_gc_result_int: case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: { + case Intrinsic::experimental_gc_result_ptr: + case Intrinsic::experimental_gc_result: { visitGCResult(I); return nullptr; } @@ -5576,45 +4897,49 @@ 
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - case Intrinsic::frameallocate: { + case Intrinsic::frameescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Do the allocation and map it as a normal value. - // FIXME: Maybe we should add this to the alloca map so that we don't have - // to register allocate it? - uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); - int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); - MVT PtrVT = TLI.getPointerTy(0); - SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); - setValue(&I, FIVal); - - // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is - // the same on all targets. - MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, - TII->get(TargetOpcode::FRAME_ALLOC)) - .addSym(FrameAllocSym) - .addFrameIndex(Alloc); + // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { + Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + AllocaInst *Slot = cast<AllocaInst>(Arg); + assert(FuncInfo.StaticAllocaMap.count(Slot) && + "can only escape static allocas"); + int FI = FuncInfo.StaticAllocaMap[Slot]; + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(MF.getName()), Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::FRAME_ALLOC)) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + } return nullptr; } case Intrinsic::framerecover: { - // i8* @llvm.framerecover(i8* %fn, i8* %fp) + // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); MVT PtrVT = TLI.getPointerTy(0); // Get the symbol that defines the frame offset. - Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); + unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName()); + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); // Create a TargetExternalSymbol for the label to avoid any target lowering // that would make this PC relative. 
StringRef Name = FrameAllocSym->getName(); - assert(Name.size() == strlen(Name.data()) && "not null terminated"); + assert(Name.data()[Name.size()] == '\0' && "not null terminated"); SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); SDValue OffsetVal = DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); @@ -5627,6 +4952,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::eh_begincatch: + case Intrinsic::eh_endcatch: + llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + case Intrinsic::eh_exceptioncode: { + unsigned Reg = TLI.getExceptionPointerRegister(); + assert(Reg && "cannot get exception code on this platform"); + MVT PtrVT = TLI.getPointerTy(); + const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); + unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); + SDValue N = + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + setValue(&I, N); + return nullptr; + } } } @@ -5639,7 +4979,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI.getContext().CreateTempSymbol(); + BeginLabel = MMI.getContext().createTempSymbol(); // For SjLj, keep track of which landing pads go with which invokes // so as to maintain the ordering of pads in the LSDA. @@ -5659,9 +4999,8 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, CLI.setChain(getRoot()); } - - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); - std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); @@ -5683,7 +5022,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); + MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. 
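A few hunks above, frameallocate becomes the indexed frameescape/framerecover pair: the parent function publishes one FRAME_ALLOC label per escaped static alloca (null arguments are skipped as holes in the index space), and framerecover adds the offset named by the label for a given index to the parent frame pointer it was handed. A rough model in plain C++, with offsetof standing in for the emitted labels (the real offsets are resolved through the per-index symbols, which this diff names roughly "<function>$frame_escape_<idx>"):

```cpp
#include <cstddef>
#include <cstdio>

// Rough model, not the real mechanism: each escaped alloca of the parent has
// a fixed offset from the parent frame, and recovery is just fp + offset.
struct ParentFrame {
  int a = 1;     // escaped slot, index 0
  double b = 2;  // escaped slot, index 1
};

static void *framerecover(char *parent_fp, unsigned idx) {
  static const size_t offsets[] = {offsetof(ParentFrame, a),
                                   offsetof(ParentFrame, b)};
  return parent_fp + offsets[idx];
}

int main() {
  ParentFrame p;
  int *a = static_cast<int *>(framerecover(reinterpret_cast<char *>(&p), 0));
  std::printf("recovered a = %d\n", *a);
}
```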
@@ -5766,9 +5105,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); - if (const Constant *LoadCst = - ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.DL)) + if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( + const_cast<Constant *>(LoadInput), *Builder.DL)) return Builder.getValue(LoadCst); } @@ -5829,7 +5167,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); - setValue(&I, DAG.getConstant(0, CallVT)); + setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } @@ -6112,7 +5450,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - if (unsigned IID = F->getIntrinsicID()) { + if (Intrinsic::ID IID = F->getIntrinsicID()) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; @@ -6364,9 +5702,10 @@ static void GetRegistersForValue(SelectionDAG &DAG, // If this is a constraint for a single physreg, or a constraint for a // register class, find it. - std::pair<unsigned, const TargetRegisterClass*> PhysReg = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> PhysReg = + TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), + OpInfo.ConstraintCode, + OpInfo.ConstraintVT); unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { @@ -6462,8 +5801,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI.ParseConstraints(CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; @@ -6555,12 +5894,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -6687,7 +6027,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } } - AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(), TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the @@ -6707,10 +6047,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Memory output, or 'other' output (e.g. 'X' constraint). 
assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6745,7 +6091,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { .AddInlineAsmOperands(OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber : InlineAsm::Kind_RegDef, - false, 0, DAG, AsmNodeOperands); + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -6800,11 +6146,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } } + SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, - true, OpInfo.getMatchedOperand(), + true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); break; } @@ -6814,9 +6161,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; @@ -6843,6 +6191,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + getCurSDLoc(), TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; @@ -6853,10 +6202,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this input. 
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + getCurSDLoc(), + MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6884,11 +6240,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), + SDLoc dl = getCurSDLoc(); + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, - DAG, AsmNodeOperands); + dl, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { @@ -6896,7 +6254,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, - false, 0, DAG, + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; } @@ -7020,7 +6378,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - bool UseVoidTy, + Type *ReturnTy, MachineBasicBlock *LandingPad, bool IsPatchPoint) { TargetLowering::ArgListTy Args; @@ -7041,10 +6399,9 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, Args.push_back(Entry); } - Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); return lowerInvokable(CLI, LandingPad); @@ -7068,15 +6425,15 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, - SmallVectorImpl<SDValue> &Ops, + SDLoc DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); Ops.push_back( - Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back( @@ -7098,7 +6455,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledValue()); - NullPtr = DAG.getIntPtrConstant(0, true); + NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguemnts // passed to it) and emits NOPS (if requested). Unlike the patchpoint @@ -7116,13 +6473,14 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // Add the <id> and <numBytes> constants. 
SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64)); SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL, + MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(&CI, 2, Ops, *this); + addStackMapLiveVars(&CI, 2, DL, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. @@ -7161,7 +6519,17 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, CallingConv::ID CC = CS.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; bool HasDef = !CS->getType()->isVoidTy(); - SDValue Callee = getValue(CS->getOperand(2)); // <target> + SDLoc dl = getCurSDLoc(); + SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); + + // Handle immediate and symbolic callees. + if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee)) + Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, + /*isTarget=*/true); + else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), + SDLoc(SymbolicCallee), + SymbolicCallee->getValueType(0)); // Get the real number of arguments participating in the call <numArgs> SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); @@ -7175,8 +6543,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + Type *ReturnTy = + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); std::pair<SDValue, SDValue> Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, + lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, LandingPad, true); SDNode *CallEnd = Result.second.getNode(); @@ -7196,26 +6566,24 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Add the <id> and <numBytes> constants. SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, + MVT::i32)); - // Assume that the Callee is a constant address. - // FIXME: handle function symbols in the future. - Ops.push_back( - DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), - /*isTarget=*/true)); + // Add the callee. + Ops.push_back(Callee); // Adjust <numArgs> to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); NumCallRegArgs = IsAnyRegCC ? 
NumArgs : NumCallRegArgs; - Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); // Add the calling convention - Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); // Add the arguments we omitted previously. The register allocator should // place these in any free register. @@ -7225,11 +6593,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; - for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) - Ops.push_back(*i); + Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. - addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); // Push the register mask info. if (HasGlue) @@ -7262,7 +6629,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Replace the target specific call node with a PATCHPOINT node. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, - getCurSDLoc(), NodeTys, Ops); + dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { @@ -7529,7 +6896,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, - CLI.DAG.getConstant(Offsets[i], PtrVT)); + CLI.DAG.getConstant(Offsets[i], CLI.DL, + PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, @@ -7849,7 +7217,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. - // FIXME: this should insert code into the DAG! EmitFunctionEntryCode(); } @@ -7866,8 +7233,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - // Check successor nodes' PHI nodes that expect a constant to be available - // from this block. + // Check PHI nodes in successors that expect a value to be available from this + // block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; @@ -7954,3 +7321,816 @@ AddSuccessorMBB(const BasicBlock *BB, SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); return SuccMBB; } + +MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { + MachineFunction::iterator I = MBB; + if (++I == FuncInfo.MF->end()) + return nullptr; + return I; +} + +/// During lowering new call nodes can be created (such as memset, etc.). +/// Those will become new roots of the current DAG, but complications arise +/// when they are tail calls. In such cases, the call lowering will update +/// the root, but the builder still needs to know that a tail call has been +/// lowered in order to avoid generating an additional return. +void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { + // If the node is null, we do have a tail call. 
+ if (MaybeTC.getNode() != nullptr) + DAG.setRoot(MaybeTC); + else + HasTailCall = true; +} + +bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, + unsigned *TotalCases, unsigned First, + unsigned Last) { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); + + APInt LowCase = Clusters[First].Low->getValue(); + APInt HighCase = Clusters[Last].High->getValue(); + assert(LowCase.getBitWidth() == HighCase.getBitWidth()); + + // FIXME: A range of consecutive cases has 100% density, but only requires one + // comparison to lower. We should discriminate against such consecutive ranges + // in jump tables. + + uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); + uint64_t Range = Diff + 1; + + uint64_t NumCases = + TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); + + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + + return NumCases * 100 >= Range * MinJumpTableDensity; +} + +static inline bool areJTsAllowed(const TargetLowering &TLI) { + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); +} + +bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, + CaseCluster &JTCluster) { + assert(First <= Last); + + uint32_t Weight = 0; + unsigned NumCmps = 0; + std::vector<MachineBasicBlock*> Table; + DenseMap<MachineBasicBlock*, uint32_t> JTWeights; + for (unsigned I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Weight += Clusters[I].Weight; + assert(Weight >= Clusters[I].Weight && "Weight overflow!"); + APInt Low = Clusters[I].Low->getValue(); + APInt High = Clusters[I].High->getValue(); + NumCmps += (Low == High) ? 1 : 2; + if (I != First) { + // Fill the gap between this and the previous cluster. + APInt PreviousHigh = Clusters[I - 1].High->getValue(); + assert(PreviousHigh.slt(Low)); + uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; + for (uint64_t J = 0; J < Gap; J++) + Table.push_back(DefaultMBB); + } + uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; + for (uint64_t J = 0; J < ClusterSize; ++J) + Table.push_back(Clusters[I].MBB); + JTWeights[Clusters[I].MBB] += Clusters[I].Weight; + } + + unsigned NumDests = JTWeights.size(); + if (isSuitableForBitTests(NumDests, NumCmps, + Clusters[First].Low->getValue(), + Clusters[Last].High->getValue())) { + // Clusters[First..Last] should be lowered as bit tests instead. + return false; + } + + // Create the MBB that will load from and jump through the table. + // Note: We create it here, but it's not inserted into the function yet. + MachineFunction *CurMF = FuncInfo.MF; + MachineBasicBlock *JumpTableMBB = + CurMF->CreateMachineBasicBlock(SI->getParent()); + + // Add successors. Note: use table order for determinism. + SmallPtrSet<MachineBasicBlock *, 8> Done; + for (MachineBasicBlock *Succ : Table) { + if (Done.count(Succ)) + continue; + addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]); + Done.insert(Succ); + } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) + ->createJumpTableIndex(Table); + + // Set up the jump table info. 
+ JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); + JumpTableHeader JTH(Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), SI->getCondition(), + nullptr, false); + JTCases.push_back(JumpTableBlock(JTH, JT)); + + JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, + JTCases.size() - 1, Weight); + return true; +} + +void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB) { +#ifndef NDEBUG + // Clusters must be non-empty, sorted, and only contain Range clusters. + assert(!Clusters.empty()); + for (CaseCluster &C : Clusters) + assert(C.Kind == CC_Range); + for (unsigned i = 1, e = Clusters.size(); i < e; ++i) + assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!areJTsAllowed(TLI)) + return; + + const int64_t N = Clusters.size(); + const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + + // Split Clusters into minimum number of dense partitions. The algorithm uses + // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code + // for the Case Statement'" (1994), but builds the MinPartitions array in + // reverse order to make it easier to reconstruct the partitions in ascending + // order. In the choice between two optimal partitionings, it picks the one + // which yields more jump tables. + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector<unsigned, 8> MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector<unsigned, 8> LastElement(N); + // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. + SmallVector<unsigned, 8> NumTables(N); + // TotalCases[i]: Total nbr of cases in Clusters[0..i]. + SmallVector<unsigned, 8> TotalCases(N); + + for (unsigned i = 0; i < N; ++i) { + APInt Hi = Clusters[i].High->getValue(); + APInt Lo = Clusters[i].Low->getValue(); + TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; + if (i != 0) + TotalCases[i] += TotalCases[i - 1]; + } + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + assert(MinJumpTableSize > 1); + NumTables[N - 1] = 0; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; i--) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + NumTables[i] = NumTables[i + 1]; + + // Search for a solution that results in fewer partitions. + for (int64_t j = N - 1; j > i; j--) { + // Try building a partition from Clusters[i..j]. + if (isDense(Clusters, &TotalCases[0], i, j)) { + unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); + bool IsTable = j - i + 1 >= MinJumpTableSize; + unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); + + // If this j leads to fewer partitions, or same number of partitions + // with more lookup tables, it is a better partitioning. + if (NumPartitions < MinPartitions[i] || + (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + NumTables[i] = Tables; + } + } + } + } + + // Iterate over the partitions, replacing some with jump tables in-place. 
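
The search above is a suffix DP in the style of the cited Kannan & Proebsting correction. A simplified standalone sketch of just the MinPartitions recurrence (illustrative; the NumTables tie-break that prefers partitionings with more jump tables is omitted):

    #include <algorithm>
    #include <functional>
    #include <vector>

    // MinPartitions[i] = fewest partitions covering clusters [i, N), where a
    // candidate partition [i, j] is admissible only if Dense(i, j) holds.
    std::vector<unsigned>
    minPartitions(unsigned N, const std::function<bool(unsigned, unsigned)> &Dense) {
      std::vector<unsigned> MinPartitions(N + 1, 0); // MinPartitions[N]: empty suffix
      for (int i = static_cast<int>(N) - 1; i >= 0; --i) {
        MinPartitions[i] = MinPartitions[i + 1] + 1; // baseline: cluster i alone
        for (unsigned j = i + 1; j < N; ++j)
          if (Dense(i, j)) // try the partition [i, j]
            MinPartitions[i] = std::min(MinPartitions[i], 1 + MinPartitions[j + 1]);
      }
      return MinPartitions;
    }
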
+ unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(Last >= First); + assert(DstIndex <= First); + unsigned NumClusters = Last - First + 1; + + CaseCluster JTCluster; + if (NumClusters >= MinJumpTableSize && + buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { + Clusters[DstIndex++] = JTCluster; + } else { + for (unsigned I = First; I <= Last; ++I) + std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + } + } + Clusters.resize(DstIndex); +} + +bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { + // FIXME: Using the pointer type doesn't seem ideal. + uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; + return Range <= BW; +} + +bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, + unsigned NumCmps, + const APInt &Low, + const APInt &High) { + // FIXME: I don't think NumCmps is the correct metric: a single case and a + // range of cases both require only one branch to lower. Just looking at the + // number of clusters and destinations should be enough to decide whether to + // build bit tests. + + // To lower a range with bit tests, the range must fit the bitwidth of a + // machine word. + if (!rangeFitsInWord(Low, High)) + return false; + + // Decide whether it's profitable to lower this range with bit tests. Each + // destination requires a bit test and branch, and there is an overall range + // check branch. For a small number of clusters, separate comparisons might be + // cheaper, and for many destinations, splitting the range might be better. + return (NumDests == 1 && NumCmps >= 3) || + (NumDests == 2 && NumCmps >= 5) || + (NumDests == 3 && NumCmps >= 6); +} + +bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + CaseCluster &BTCluster) { + assert(First <= Last); + if (First == Last) + return false; + + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + unsigned NumCmps = 0; + for (int64_t I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Dests.set(Clusters[I].MBB->getNumber()); + NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2; + } + unsigned NumDests = Dests.count(); + + APInt Low = Clusters[First].Low->getValue(); + APInt High = Clusters[Last].High->getValue(); + assert(Low.slt(High)); + + if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + return false; + + APInt LowBound; + APInt CmpRange; + + const int BitWidth = + DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!"); + + if (Low.isNonNegative() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a + // word without having to subtract minValue. In this case, + // we can optimize away the subtraction. + LowBound = APInt::getNullValue(Low.getBitWidth()); + CmpRange = High; + } else { + LowBound = Low; + CmpRange = High - Low; + } + + CaseBitsVector CBV; + uint32_t TotalWeight = 0; + for (unsigned i = First; i <= Last; ++i) { + // Find the CaseBits for this destination. + unsigned j; + for (j = 0; j < CBV.size(); ++j) + if (CBV[j].BB == Clusters[i].MBB) + break; + if (j == CBV.size()) + CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0)); + CaseBits *CB = &CBV[j]; + + // Update Mask, Bits and ExtraWeight. 
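
The mask update that follows packs all case values of one destination into a single machine word, relative to LowBound. A standalone sketch with made-up values (illustrative only):

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint64_t LowBound = 4;
      const uint64_t CaseVals[] = {4, 6, 9}; // cases sharing one destination
      uint64_t Mask = 0;
      for (uint64_t C : CaseVals)
        Mask |= 1ULL << (C - LowBound); // one bit per case value
      // The emitted test is then roughly:
      //   if ((1 << (X - LowBound)) & Mask) goto BB;
      std::cout << std::hex << "0x" << Mask << '\n'; // 0x25: bits 0, 2 and 5
    }
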
+ uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); + uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); + for (uint64_t j = Lo; j <= Hi; ++j) { + CB->Mask |= 1ULL << j; + CB->Bits++; + } + CB->ExtraWeight += Clusters[i].Weight; + TotalWeight += Clusters[i].Weight; + assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!"); + } + + BitTestInfo BTI; + std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { + // Sort by weight first, number of bits second. + if (a.ExtraWeight != b.ExtraWeight) + return a.ExtraWeight > b.ExtraWeight; + return a.Bits > b.Bits; + }); + + for (auto &CB : CBV) { + MachineBasicBlock *BitTestBB = + FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); + BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); + } + BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(), + -1U, MVT::Other, false, nullptr, + nullptr, std::move(BTI))); + + BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, + BitTestCases.size() - 1, TotalWeight); + return true; +} + +void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, + const SwitchInst *SI) { +// Partition Clusters into as few subsets as possible, where each subset has a +// range that fits in a machine word and has <= 3 unique destinations. + +#ifndef NDEBUG + // Clusters must be sorted and contain Range or JumpTable clusters. + assert(!Clusters.empty()); + assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); + for (const CaseCluster &C : Clusters) + assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); + for (unsigned i = 1; i < Clusters.size(); ++i) + assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + // If target does not have legal shift left, do not emit bit tests at all. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PTy = TLI.getPointerTy(); + if (!TLI.isOperationLegal(ISD::SHL, PTy)) + return; + + int BitWidth = PTy.getSizeInBits(); + const int64_t N = Clusters.size(); + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector<unsigned, 8> MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector<unsigned, 8> LastElement(N); + + // FIXME: This might not be the best algorithm for finding bit test clusters. + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; --i) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + + // Search for a solution that results in fewer partitions. + // Note: the search is limited by BitWidth, reducing time complexity. + for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { + // Try building a partition from Clusters[i..j]. + + // Check the range. + if (!rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue())) + continue; + + // Check nbr of destinations and cluster types. + // FIXME: This works, but doesn't seem very efficient. 
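
For reference, the profitability rule from isSuitableForBitTests() earlier in this hunk, restated on its own (the rangeFitsInWord() precondition is elided); its three-destination cap is mirrored by the Dests.count() > 3 cutoff in the partition search below:

    // Bit tests pay off once several comparisons collapse onto few destinations:
    // one destination needs 3+ comparisons, two need 5+, three need 6+.
    bool suitableForBitTests(unsigned NumDests, unsigned NumCmps) {
      return (NumDests == 1 && NumCmps >= 3) ||
             (NumDests == 2 && NumCmps >= 5) ||
             (NumDests == 3 && NumCmps >= 6);
    }
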
+ bool RangesOnly = true;
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ for (int64_t k = i; k <= j; k++) {
+ if (Clusters[k].Kind != CC_Range) {
+ RangesOnly = false;
+ break;
+ }
+ Dests.set(Clusters[k].MBB->getNumber());
+ }
+ if (!RangesOnly || Dests.count() > 3)
+ break;
+
+ // Check if it's a better partition.
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ if (NumPartitions < MinPartitions[i]) {
+ // Found a better partition.
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing with bit-test clusters in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(First <= Last);
+ assert(DstIndex <= First);
+
+ CaseCluster BitTestCluster;
+ if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
+ Clusters[DstIndex++] = BitTestCluster;
+ } else {
+ for (unsigned I = First; I <= Last; ++I)
+ std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB) {
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *NextMBB = nullptr;
+ MachineFunction::iterator BBI = W.MBB;
+ if (++BBI != FuncInfo.MF->end())
+ NextMBB = BBI;
+
+ unsigned Size = W.LastCluster - W.FirstCluster + 1;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+
+ if (Size == 2 && W.MBB == SwitchMBB) {
+ // If any two of the cases have the same destination, and if one value
+ // is the same as the other, but has one bit unset that the other has set,
+ // use bit manipulation to do two compares at once. For example:
+ // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3
+ // cases.
+ // TODO: Handle cases where W.CaseBB != SwitchBB.
+ CaseCluster &Small = *W.FirstCluster;
+ CaseCluster &Big = *W.LastCluster;
+
+ if (Small.Low == Small.High && Big.Low == Big.High &&
+ Small.MBB == Big.MBB) {
+ const APInt &SmallValue = Small.Low->getValue();
+ const APInt &BigValue = Big.Low->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+ SDValue CondLHS = getValue(Cond);
+ EVT VT = CondLHS.getValueType();
+ SDLoc DL = getCurSDLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, DL, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or,
+ DAG.getConstant(BigValue, DL, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ // Both Small and Big will jump to Small.BB, so we sum up the weights.
+ addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
+ addSuccessorWithWeight(
+ SwitchMBB, DefaultMBB,
+ // The default destination is the first successor in IR.
+ BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
+ : 0);
+
+ // Insert the true branch.
+ SDValue BrCond =
+ DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.MBB));
+ // Insert the false branch.
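
A standalone check of the single-bit merge above, using the 4/6 example from the comment (illustrative; matches() is not part of the patch):

    #include <cassert>

    // (X | 2) == 6 folds the two equality tests X == 4 and X == 6 into one,
    // since 4 and 6 differ only in bit 1.
    bool matches(unsigned X) { return (X | 2) == 6; }

    int main() {
      assert(matches(4) && matches(6));
      assert(!matches(2) && !matches(5) && !matches(7));
    }
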
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(DefaultMBB));
+
+ DAG.setRoot(BrCond);
+ return;
+ }
+ }
+ }
+
+ if (TM.getOptLevel() != CodeGenOpt::None) {
+ // Order cases by weight so the most likely case will be checked first.
+ std::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Weight > b.Weight;
+ });
+
+ // Rearrange the case blocks so that the last one falls through if possible
+ // without changing the order of weights.
+ for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
+ --I;
+ if (I->Weight > W.LastCluster->Weight)
+ break;
+ if (I->Kind == CC_Range && I->MBB == NextMBB) {
+ std::swap(*I, *W.LastCluster);
+ break;
+ }
+ }
+ }
+
+ // Compute total weight.
+ uint32_t UnhandledWeights = 0;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) {
+ UnhandledWeights += I->Weight;
+ assert(UnhandledWeights >= I->Weight && "Weight overflow!");
+ }
+
+ MachineBasicBlock *CurMBB = W.MBB;
+ for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ MachineBasicBlock *Fallthrough;
+ if (I == W.LastCluster) {
+ // For the last cluster, fall through to the default destination.
+ Fallthrough = DefaultMBB;
+ } else {
+ Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+ CurMF->insert(BBI, Fallthrough);
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ switch (I->Kind) {
+ case CC_JumpTable: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
+ JumpTable *JT = &JTCases[I->JTCasesIndex].second;
+
+ // The jump block hasn't been inserted yet; insert it here.
+ MachineBasicBlock *JumpMBB = JT->MBB;
+ CurMF->insert(BBI, JumpMBB);
+ addSuccessorWithWeight(CurMBB, Fallthrough);
+ addSuccessorWithWeight(CurMBB, JumpMBB);
+
+ // The jump table header will be inserted in our current block, do the
+ // range check, and fall through to our fallthrough block.
+ JTH->HeaderBB = CurMBB;
+ JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+ // If we're in the right place, emit the jump table header right now.
+ if (CurMBB == SwitchMBB) {
+ visitJumpTableHeader(*JT, *JTH, SwitchMBB);
+ JTH->Emitted = true;
+ }
+ break;
+ }
+ case CC_BitTests: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
+
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ visitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ break;
+ }
+ case CC_Range: {
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->Low == I->High) {
+ // Check Cond == I->Low.
+ CC = ISD::SETEQ;
+ LHS = Cond;
+ RHS = I->Low;
+ MHS = nullptr;
+ } else {
+ // Check I->Low <= Cond <= I->High.
+ CC = ISD::SETLE;
+ LHS = I->Low;
+ MHS = Cond;
+ RHS = I->High;
+ }
+
+ // The false weight is the sum of all unhandled cases.
+ UnhandledWeights -= I->Weight; + CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, + UnhandledWeights); + + if (CurMBB == SwitchMBB) + visitSwitchCase(CB, SwitchMBB); + else + SwitchCases.push_back(CB); + + break; + } + } + CurMBB = Fallthrough; + } +} + +void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, + const SwitchWorkListItem &W, + Value *Cond, + MachineBasicBlock *SwitchMBB) { + assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && + "Clusters not sorted?"); + + assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); + + // Balance the tree based on branch weights to create a near-optimal (in terms + // of search time given key frequency) binary search tree. See e.g. Kurt + // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). + CaseClusterIt LastLeft = W.FirstCluster; + CaseClusterIt FirstRight = W.LastCluster; + uint32_t LeftWeight = LastLeft->Weight; + uint32_t RightWeight = FirstRight->Weight; + + // Move LastLeft and FirstRight towards each other from opposite directions to + // find a partitioning of the clusters which balances the weight on both + // sides. If LeftWeight and RightWeight are equal, alternate which side is + // taken to ensure 0-weight nodes are distributed evenly. + unsigned I = 0; + while (LastLeft + 1 < FirstRight) { + if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) + LeftWeight += (++LastLeft)->Weight; + else + RightWeight += (--FirstRight)->Weight; + I++; + } + assert(LastLeft + 1 == FirstRight); + assert(LastLeft >= W.FirstCluster); + assert(FirstRight <= W.LastCluster); + + // Use the first element on the right as pivot since we will make less-than + // comparisons against it. + CaseClusterIt PivotCluster = FirstRight; + assert(PivotCluster > W.FirstCluster); + assert(PivotCluster <= W.LastCluster); + + CaseClusterIt FirstLeft = W.FirstCluster; + CaseClusterIt LastRight = W.LastCluster; + + const ConstantInt *Pivot = PivotCluster->Low; + + // New blocks will be inserted immediately after the current one. + MachineFunction::iterator BBI = W.MBB; + ++BBI; + + // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, + // we can branch to its destination directly if it's squeezed exactly in + // between the known lower bound and Pivot - 1. + MachineBasicBlock *LeftMBB; + if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && + FirstLeft->Low == W.GE && + (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { + LeftMBB = FirstLeft->MBB; + } else { + LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, LeftMBB); + WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + // Put Cond in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(Cond); + } + + // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a + // single cluster, RHS.Low == Pivot, and we can branch to its destination + // directly if RHS.High equals the current upper bound. + MachineBasicBlock *RightMBB; + if (FirstRight == LastRight && FirstRight->Kind == CC_Range && + W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { + RightMBB = FirstRight->MBB; + } else { + RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, RightMBB); + WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + // Put Cond in a virtual register to make it available from the new blocks. 
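
The pivot search in splitWorkItem() above reads well in isolation: walk inward from both ends, always growing the lighter side. A standalone sketch (illustrative; assumes at least two clusters):

    #include <cstdint>
    #include <vector>

    // Returns the index of the first cluster of the right subtree. Ties
    // alternate sides so that runs of 0-weight clusters split evenly.
    size_t pickPivot(const std::vector<uint32_t> &Weights) {
      size_t LastLeft = 0, FirstRight = Weights.size() - 1; // needs size() >= 2
      uint64_t LeftWeight = Weights[LastLeft], RightWeight = Weights[FirstRight];
      for (unsigned I = 0; LastLeft + 1 < FirstRight; ++I) {
        if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1)))
          LeftWeight += Weights[++LastLeft];
        else
          RightWeight += Weights[--FirstRight];
      }
      return FirstRight;
    }
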
+ ExportFromCurrentBlock(Cond); + } + + // Create the CaseBlock record that will be used to lower the branch. + CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, + LeftWeight, RightWeight); + + if (W.MBB == SwitchMBB) + visitSwitchCase(CB, SwitchMBB); + else + SwitchCases.push_back(CB); +} + +void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { + // Extract cases from the switch. + BranchProbabilityInfo *BPI = FuncInfo.BPI; + CaseClusterVector Clusters; + Clusters.reserve(SI.getNumCases()); + for (auto I : SI.cases()) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; + const ConstantInt *CaseVal = I.getCaseValue(); + uint32_t Weight = + BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); + } + + MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; + + // Cluster adjacent cases with the same destination. We do this at all + // optimization levels because it's cheap to do and will make codegen faster + // if there are many clusters. + sortAndRangeify(Clusters); + + if (TM.getOptLevel() != CodeGenOpt::None) { + // Replace an unreachable default with the most popular destination. + // FIXME: Exploit unreachable default more aggressively. + bool UnreachableDefault = + isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()); + if (UnreachableDefault && !Clusters.empty()) { + DenseMap<const BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + const BasicBlock *MaxBB = nullptr; + for (auto I : SI.cases()) { + const BasicBlock *BB = I.getCaseSuccessor(); + if (++Popularity[BB] > MaxPop) { + MaxPop = Popularity[BB]; + MaxBB = BB; + } + } + // Set new default. + assert(MaxPop > 0 && MaxBB); + DefaultMBB = FuncInfo.MBBMap[MaxBB]; + + // Remove cases that were pointing to the destination that is now the + // default. + CaseClusterVector New; + New.reserve(Clusters.size()); + for (CaseCluster &CC : Clusters) { + if (CC.MBB != DefaultMBB) + New.push_back(CC); + } + Clusters = std::move(New); + } + } + + // If there is only the default destination, jump there directly. + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + if (Clusters.empty()) { + SwitchMBB->addSuccessor(DefaultMBB); + if (DefaultMBB != NextBlock(SwitchMBB)) { + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(DefaultMBB))); + } + return; + } + + if (TM.getOptLevel() != CodeGenOpt::None) { + findJumpTables(Clusters, &SI, DefaultMBB); + findBitTestClusters(Clusters, &SI); + } + + + DEBUG({ + dbgs() << "Case clusters: "; + for (const CaseCluster &C : Clusters) { + if (C.Kind == CC_JumpTable) dbgs() << "JT:"; + if (C.Kind == CC_BitTests) dbgs() << "BT:"; + + C.Low->getValue().print(dbgs(), true); + if (C.Low != C.High) { + dbgs() << '-'; + C.High->getValue().print(dbgs(), true); + } + dbgs() << ' '; + } + dbgs() << '\n'; + }); + + assert(!Clusters.empty()); + SwitchWorkList WorkList; + CaseClusterIt First = Clusters.begin(); + CaseClusterIt Last = Clusters.end() - 1; + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + + while (!WorkList.empty()) { + SwitchWorkListItem W = WorkList.back(); + WorkList.pop_back(); + unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; + + if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { + // For optimized builds, lower large range as a balanced binary tree. 
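
visitSwitch() above leans on sortAndRangeify(), declared in the SelectionDAGBuilder.h hunk below. A standalone sketch of that pre-pass under simplified types; the merge condition (adjacent values, same destination) and the weight accumulation are this sketch's reading, not a copy of the implementation:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Cluster { int64_t Low, High; int Dest; uint32_t Weight; };

    // Sort clusters by low value, then merge neighbours that touch and share a
    // destination, so "case 1: case 2: case 3: goto A" becomes one range.
    void sortAndRangeify(std::vector<Cluster> &Cs) {
      std::sort(Cs.begin(), Cs.end(),
                [](const Cluster &a, const Cluster &b) { return a.Low < b.Low; });
      size_t Dst = 0;
      for (const Cluster &C : Cs) {
        if (Dst > 0 && Cs[Dst - 1].Dest == C.Dest && Cs[Dst - 1].High + 1 == C.Low) {
          Cs[Dst - 1].High = C.High;      // extend the previous range
          Cs[Dst - 1].Weight += C.Weight; // accumulate branch weight
        } else {
          Cs[Dst++] = C;
        }
      }
      Cs.resize(Dst);
    }
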
+ splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); + continue; + } + + lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9070091..f0c03af 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -17,9 +17,11 @@ #include "StatepointLowering.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/Constants.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" @@ -133,26 +135,65 @@ private: /// SDNodes we create. unsigned SDNodeOrder; - /// Case - A struct to record the Value for a switch case, and the - /// case's target basic block. - struct Case { - const Constant *Low; - const Constant *High; - MachineBasicBlock* BB; - uint32_t ExtraWeight; + enum CaseClusterKind { + /// A cluster of adjacent case labels with the same destination, or just one + /// case. + CC_Range, + /// A cluster of cases suitable for jump table lowering. + CC_JumpTable, + /// A cluster of cases suitable for bit test lowering. + CC_BitTests + }; - Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { } - Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, - uint32_t extraweight) : Low(low), High(high), BB(bb), - ExtraWeight(extraweight) { } + /// A cluster of case labels. + struct CaseCluster { + CaseClusterKind Kind; + const ConstantInt *Low, *High; + union { + MachineBasicBlock *MBB; + unsigned JTCasesIndex; + unsigned BTCasesIndex; + }; + uint32_t Weight; + + static CaseCluster range(const ConstantInt *Low, const ConstantInt *High, + MachineBasicBlock *MBB, uint32_t Weight) { + CaseCluster C; + C.Kind = CC_Range; + C.Low = Low; + C.High = High; + C.MBB = MBB; + C.Weight = Weight; + return C; + } - APInt size() const { - const APInt &rHigh = cast<ConstantInt>(High)->getValue(); - const APInt &rLow = cast<ConstantInt>(Low)->getValue(); - return (rHigh - rLow + 1ULL); + static CaseCluster jumpTable(const ConstantInt *Low, + const ConstantInt *High, unsigned JTCasesIndex, + uint32_t Weight) { + CaseCluster C; + C.Kind = CC_JumpTable; + C.Low = Low; + C.High = High; + C.JTCasesIndex = JTCasesIndex; + C.Weight = Weight; + return C; + } + + static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High, + unsigned BTCasesIndex, uint32_t Weight) { + CaseCluster C; + C.Kind = CC_BitTests; + C.Low = Low; + C.High = High; + C.BTCasesIndex = BTCasesIndex; + C.Weight = Weight; + return C; } }; + typedef std::vector<CaseCluster> CaseClusterVector; + typedef CaseClusterVector::iterator CaseClusterIt; + struct CaseBits { uint64_t Mask; MachineBasicBlock* BB; @@ -162,51 +203,14 @@ private: CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, uint32_t Weight): Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } - }; - - typedef std::vector<Case> CaseVector; - typedef std::vector<CaseBits> CaseBitsVector; - typedef CaseVector::iterator CaseItr; - typedef std::pair<CaseItr, CaseItr> CaseRange; - - /// CaseRec - A struct with ctor used in lowering switches to a binary tree - /// of conditional branches. 
- struct CaseRec { - CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, - CaseRange r) : - CaseBB(bb), LT(lt), GE(ge), Range(r) {} - - /// CaseBB - The MBB in which to emit the compare and branch - MachineBasicBlock *CaseBB; - /// LT, GE - If nonzero, we know the current case value must be less-than or - /// greater-than-or-equal-to these Constants. - const Constant *LT; - const Constant *GE; - /// Range - A pair of iterators representing the range of case values to be - /// processed at this point in the binary search tree. - CaseRange Range; - }; - typedef std::vector<CaseRec> CaseRecVector; - - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator()(const Case &C1, const Case &C2) { - assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); - const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); - const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } + CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {} }; - struct CaseBitsCmp { - bool operator()(const CaseBits &C1, const CaseBits &C2) { - return C1.Bits > C2.Bits; - } - }; + typedef std::vector<CaseBits> CaseBitsVector; - void Clusterify(CaseVector &Cases, const SwitchInst &SI); + /// Sort Clusters and merge adjacent cases. + void sortAndRangeify(CaseClusterVector &Clusters); /// CaseBlock - This structure is used to communicate between /// SelectionDAGBuilder and SDISel for the code generation of additional basic @@ -296,6 +300,58 @@ private: BitTestInfo Cases; }; + /// Minimum jump table density, in percent. + enum { MinJumpTableDensity = 40 }; + + /// Check whether a range of clusters is dense enough for a jump table. + bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, + unsigned First, unsigned Last); + + /// Build a jump table cluster from Clusters[First..Last]. Returns false if it + /// decides it's not a good idea. + bool buildJumpTable(CaseClusterVector &Clusters, unsigned First, + unsigned Last, const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster); + + /// Find clusters of cases suitable for jump table lowering. + void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, + MachineBasicBlock *DefaultMBB); + + /// Check whether the range [Low,High] fits in a machine word. + bool rangeFitsInWord(const APInt &Low, const APInt &High); + + /// Check whether these clusters are suitable for lowering with bit tests based + /// on the number of destinations, comparison metric, and range. + bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, + const APInt &Low, const APInt &High); + + /// Build a bit test cluster from Clusters[First..Last]. Returns false if it + /// decides it's not a good idea. + bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, + const SwitchInst *SI, CaseCluster &BTCluster); + + /// Find clusters of cases suitable for bit test lowering. + void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI); + + struct SwitchWorkListItem { + MachineBasicBlock *MBB; + CaseClusterIt FirstCluster; + CaseClusterIt LastCluster; + const ConstantInt *GE; + const ConstantInt *LT; + }; + typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; + + /// Emit comparison and split W into two subtrees. 
+ void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
+ Value *Cond, MachineBasicBlock *SwitchMBB);
+
+ /// Lower W.
+ void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB);
+
+
 /// A class which encapsulates all of the information needed to generate a
 /// stack protector check and signals to isel via its state being initialized
 /// that a stack protector needs to be generated.
@@ -405,7 +461,6 @@ private:
 StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
 FailureMBB(nullptr), Guard(nullptr),
 GuardReg(0) { }
- ~StackProtectorDescriptor() { }
 /// Returns true if all fields of the stack protector descriptor are
 /// initialized implying that we should/are ready to emit a stack protector.
@@ -605,10 +660,16 @@ public:
 void visit(unsigned Opcode, const User &I);
+ /// getCopyFromRegs - If there was a virtual register allocated for the value
+ /// V, emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
+ SDValue getCopyFromRegs(const Value *V, Type *Ty);
+
 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
 // generate the debug data structures now that we've seen its definition.
 void resolveDanglingDebugInfo(const Value *V, SDValue Val);
 SDValue getValue(const Value *V);
+ bool findValue(const Value *V) const;
+
 SDValue getNonRegisterValue(const Value *V);
 SDValue getValueImpl(const Value *V);
@@ -618,13 +679,6 @@ public:
 N = NewN;
 }
- void removeValue(const Value *V) {
- // This is to support hack in lowerCallFromStatepoint
- // Should be removed when hack is resolved
- if (NodeMap.count(V))
- NodeMap.erase(V);
- }
-
 void setUnusedArgValue(const Value *V, SDValue NewN) {
 SDValue &N = UnusedArgNodeMap[V];
 assert(!N.getNode() && "Already set a value for this node!");
@@ -652,7 +706,7 @@ public:
 unsigned ArgIdx,
 unsigned NumArgs,
 SDValue Callee,
- bool UseVoidTy = false,
+ Type *ReturnTy,
 MachineBasicBlock *LandingPad = nullptr,
 bool IsPatchPoint = false);
@@ -660,6 +714,10 @@ public:
 /// references that need to refer to the last resulting block.
 void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+ // This function is responsible for the whole statepoint lowering process.
+ // It uniformly handles invoke and call statepoints.
+ void LowerStatepoint(ImmutableStatepoint Statepoint,
+ MachineBasicBlock *LandingPad = nullptr);
 private:
 std::pair<SDValue, SDValue> lowerInvokable(
 TargetLowering::CallLoweringInfo &CLI,
 MachineBasicBlock *LandingPad);
@@ -672,27 +730,6 @@ private:
 void visitIndirectBr(const IndirectBrInst &I);
 void visitUnreachable(const UnreachableInst &I);
- // Helpers for visitSwitch
- bool handleSmallSwitchRange(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock *SwitchBB);
- bool handleJTSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock *SwitchBB);
- bool handleBTSplitSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock *SwitchBB);
- bool handleBitTestsSwitchCase(CaseRec& CR,
- CaseRecVector& WorkList,
- const Value* SV,
- MachineBasicBlock* Default,
- MachineBasicBlock *SwitchBB);
-
 uint32_t getEdgeWeight(const MachineBasicBlock *Src,
 const MachineBasicBlock *Dst) const;
 void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
@@ -713,6 +750,8 @@ public:
 void visitJumpTable(JumpTable &JT);
 void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
 MachineBasicBlock *SwitchBB);
+ unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV,
+ MachineBasicBlock *LPadMBB);
 private:
 // These all get lowered before this pass.
@@ -772,6 +811,8 @@ private:
 void visitStore(const StoreInst &I);
 void visitMaskedLoad(const CallInst &I);
 void visitMaskedStore(const CallInst &I);
+ void visitMaskedGather(const CallInst &I);
+ void visitMaskedScatter(const CallInst &I);
 void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
 void visitAtomicRMW(const AtomicRMWInst &I);
 void visitFence(const FenceInst &I);
@@ -820,9 +861,91 @@ private:
 /// EmitFuncArgumentDbgValue - If V is a function argument then create
 /// corresponding DBG_VALUE machine instruction for it now. At the end of
 /// instruction selection, they will be inserted into the entry BB.
- bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr,
+ bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
+ DIExpression *Expr, DILocation *DL,
 int64_t Offset, bool IsIndirect,
 const SDValue &N);
+
+ /// Return the next block after MBB, or nullptr if there is none.
+ MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
+
+ /// Update the DAG and DAG builder with the relevant information after
+ /// a new root node has been created which could be a tail call.
+ void updateDAGForMaybeTailCall(SDValue MaybeTC);
+};
+
+/// RegsForValue - This struct represents the registers (physical or virtual)
+/// that a particular set of values is assigned, and the type information about
+/// the value. The most common situation is to represent one value at a time,
+/// but struct or array values are handled element-wise as multiple values. The
+/// splitting of aggregates is performed recursively, so that we never have
+/// aggregate-typed registers. The values at this point do not necessarily have
+/// legal types, so each value may require one or more registers of some legal
+/// type.
+///
+struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+ /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers is.
(Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs are redundant with TLI's
+ /// getRegisterType member function; however, with physical registers
+ /// it is necessary to have a separate record of the types.
+ ///
+ SmallVector<MVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue();
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg,
+ Type *Ty);
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from
+ /// this value and return the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is nullptr, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ SDLoc dl,
+ SDValue &Chain, SDValue *Flag,
+ const Value *V = nullptr) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copy the specified
+ /// value into the registers specified by this object. This uses Chain/Flag
+ /// as the input and updates them for the output Chain/Flag. If the Flag
+ /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used
+ /// in printing better diagnostic messages on error.
+ void
+ getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
+ SDValue *Flag, const Value *V = nullptr,
+ ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, SDLoc dl, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index e8577d8..96ee899 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -95,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; @@ -187,10 +188,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; case ISD::FMA: return "fma"; + case ISD::FMAD: return "fmad"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; case ISD::FPOW: return "fpow"; + case ISD::SMIN: return "smin"; + case ISD::SMAX: return "smax"; + case ISD::UMIN: return "umin"; + case ISD::UMAX: return "umax"; case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; @@ -271,6 +277,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STORE: return "store"; case ISD::MLOAD: return "masked_load"; case ISD::MSTORE: return "masked_store"; + case ISD::MGATHER: return "masked_gather"; + case ISD::MSCATTER: return "masked_scatter"; case ISD::VAARG: return "vaarg"; case ISD::VACOPY: return "vacopy"; case ISD::VAEND: return "vaend"; @@ -284,6 +292,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DEBUGTRAP: return "debugtrap"; case ISD::LIFETIME_START: return "lifetime.start"; case ISD::LIFETIME_END: return "lifetime.end"; + case ISD::GC_TRANSITION_START: return "gc_transition.start"; + case ISD::GC_TRANSITION_END: return "gc_transition.end"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -518,22 +528,20 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); - // Omit the directory, since it's usually long and uninteresting. 
- if (Scope)
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << dl.getLine();
- if (dl.getCol() != 0)
- OS << ':' << dl.getCol();
- }
+ if (!G)
+ return;
+
+ DILocation *L = getDebugLoc();
+ if (!L)
+ return;
+
+ if (auto *Scope = L->getScope())
+ OS << Scope->getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << L->getLine();
+ if (unsigned C = L->getColumn())
+ OS << ':' << C;
}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index ef54525..22f592a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
 #include "ScheduleDAGSDNodes.h"
 #include "SelectionDAGBuilder.h"
 #include "llvm/ADT/PostOrderIterator.h"
@@ -19,10 +19,11 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,6 +32,8 @@
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
@@ -40,6 +43,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -47,7 +51,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -166,14 +169,13 @@ static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
 cl::desc("Enable verbose messages in the \"fast\" "
 "instruction selector"));
-static cl::opt<bool>
-EnableFastISelAbort("fast-isel-abort", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower an instruction"));
-static cl::opt<bool>
-EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower a formal argument"));
+static cl::opt<int> EnableFastISelAbort(
+ "fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction: 0 disables the abort, 1 will "
+ "abort for everything but args, calls and terminators, 2 will "
+ "also abort for argument lowering, and 3 will never fall back "
+ "to SelectionDAG."));
static cl::opt<bool>
UseMBPI("use-mbpi",
@@ -291,7 +293,8 @@ namespace llvm {
 const TargetLowering *TLI = IS->TLI;
 const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
- if (OptLevel ==
CodeGenOpt::None || ST.useMachineScheduler() || + if (OptLevel == CodeGenOpt::None || + (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::RegPressure) @@ -349,7 +352,8 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); - initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -363,7 +367,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -376,7 +380,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { +static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // Loop for blocks with phi nodes. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { PHINode *PN = dyn_cast<PHINode>(BB->begin()); @@ -400,8 +404,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { continue; // Okay, we have to split this edge. - SplitCriticalEdge(Pred->getTerminator(), - GetSuccessorNumber(Pred, BB), SDISel, true); + SplitCriticalEdge( + Pred->getTerminator(), GetSuccessorNumber(Pred, BB), + CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -412,7 +417,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && - "-fast-isel-abort requires -fast-isel"); + "-fast-isel-abort > 0 requires -fast-isel"); const Function &Fn = *mf.getFunction(); MF = &mf; @@ -433,12 +438,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); - LibInfo = &getAnalysis<TargetLibraryInfo>(); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); + SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); @@ -496,12 +501,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getDebugVariable(); const MDNode *Expr = MI->getDebugExpression(); + DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? 
MI->getOperand(1).getImm() : 0; + assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); // Def is never a terminator here, so it is ok to increment InsertPos. - BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset, - Variable, Expr); + BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, LDI->second, Offset, Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -519,9 +526,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CopyUseMI = nullptr; break; } if (CopyUseMI) { + // Use MI's debug location, which describes where Variable was + // declared, rather than whatever is attached to CopyUseMI. MachineInstr *NewMI = - BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); @@ -570,6 +578,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TargetRegisterInfo::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. + + + // Replacing one register with another won't touch the kill flags. + // We need to conservatively clear the kill flags as a kill on the old + // register might dominate existing uses of the new register. + if (!MRI.use_empty(To)) + MRI.clearKillFlags(From); MRI.replaceRegWith(From, To); } @@ -591,9 +606,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { - // Lower all of the non-terminator instructions. If a call is emitted - // as a tail call, cease emitting nodes for this block. Terminators - // are handled below. + // Lower the instructions. If a call is emitted as a tail call, cease emitting + // nodes for this block. for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) SDB->visit(*I); @@ -656,7 +670,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { (void)BlockNumber; bool MatchFilterBB = false; (void)MatchFilterBB; #ifndef NDEBUG - MatchFilterBB = (!FilterDAGBasicBlockName.empty() && + MatchFilterBB = (FilterDAGBasicBlockName.empty() || FilterDAGBasicBlockName == FuncInfo->MBB->getBasicBlock()->getName().str()); #endif @@ -667,8 +681,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getName().str() + ":" + - FuncInfo->MBB->getBasicBlock()->getName().str(); + BlockName = + (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -904,9 +918,11 @@ void SelectionDAGISel::DoInstructionSelection() { /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. -void SelectionDAGISel::PrepareEHLandingPad() { +bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + // Add a label to mark the beginning of the landing pad. 
Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -918,14 +934,53 @@ void SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); + // If this is an MSVC-style personality function, we need to split the landing + // pad into several BBs. + const BasicBlock *LLVMBB = MBB->getBasicBlock(); + const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); + MF->getMMI().addPersonality( + MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts())); + EHPersonality Personality = MF->getMMI().getPersonalityType(); + + if (isMSVCEHPersonality(Personality)) { + SmallVector<MachineBasicBlock *, 4> ClauseBBs; + const IntrinsicInst *ActionsCall = + dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt()); + // Get all invoke BBs that unwind to this landingpad. + SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), + MBB->pred_end()); + if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) { + // If this is a call to llvm.eh.actions followed by indirectbr, then we've + // run WinEHPrepare, and we should remove this block from the machine CFG. + // Mark the targets of the indirectbr as landingpads instead. + for (const BasicBlock *LLVMSucc : successors(LLVMBB)) { + MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc]; + // Add the edge from the invoke to the clause. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->addSuccessor(ClauseBB); + + // Mark the clause as a landing pad or MI passes will delete it. + ClauseBB->setIsLandingPad(); + } + } + + // Remove the edge from the invoke to the lpad. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->removeSuccessor(MBB); + + // Don't select instructions for the landingpad. + return false; + } + // Mark exception register as live in. - const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); if (unsigned Reg = TLI->getExceptionPointerRegister()) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. if (unsigned Reg = TLI->getExceptionSelectorRegister()) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); + + return true; } /// isFoldedOrDeadInstruction - Return true if the specified instruction is @@ -1095,8 +1150,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; - if (FuncInfo->MBB->isLandingPad()) - PrepareEHLandingPad(); + if (LLVMBB->isLandingPad()) + if (!PrepareEHLandingPad()) + continue; // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { @@ -1111,8 +1167,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!FastIS->lowerArguments()) { // Fast isel failed to lower these arguments ++NumFastIselFailLowerArguments; - if (EnableFastISelAbortArgs) - llvm_unreachable("FastISel didn't lower all arguments"); + if (EnableFastISelAbort > 1) + report_fatal_error("FastISel didn't lower all arguments"); // Use SelectionDAG argument lowering LowerArguments(Fn); @@ -1181,6 +1237,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { dbgs() << "FastISel missed call: "; Inst->dump(); } + if (EnableFastISelAbort > 2) + // FastISel selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. 
+ report_fatal_error("FastISel didn't select the entire block"); if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; @@ -1208,24 +1268,24 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } - if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { - // Don't abort, and use a different message for terminator misses. - NumFastIselFailures += NumFastIselRemaining; - if (EnableFastISelVerbose || EnableFastISelAbort) { + bool ShouldAbort = EnableFastISelAbort; + if (EnableFastISelVerbose || EnableFastISelAbort) { + if (isa<TerminatorInst>(Inst)) { + // Use a different message for terminator misses. dbgs() << "FastISel missed terminator: "; - Inst->dump(); - } - } else { - NumFastIselFailures += NumFastIselRemaining; - if (EnableFastISelVerbose || EnableFastISelAbort) { + // Don't abort unless for terminator unless the level is really high + ShouldAbort = (EnableFastISelAbort > 2); + } else { dbgs() << "FastISel miss: "; - Inst->dump(); } - if (EnableFastISelAbort) - // The "fast" selector couldn't handle something and bailed. - // For the purpose of debugging, just abort. - llvm_unreachable("FastISel didn't select the entire block"); + Inst->dump(); } + if (ShouldAbort) + // FastISel selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + report_fatal_error("FastISel didn't select the entire block"); + + NumFastIselFailures += NumFastIselRemaining; break; } @@ -1354,21 +1414,15 @@ SelectionDAGISel::FinishBasicBlock() { << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); - const bool MustUpdatePHINodes = SDB->SwitchCases.empty() && - SDB->JTCases.empty() && - SDB->BitTestCases.empty(); - // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. - if (MustUpdatePHINodes) { - for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); - assert(PHI->isPHI() && - "This is not a machine PHI node that we are updating!"); - if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) - continue; - PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); - } + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) + continue; + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } // Handle stack protector. @@ -1413,10 +1467,6 @@ SelectionDAGISel::FinishBasicBlock() { SDB->SPDescriptor.resetPerBBState(); } - // If we updated PHI Nodes, return early. - if (MustUpdatePHINodes) - return; - for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { @@ -1530,16 +1580,6 @@ SelectionDAGISel::FinishBasicBlock() { } SDB->JTCases.clear(); - // If the switch block involved a branch to one of the actual successors, we - // need to update PHI nodes in that block. 
- for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); - assert(PHI->isPHI() && - "This is not a machine PHI node that we are updating!"); - if (FuncInfo->MBB->isSuccessor(PHI->getParent())) - PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); - } - // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { @@ -1677,11 +1717,10 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, return false; } - /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated /// by tblgen. Others should not call it. void SelectionDAGISel:: -SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { +SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) { std::vector<SDValue> InOps; std::swap(InOps, Ops); @@ -1704,16 +1743,30 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { } else { assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && "Memory operand with multiple values?"); + + unsigned TiedToOperand; + if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) { + // We need the constraint ID from the operand this is tied to. + unsigned CurOp = InlineAsm::Op_FirstOperand; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + for (; TiedToOperand; --TiedToOperand) { + CurOp += InlineAsm::getNumOperandRegisters(Flags)+1; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + } + } + // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) + if (SelectInlineAsmMemoryOperand(InOps[i+1], + InlineAsm::getMemoryConstraintID(Flags), + SelOps)) report_fatal_error("Could not match memory address. Inline asm" " failure!"); // Add this to the output node. 
unsigned NewFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); - Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } @@ -1859,11 +1912,13 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, } SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { + SDLoc DL(N); + std::vector<SDValue> Ops(N->op_begin(), N->op_end()); - SelectInlineAsmMemoryOperands(Ops); + SelectInlineAsmMemoryOperands(Ops, DL); - EVT VTs[] = { MVT::Other, MVT::Glue }; - SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops); + const EVT VTs[] = {MVT::Other, MVT::Glue}; + SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); New->setNodeId(-1); return New.getNode(); } @@ -1871,12 +1926,12 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { SDNode *SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); - MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0)); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0)); SDValue New = CurDAG->getCopyFromReg( - CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0)); + Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); return New.getNode(); } @@ -1889,7 +1944,7 @@ SDNode unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getOperand(2).getValueType()); SDValue New = CurDAG->getCopyToReg( - CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2)); + Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); return New.getNode(); } @@ -2464,7 +2519,7 @@ public: SelectionDAG::DAGUpdateListener(DAG), RecordedNodes(RN), MatchScopes(MS) { } - void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we // do, so it's unnecessary to update matching state at that point). 
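The tied-operand handling added to SelectInlineAsmMemoryOperands above walks the INLINEASM operand list to recover the constraint ID of the operand a memory use is tied to. A condensed sketch of that walk, assuming the usual INLINEASM layout (chain, asm string, !srcloc metadata, extra-info word, then groups of one flag word followed by its register operands); the free function itself is hypothetical:

static unsigned flagsOfTiedOperand(const std::vector<SDValue> &InOps,
                                   unsigned TiedToOperand) {
  unsigned CurOp = InlineAsm::Op_FirstOperand;
  unsigned Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
  for (; TiedToOperand; --TiedToOperand) {
    // Skip an entire operand group: the flag word plus its values.
    CurOp += InlineAsm::getNumOperandRegisters(Flags) + 1;
    Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
  }
  return Flags; // InlineAsm::getMemoryConstraintID(Flags) now applies.
}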
@@ -2885,7 +2940,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getTargetConstant(Val, VT), nullptr)); + CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), + VT), nullptr)); continue; } case OPC_EmitRegister: { @@ -2917,10 +2973,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Imm->getOpcode() == ISD::Constant) { const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); - Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true); + Imm = CurDAG->getConstant(*Val, SDLoc(NodeToMatch), Imm.getValueType(), + true); } else if (Imm->getOpcode() == ISD::ConstantFP) { const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); - Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true); + Imm = CurDAG->getConstantFP(*Val, SDLoc(NodeToMatch), + Imm.getValueType(), true); } RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 33c20d3..2d4ab6c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" @@ -37,13 +38,19 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered"); STATISTIC(StatepointMaxSlotsRequired, "Maximum number of stack slots required for a single statepoint"); -void -StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { +static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops, + SelectionDAGBuilder &Builder, uint64_t Value) { + SDLoc L = Builder.getCurSDLoc(); + Ops.push_back(Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L, + MVT::i64)); + Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64)); +} + +void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { // Consistency check assert(PendingGCRelocateCalls.empty() && "Trying to visit statepoint before finished processing previous one"); Locations.clear(); - RelocLocations.clear(); NextSlotToAllocate = 0; // Need to resize this on each safepoint - we need the two to stay in // sync and the clear patterns of a SelectionDAGBuilder have no relation @@ -53,9 +60,9 @@ StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { AllocatedStackSlots[i] = false; } } + void StatepointLoweringState::clear() { Locations.clear(); - RelocLocations.clear(); AllocatedStackSlots.clear(); assert(PendingGCRelocateCalls.empty() && "cleared before statepoint sequence completed"); @@ -222,75 +229,94 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, /// Extract call from statepoint, lower it and return pointer to the /// call node.
Also update NodeMap so that getValue(statepoint) will /// reference lowered call result -static SDNode *lowerCallFromStatepoint(const CallInst &CI, - SelectionDAGBuilder &Builder) { - - assert(Intrinsic::experimental_gc_statepoint == - dyn_cast<IntrinsicInst>(&CI)->getIntrinsicID() && - "function called must be the statepoint function"); +static SDNode * +lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, + SelectionDAGBuilder &Builder, + SmallVectorImpl<SDValue> &PendingExports) { + + ImmutableCallSite CS(ISP.getCallSite()); + + SDValue ActualCallee = Builder.getValue(ISP.getActualCallee()); + + // Handle immediate and symbolic callees. + if (auto *ConstCallee = dyn_cast<ConstantSDNode>(ActualCallee.getNode())) + ActualCallee = Builder.DAG.getIntPtrConstant(ConstCallee->getZExtValue(), + Builder.getCurSDLoc(), + /*isTarget=*/true); + else if (auto *SymbolicCallee = + dyn_cast<GlobalAddressSDNode>(ActualCallee.getNode())) + ActualCallee = Builder.DAG.getTargetGlobalAddress( + SymbolicCallee->getGlobal(), SDLoc(SymbolicCallee), + SymbolicCallee->getValueType(0)); + + assert(CS.getCallingConv() != CallingConv::AnyReg && + "anyregcc is not supported on statepoints!"); + + Type *DefTy = ISP.getActualReturnType(); + bool HasDef = !DefTy->isVoidTy(); + + SDValue ReturnValue, CallEndVal; + std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( + ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, + ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad, + false /* IsPatchPoint */); + + SDNode *CallEnd = CallEndVal.getNode(); + + // Get a call instruction from the call sequence chain. Tail calls are not + // allowed. The following code is essentially reverse engineering X86's + // LowerCallTo. + // + // We are expecting DAG to have the following form: + // + // ch = eh_label (only in case of invoke statepoint) + // ch, glue = callseq_start ch + // ch, glue = X86::Call ch, glue + // ch, glue = callseq_end ch, glue + // get_return_value ch, glue + // + // get_return_value can either be a CopyFromReg to grab the return value from + // %RAX, or it can be a LOAD to load a value returned by reference via a stack + // slot. + + if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg || + CallEnd->getOpcode() == ISD::LOAD)) + CallEnd = CallEnd->getOperand(0).getNode(); + + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - ImmutableStatepoint StatepointOperands(&CI); - - // Lower the actual call itself - This is a bit of a hack, but we want to - // avoid modifying the actual lowering code. This is similiar in intent to - // the LowerCallOperands mechanism used by PATCHPOINT, but is structured - // differently. Hopefully, this is slightly more robust w.r.t. calling - // convention, return values, and other function attributes. - Value *ActualCallee = const_cast<Value *>(StatepointOperands.actualCallee()); - - std::vector<Value *> Args; - CallInst::const_op_iterator arg_begin = StatepointOperands.call_args_begin(); - CallInst::const_op_iterator arg_end = StatepointOperands.call_args_end(); - Args.insert(Args.end(), arg_begin, arg_end); - // TODO: remove the creation of a new instruction! We should not be - // modifying the IR (even temporarily) at this point. 
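A condensed sketch of the walk implied by the node-shape comment above: peel the optional return-value fetch, then expect the CALLSEQ_END of the call sequence (the helper name is hypothetical; the hunk inlines this logic):

static SDNode *findCallSeqEnd(SDValue CallEndVal, bool HasDef) {
  SDNode *N = CallEndVal.getNode();
  // Step over the node that fetches the return value, if any: a CopyFromReg
  // from the return register or a LOAD from a by-reference stack slot.
  if (HasDef && (N->getOpcode() == ISD::CopyFromReg ||
                 N->getOpcode() == ISD::LOAD))
    N = N->getOperand(0).getNode();
  assert(N->getOpcode() == ISD::CALLSEQ_END && "unexpected call sequence");
  return N;
}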
- CallInst *Tmp = CallInst::Create(ActualCallee, Args); - Tmp->setTailCall(CI.isTailCall()); - Tmp->setCallingConv(CI.getCallingConv()); - Tmp->setAttributes(CI.getAttributes()); - Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false); - - // Handle the return value of the call iff any. - const bool HasDef = !Tmp->getType()->isVoidTy(); if (HasDef) { - // The value of the statepoint itself will be the value of call itself. - // We'll replace the actually call node shortly. gc_result will grab - // this value. - Builder.setValue(&CI, Builder.getValue(Tmp)); + if (CS.isInvoke()) { + // The result value will be used in a different basic block for invokes, + // so we need to export it now. But the statepoint call has a different + // type than the actual call, which means the standard exporting mechanism + // would create a register of the wrong type. So instead we need to create + // a register with the correct type and save the value into it manually. + // TODO: To eliminate this problem we can remove gc.result intrinsics + // completely and make the statepoint call return a tuple. + unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); + RegsForValue RFV(*Builder.DAG.getContext(), + Builder.DAG.getTargetLoweringInfo(), Reg, + ISP.getActualReturnType()); + SDValue Chain = Builder.DAG.getEntryNode(); + + RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, + nullptr); + PendingExports.push_back(Chain); + Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; + } else { + // The value of the statepoint itself will be the value of the call + // itself. We'll replace the actual call node shortly. gc_result will + // grab this value. + Builder.setValue(CS.getInstruction(), ReturnValue); + } } else { // The token value is never used from here on, just generate a poison value - Builder.setValue(&CI, Builder.DAG.getIntPtrConstant(-1)); - } - // Remove the fake entry we created so we don't have a hanging reference - // after we delete this node. - Builder.removeValue(Tmp); - delete Tmp; - Tmp = nullptr; - - // Search for the call node - // The following code is essentially reverse engineering X86's - // LowerCallTo.
- SDNode *CallNode = nullptr; - - // We just emitted a call, so it should be last thing generated - SDValue Chain = Builder.DAG.getRoot(); - - // Find closest CALLSEQ_END walking back through lowered nodes if needed - SDNode *CallEnd = Chain.getNode(); - int Sanity = 0; - while (CallEnd->getOpcode() != ISD::CALLSEQ_END) { - CallEnd = CallEnd->getGluedNode(); - assert(CallEnd && "Can not find call node"); - assert(Sanity < 20 && "should have found call end already"); - Sanity++; + Builder.setValue(CS.getInstruction(), + Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc())); } - assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && - "Expected a callseq node."); - assert(CallEnd->getGluedNode()); - // Step back inside the CALLSEQ - CallNode = CallEnd->getGluedNode(); - return CallNode; + return CallEnd->getOperand(0).getNode(); } /// Collect all gc pointers coming into statepoint intrinsic, clean them up, @@ -300,24 +326,15 @@ static SDNode *lowerCallFromStatepoint(const CallInst &CI, /// Relocs - the gc_relocate corresponding to each base/ptr pair /// Elements of these arrays should be in one-to-one correspondence with each /// other, i.e. Bases[i], Ptrs[i] are from the same gcrelocate call -static void -getIncomingStatepointGCValues(SmallVectorImpl<const Value *> &Bases, - SmallVectorImpl<const Value *> &Ptrs, - SmallVectorImpl<const Value *> &Relocs, - ImmutableCallSite Statepoint, - SelectionDAGBuilder &Builder) { - // Search for relocated pointers. Note that working backwards from the - // gc_relocates ensures that we only get pairs which are actually relocated - // and used after the statepoint. - // TODO: This logic should probably become a utility function in Statepoint.h - for (const User *U : cast<CallInst>(Statepoint.getInstruction())->users()) { - if (!isGCRelocate(U)) { - continue; - } - GCRelocateOperands relocateOpers(U); - Relocs.push_back(cast<Value>(U)); - Bases.push_back(relocateOpers.basePtr()); - Ptrs.push_back(relocateOpers.derivedPtr()); +static void getIncomingStatepointGCValues( + SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs, + SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite, + SelectionDAGBuilder &Builder) { + for (GCRelocateOperands relocateOpers : + StatepointSite.getRelocates(StatepointSite)) { + Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction()); + Bases.push_back(relocateOpers.getBasePtr()); + Ptrs.push_back(relocateOpers.getDerivedPtr()); } // Remove any redundant llvm::Values which map to the same SDValue as another @@ -376,14 +393,13 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // such in the stackmap. This is required so that the consumer can // parse any internal format to the deopt state. It also handles null // pointers and other constant pointers in GC states - Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + pushStackMapConstant(Ops, Builder, C->getSExtValue()); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { - // This handles allocas as arguments to the statepoint - const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back( - Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + // This handles allocas as arguments to the statepoint (this is only + // really meaningful for a deopt value. For GC, we'd be trying to + // relocate the address of the alloca itself?)
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); } else { // Otherwise, locate a spill slot and explicitly spill it so it // can be found by the runtime later. We currently do not support @@ -408,15 +424,15 @@ static void lowerIncomingStatepointValue(SDValue Incoming, /// statepoint. The chain nodes will have already been created and the DAG root /// will be set to the last value spilled (if any were). static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, - ImmutableStatepoint Statepoint, + ImmutableStatepoint StatepointSite, SelectionDAGBuilder &Builder) { // Lower the deopt and gc arguments for this statepoint. Layout will // be: deopt argument length, deopt arguments.., gc arguments... SmallVector<const Value *, 64> Bases, Ptrs, Relocations; - getIncomingStatepointGCValues(Bases, Ptrs, Relocations, - Statepoint.getCallSite(), Builder); + getIncomingStatepointGCValues(Bases, Ptrs, Relocations, StatepointSite, + Builder); #ifndef NDEBUG // Check that each of the gc pointer and bases we've gotten out of the @@ -424,61 +440,54 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // heap. This is basically just here to help catch errors during statepoint // insertion. TODO: This should actually be in the Verifier, but we can't get // to the GCStrategy from there (yet). - if (Builder.GFI) { - GCStrategy &S = Builder.GFI->getStrategy(); - for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed base pointer found in statepoint"); - } + GCStrategy &S = Builder.GFI->getStrategy(); + for (const Value *V : Bases) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); } - for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed derived pointer found in statepoint"); - } + } + for (const Value *V : Ptrs) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); } - for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && "non gc managed pointer relocated"); - } + } + for (const Value *V : Relocations) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && "non gc managed pointer relocated"); } } #endif - - // Before we actually start lowering (and allocating spill slots for values), // reserve any stack slots which we judge to be profitable to reuse for a // particular value. This is purely an optimization over the code below and // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. - for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end(); - I != E; ++I) { - Value *V = *I; + for (const Value *V : StatepointSite.vm_state_args()) { SDValue Incoming = Builder.getValue(V); reservePreviousStackSlotForValue(Incoming, Builder); } - for (unsigned i = 0; i < Bases.size() * 2; ++i) { - // Even elements will contain base, odd elements - derived ptr - const Value *V = i % 2 ? 
Bases[i / 2] : Ptrs[i / 2]; - SDValue Incoming = Builder.getValue(V); - reservePreviousStackSlotForValue(Incoming, Builder); + for (unsigned i = 0; i < Bases.size(); ++i) { + const Value *Base = Bases[i]; + reservePreviousStackSlotForValue(Builder.getValue(Base), Builder); + + const Value *Ptr = Ptrs[i]; + reservePreviousStackSlotForValue(Builder.getValue(Ptr), Builder); } // First, prefix the list with the number of unique values to be // lowered. Note that this is the number of *Values* not the // number of SDValues required to lower them. - const int NumVMSArgs = Statepoint.numTotalVMSArgs(); - Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back(Builder.DAG.getTargetConstant(NumVMSArgs, MVT::i64)); + const int NumVMSArgs = StatepointSite.getNumTotalVMSArgs(); + pushStackMapConstant(Ops, Builder, NumVMSArgs); - assert(NumVMSArgs + 1 == std::distance(Statepoint.vm_state_begin(), - Statepoint.vm_state_end())); + assert(NumVMSArgs == std::distance(StatepointSite.vm_state_begin(), + StatepointSite.vm_state_end())); // The vm state arguments are lowered in an opaque manner. We do // not know what type of values are contained within. We skip the @@ -486,9 +495,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // explicitly just above. We could have left it in the loop and // not done it explicitly, but it's far easier to understand this // way. - for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end(); - I != E; ++I) { - const Value *V = *I; + for (const Value *V : StatepointSite.vm_state_args()) { SDValue Incoming = Builder.getValue(V); lowerIncomingStatepointValue(Incoming, Ops, Builder); } @@ -498,35 +505,96 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // arrays interwoven with each (lowered) base pointer immediately followed by // its (lowered) derived pointer. i.e. // (base[0], ptr[0], base[1], ptr[1], ...) - for (unsigned i = 0; i < Bases.size() * 2; ++i) { - // Even elements will contain base, odd elements - derived ptr - const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2]; + for (unsigned i = 0; i < Bases.size(); ++i) { + const Value *Base = Bases[i]; + lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder); + + const Value *Ptr = Ptrs[i]; + lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder); + } + + // If there are any explicit spill slots passed to the statepoint, record + // them, but otherwise do not do anything special. These are user provided + // allocas and give control over placement to the consumer. In this case, + // it is the contents of the slot which may get updated, not the pointer to + // the alloca + for (Value *V : StatepointSite.gc_args()) { SDValue Incoming = Builder.getValue(V); - lowerIncomingStatepointValue(Incoming, Ops, Builder); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); + } + } + + // Record computed locations for all lowered values. + // This can not be embedded in lowering loops as we need to record *all* + // values, while previous loops account only for values with unique SDValues.
+ const Instruction *StatepointInstr = + StatepointSite.getCallSite().getInstruction(); + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr]; + + for (GCRelocateOperands RelocateOpers : + StatepointSite.getRelocates(StatepointSite)) { + const Value *V = RelocateOpers.getDerivedPtr(); + SDValue SDV = Builder.getValue(V); + SDValue Loc = Builder.StatepointLowering.getLocation(SDV); + + if (Loc.getNode()) { + SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); + } else { + // Record the value as visited, but not spilled. This is the case for + // allocas and constants. For these values we can avoid emitting a spill + // load while visiting the corresponding gc_relocate. + // Actually we do not need to record them in this map at all. + // We do this only to check that we are not relocating any unvisited value. + SpillMap[V] = None; + + // Default llvm mechanisms for exporting values which are used in + // different basic blocks do not work for gc relocates. + // Note that it would be incorrect to teach llvm that all relocates are + // uses of the corresponding values so that it would automatically + // export them. Relocates of the spilled values do not use the original + // value. + if (StatepointSite.getCallSite().isInvoke()) + Builder.ExportFromCurrentBlock(V); + } } } + void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { + // Check some preconditions for sanity + assert(isStatepoint(&CI) && + "function called must be the statepoint function"); + + LowerStatepoint(ImmutableStatepoint(&CI)); +} + +void SelectionDAGBuilder::LowerStatepoint( + ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. - // Check some preconditions for sanity - assert(isStatepoint(&CI) && - "function called must be the statepoint function"); NumOfStatepoints++; // Clear state StatepointLowering.startNewStatepoint(*this); + ImmutableCallSite CS(ISP.getCallSite()); + #ifndef NDEBUG - // Consistency check - for (const User *U : CI.users()) { - const CallInst *Call = cast<CallInst>(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); + // Consistency check. Don't do this for invokes. It would be too + // expensive to preserve this information across different basic blocks. + if (!CS.isInvoke()) { + for (const User *U : CS->users()) { + const CallInst *Call = cast<CallInst>(U); + if (isGCRelocate(Call)) + StatepointLowering.scheduleRelocCall(*Call); + } } #endif - ImmutableStatepoint ISP(&CI); #ifndef NDEBUG // If this is a malformed statepoint, report it early to simplify debugging. // This should catch any IR level mistake that's made when constructing or @@ -534,42 +602,82 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { ISP.verify(); // Check that the associated GCStrategy expects to encounter statepoints. - // TODO: This if should become an assert. For now, we allow the GCStrategy - // to be optional for backwards compatibility. This will only last a short - // period (i.e. a couple of weeks).
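The SpillMap populated above is keyed by the derived pointer and, assuming StatepointSpilledValueMapTy is a DenseMap<const Value *, Optional<int>>, records either the frame index of the spill slot or None. A sketch of the consuming side, which visitGCRelocate further below implements in full:

Optional<int> Slot = SpillMap.lookup(DerivedPtr);
if (Slot) {
  // Spilled: reload the relocated value from frame index *Slot.
} else {
  // Constant or alloca: never spilled, reuse the original SDValue.
}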
- if (GFI) { - assert(GFI->getStrategy().useStatepoints() && - "GCStrategy does not expect to encounter statepoints"); - } + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); #endif - // Lower statepoint vmstate and gcstate arguments - SmallVector<SDValue, 10> LoweredArgs; - lowerStatepointMetaArgs(LoweredArgs, ISP, *this); + SmallVector<SDValue, 10> LoweredMetaArgs; + lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this); // Get call node, we will replace it later with statepoint - SDNode *CallNode = lowerCallFromStatepoint(CI, *this); + SDNode *CallNode = + lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports); - // Construct the actual STATEPOINT node with all the appropriate arguments - // and return values. + // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END + // nodes with all the appropriate arguments and return values. + + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + SDValue Chain = CallNode->getOperand(0); + + SDValue Glue; + bool CallHasIncomingGlue = CallNode->getGluedNode(); + if (CallHasIncomingGlue) { + // Glue is always last operand + Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); + } + + // Build the GC_TRANSITION_START node if necessary. + // + // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the + // order in which they appear in the call to the statepoint intrinsic. If + // any of the operands is pointer-typed, that operand is immediately + // followed by a SRCVALUE for the pointer that may be used during lowering + // (e.g. to form MachinePointerInfo values for loads/stores). + const bool IsGCTransition = + (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) == + (uint64_t)StatepointFlags::GCTransition; + if (IsGCTransition) { + SmallVector<SDValue, 8> TSOps; + + // Add chain + TSOps.push_back(Chain); + + // Add GC transition arguments + for (const Value *V : ISP.gc_transition_args()) { + TSOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TSOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue if necessary + if (CallHasIncomingGlue) + TSOps.push_back(Glue); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionStart = + DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps); + + Chain = GCTransitionStart.getValue(0); + Glue = GCTransitionStart.getValue(1); + } // TODO: Currently, all of these operands are being marked as read/write in // PrologEpilogInserter.cpp, we should special case the VMState arguments // and flags to be read-only. SmallVector<SDValue, 40> Ops; + // Add the <id> and <numBytes> constants. + Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32)); + // Calculate and push starting position of vmstate arguments - // Call Node: Chain, Target, {Args}, RegMask, [Glue] - SDValue Glue; - if (CallNode->getGluedNode()) { - // Glue is always last operand - Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); - } // Get number of arguments incoming directly into call node unsigned NumCallRegArgs = - CallNode->getNumOperands() - (Glue.getNode() ? 4 : 3); - Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + CallNode->getNumOperands() - (CallHasIncomingGlue ?
4 : 3); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32)); // Add call target SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0); @@ -578,47 +686,74 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { // Add call arguments // Get position of register mask in the call SDNode::op_iterator RegMaskIt; - if (Glue.getNode()) + if (CallHasIncomingGlue) RegMaskIt = CallNode->op_end() - 2; else RegMaskIt = CallNode->op_end() - 1; Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt); - // Add a leading constant argument with the Flags and the calling convention - // masked together - CallingConv::ID CallConv = CI.getCallingConv(); - int Flags = dyn_cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue(); - assert(Flags == 0 && "not expected to be used"); - Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back( - DAG.getTargetConstant(Flags | ((unsigned)CallConv << 1), MVT::i64)); + // Add a constant argument for the calling convention + pushStackMapConstant(Ops, *this, CS.getCallingConv()); + + // Add a constant argument for the flags + uint64_t Flags = ISP.getFlags(); + assert( + ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) + && "unknown flag used"); + pushStackMapConstant(Ops, *this, Flags); // Insert all vmstate and gcstate arguments - Ops.insert(Ops.end(), LoweredArgs.begin(), LoweredArgs.end()); + Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end()); // Add register mask from call node Ops.push_back(*RegMaskIt); // Add chain - Ops.push_back(CallNode->getOperand(0)); + Ops.push_back(Chain); // Same for the glue, but we add it only if original call had it if (Glue.getNode()) Ops.push_back(Glue); - // Compute return values - SmallVector<EVT, 21> ValueVTs; - ValueVTs.push_back(MVT::Other); - ValueVTs.push_back(MVT::Glue); // provide a glue output since we consume one - // as input. This allows someone else to chain - // off us as needed. - SDVTList NodeTys = DAG.getVTList(ValueVTs); + // Compute return values. Provide a glue output since we consume one as + // input. This allows someone else to chain off us as needed. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDNode *StatepointMCNode = + DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); + + SDNode *SinkNode = StatepointMCNode; - SDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT, - getCurSDLoc(), NodeTys, Ops); + // Build the GC_TRANSITION_END node if necessary. + // + // See the comment above regarding GC_TRANSITION_START for the layout of + // the operands to the GC_TRANSITION_END node. 
+ if (IsGCTransition) { + SmallVector<SDValue, 8> TEOps; + + // Add chain + TEOps.push_back(SDValue(StatepointMCNode, 0)); + + // Add GC transition arguments + for (const Value *V : ISP.gc_transition_args()) { + TEOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TEOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue + TEOps.push_back(SDValue(StatepointMCNode, 1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionStart = + DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps); + + SinkNode = GCTransitionStart.getNode(); + } // Replace original call - DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root + DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root // Remove original call node DAG.DeleteNode(CallNode); @@ -636,49 +771,72 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. Instruction *I = cast<Instruction>(CI.getArgOperand(0)); - assert(isStatepoint(I) && - "first argument must be a statepoint token"); - - setValue(&CI, getValue(I)); + assert(isStatepoint(I) && "first argument must be a statepoint token"); + + if (isa<InvokeInst>(I)) { + // For invokes we should have stored the call result in a virtual register. + // We cannot use the default getValue() functionality to copy a value from + // this register because the statepoint and the actual call return types + // can be different, and getValue() will use CopyFromReg of the wrong type, + // which is always i32 in our case. + PointerType *CalleeType = + cast<PointerType>(ImmutableStatepoint(I).getActualCallee()->getType()); + Type *RetTy = + cast<FunctionType>(CalleeType->getElementType())->getReturnType(); + SDValue CopyFromReg = getCopyFromRegs(I, RetTy); + + assert(CopyFromReg.getNode()); + setValue(&CI, CopyFromReg); + } else { + setValue(&CI, getValue(I)); + } } void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { + GCRelocateOperands RelocateOpers(&CI); + #ifndef NDEBUG // Consistency check - StatepointLowering.relocCallVisited(CI); + // We skip this check for invoke statepoints. It would be too expensive to + // preserve validation info through different basic blocks. + if (!RelocateOpers.isTiedToInvoke()) { + StatepointLowering.relocCallVisited(CI); + } #endif - GCRelocateOperands relocateOpers(&CI); - SDValue SD = getValue(relocateOpers.derivedPtr()); + const Value *DerivedPtr = RelocateOpers.getDerivedPtr(); + SDValue SD = getValue(DerivedPtr); + + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()]; - if (isa<ConstantSDNode>(SD) || isa<FrameIndexSDNode>(SD)) { - // We didn't need to spill these special cases (constants and allocas). - // See the handling in spillIncomingValueForStatepoint for detail. + // We should have recorded a location for this pointer + assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value"); + Optional<int> DerivedPtrLocation = SpillMap[DerivedPtr]; + + // We didn't need to spill these special cases (constants and allocas). + // See the handling in spillIncomingValueForStatepoint for detail.
+ if (!DerivedPtrLocation) { setValue(&CI, SD); return; } - SDValue Loc = StatepointLowering.getRelocLocation(SD); - // Emit new load if we did not emit it before - if (!Loc.getNode()) { - SDValue SpillSlot = StatepointLowering.getLocation(SD); - int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation, + SD.getValueType()); - // Be conservative: flush all pending loads - // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities - SDValue Chain = getRoot(); + // Be conservative: flush all pending loads + // TODO: Probably we can be less restrictive on this, + // it may allow more scheduling opportunities + SDValue Chain = getRoot(); - Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, - SpillSlot, MachinePointerInfo::getFixedStack(FI), false, - false, false, 0); + SDValue SpillLoad = + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(*DerivedPtrLocation), + false, false, false, 0); - StatepointLowering.setRelocLocation(SD, Loc); + // Again, be conservative, don't emit pending loads + DAG.setRoot(SpillLoad.getValue(1)); - // Again, be conservative, don't emit pending loads - DAG.setRoot(Loc.getValue(1)); - } - - assert(Loc.getNode()); - setValue(&CI, Loc); + assert(SpillLoad.getNode()); + setValue(&CI, SpillLoad); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h index 673112c..82d0c62 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -30,8 +30,7 @@ class SelectionDAGBuilder; /// works in concert with information in FunctionLoweringInfo. class StatepointLoweringState { public: - StatepointLoweringState() : NextSlotToAllocate(0) { - } + StatepointLoweringState() : NextSlotToAllocate(0) {} /// Reset all state tracking for a newly encountered safepoint. Also /// performs some consistency checking. @@ -57,25 +56,6 @@ public: Locations[val] = Location; } - /// Returns the relocated value for a given input pointer. Will - /// return SDValue() if this value hasn't yet been reloaded from - /// it's stack slot after the statepoint. Otherwise, the value - /// has already been reloaded and the SDValue of that reload will - /// be returned. Note that VMState values are spilled but not - /// reloaded (since they don't change at the safepoint unless - /// also listed in the GC pointer section) and will thus never - /// be in this map - SDValue getRelocLocation(SDValue val) { - if (!RelocLocations.count(val)) - return SDValue(); - return RelocLocations[val]; - } - void setRelocLocation(SDValue val, SDValue Location) { - assert(!RelocLocations.count(val) && - "Trying to allocate already allocated location"); - RelocLocations[val] = Location; - } - /// Record the fact that we expect to encounter a given gc_relocate /// before the next statepoint. If we don't see it, we'll report /// an assertion. @@ -118,8 +98,6 @@ private: /// Maps pre-relocation value (gc pointer directly incoming into statepoint) /// into its location (currently only stack slots) DenseMap<SDValue, SDValue> Locations; - /// Map pre-relocated value into it's new relocated location - DenseMap<SDValue, SDValue> RelocLocations; /// A boolean indicator for each slot listed in the FunctionInfo as to /// whether it has been used in the current statepoint.
Since we try to diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f12c035..833da4b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -100,6 +100,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); Args.push_back(Entry); } + if (LC == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported library call operation!"); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); @@ -201,7 +203,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue Ops[2] = { NewLHS, NewRHS }; NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl).first; - NewRHS = DAG.getConstant(0, RetVT); + NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); if (LC2 != RTLIB::UNKNOWN_LIBCALL) { SDValue Tmp = DAG.getNode(ISD::SETCC, dl, @@ -303,7 +305,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), DAG.getConstant(Demanded & C->getAPIntValue(), - VT)); + dl, VT)); return CombineTo(Op, New); } @@ -447,7 +449,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, Op.getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. if ((NewMask & (KnownZero|KnownZero2)) == NewMask) - return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. 
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; @@ -535,7 +537,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side if (KnownOne == KnownOne2) { // set bits are the same on both sides EVT VT = Op.getValueType(); - SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); } @@ -551,7 +553,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Expanded != C->getAPIntValue()) { EVT VT = Op.getValueType(); SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), - TLO.DAG.getConstant(Expanded, VT)); + TLO.DAG.getConstant(Expanded, dl, VT)); return TLO.CombineTo(Op, New); } // if it already has all the bits set, nothing to change @@ -624,7 +626,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } SDValue NewSA = - TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); @@ -648,7 +650,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, ShTy = InnerVT; SDValue NarrowShl = TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, - TLO.DAG.getConstant(ShAmt, ShTy)); + TLO.DAG.getConstant(ShAmt, dl, ShTy)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), @@ -670,7 +672,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && NewMask.trunc(ShAmt) == 0) { SDValue NewSA = - TLO.DAG.getConstant(ShAmt - InnerShAmt, + TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, Op.getOperand(1).getValueType()); EVT VT = Op.getValueType(); SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, @@ -713,7 +715,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } SDValue NewSA = - TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } @@ -778,7 +780,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Log2 >= 0) { // The bit must come from the sign. SDValue NewSA = - TLO.DAG.getConstant(BitWidth - 1 - Log2, + TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op.getOperand(1).getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), NewSA)); @@ -794,19 +796,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); // If we only care about the highest bit, don't bother shifting right. - if (MsbMask == DemandedMask) { + if (MsbMask == NewMask) { unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); SDValue InOp = Op.getOperand(0); - - // Compute the correct shift amount type, which must be getShiftAmountTy - // for scalar types after legalization. 
- EVT ShiftAmtTy = Op.getValueType(); - if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) - ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); - - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), InOp, ShiftAmt)); + unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits(); + bool AlreadySignExtended = + TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; + // However if the input is already sign extended we expect the sign + // extension to be dropped altogether later and do not simplify. + if (!AlreadySignExtended) { + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, + ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, + ShiftAmt)); + } } // Sign extension. Compute the demanded bits in the result that are not @@ -985,7 +995,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.LegalTypes()) { uint64_t ShVal = ShAmt->getZExtValue(); Shift = - TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(Op.getValueType())); } APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, @@ -1043,7 +1053,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!OpVTLegal && OpVTSizeInBits > 32) Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); unsigned ShVal = Op.getValueType().getSizeInBits()-1; - SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType()); + SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, Op.getValueType(), Sign, ShAmt)); @@ -1076,8 +1086,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If we know the value of all of the demanded bits, return this as a // constant. - if ((NewMask & (KnownZero|KnownOne)) == NewMask) - return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { + // Avoid folding to a constant if any OpaqueConstant is involved. + const SDNode *N = Op.getNode(); + for (SDNodeIterator I = SDNodeIterator::begin(N), + E = SDNodeIterator::end(N); I != E; ++I) { + SDNode *Op = *I; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (C->isOpaque()) + return false; + } + return TLO.CombineTo(Op, + TLO.DAG.getConstant(KnownOne, dl, Op.getValueType())); + } return false; } @@ -1213,13 +1234,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return DAG.getConstant(0, VT); + case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { TargetLowering::BooleanContent Cnt = getBooleanContents(N0->getValueType(0)); return DAG.getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? 
-1ULL : 1, dl, + VT); } } @@ -1253,7 +1275,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (srl (ctlz x), 5) == 1 -> X == 0 Cond = ISD::SETEQ; } - SDValue Zero = DAG.getConstant(0, N0.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero, Cond); } @@ -1274,10 +1296,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) u> 1 -> (x & x-1) != 0 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, - DAG.getConstant(1, CTVT)); + DAG.getConstant(1, dl, CTVT)); SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; - return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC); + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); } // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. @@ -1331,7 +1353,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); - SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); + SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } } @@ -1382,7 +1404,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, PtrType)); + DAG.getConstant(bestOffset, dl, PtrType)); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getPointerInfo().getWithOffset(bestOffset), @@ -1390,8 +1412,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), - newVT)), - DAG.getConstant(0LL, newVT), Cond); + dl, newVT)), + DAG.getConstant(0LL, dl, newVT), Cond); } } } @@ -1407,18 +1429,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (Cond) { case ISD::SETUGT: case ISD::SETUGE: - case ISD::SETEQ: return DAG.getConstant(0, VT); + case ISD::SETEQ: return DAG.getConstant(0, dl, VT); case ISD::SETULT: case ISD::SETULE: - case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETNE: return DAG.getConstant(1, dl, VT); case ISD::SETGT: case ISD::SETGE: // True if the sign bit of C1 is set. - return DAG.getConstant(C1.isNegative(), VT); + return DAG.getConstant(C1.isNegative(), dl, VT); case ISD::SETLT: case ISD::SETLE: // True if the sign bit of C1 isn't set. 
- return DAG.getConstant(C1.isNonNegative(), VT); + return DAG.getConstant(C1.isNonNegative(), dl, VT); default: break; } @@ -1437,7 +1459,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isOperationLegal(ISD::SETCC, newVT) && getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT); - SDValue NewConst = DAG.getConstant(C1.trunc(InSize), newVT); + SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), NewConst, Cond); @@ -1458,7 +1480,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the constant doesn't fit into the number of bits for the source of // the sign extension, it is impossible for both sides to be equal. if (C1.getMinSignedBits() > ExtSrcTyBits) - return DAG.getConstant(Cond == ISD::SETNE, VT); + return DAG.getConstant(Cond == ISD::SETNE, dl, VT); SDValue ZextOp; EVT Op0Ty = N0.getOperand(0).getValueType(); @@ -1467,7 +1489,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } else { APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), - DAG.getConstant(Imm, Op0Ty)); + DAG.getConstant(Imm, dl, Op0Ty)); } if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(ZextOp.getNode()); @@ -1476,7 +1498,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(C1 & APInt::getLowBitsSet( ExtDstTyBits, ExtSrcTyBits), - ExtDstTy), + dl, ExtDstTy), Cond); } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { @@ -1546,20 +1568,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), - DAG.getConstant(1, VT)); + DAG.getConstant(1, dl, VT)); else if (Op0.getValueType().bitsLT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), - DAG.getConstant(1, VT)); + DAG.getConstant(1, dl, VT)); return DAG.getSetCC(dl, VT, Op0, - DAG.getConstant(0, Op0.getValueType()), + DAG.getConstant(0, dl, Op0.getValueType()), Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } if (Op0.getOpcode() == ISD::AssertZext && cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1) return DAG.getSetCC(dl, VT, Op0, - DAG.getConstant(0, Op0.getValueType()), + DAG.getConstant(0, dl, Op0.getValueType()), Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } } @@ -1576,7 +1598,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { - if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true // X >= C0 --> X > (C0 - 1) APInt C = C1 - 1; ISD::CondCode NewCC = (Cond == ISD::SETGE) ? 
ISD::SETGT : ISD::SETUGT; @@ -1585,13 +1607,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && isLegalICmpImmediate(C.getSExtValue())))) { return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, N1.getValueType()), + DAG.getConstant(C, dl, N1.getValueType()), NewCC); } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { - if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true // X <= C0 --> X < (C0 + 1) APInt C = C1 + 1; ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; @@ -1600,19 +1622,19 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && isLegalICmpImmediate(C.getSExtValue())))) { return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, N1.getValueType()), + DAG.getConstant(C, dl, N1.getValueType()), NewCC); } } if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getConstant(0, VT); // X < MIN --> false + return DAG.getConstant(0, dl, VT); // X < MIN --> false if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) - return DAG.getConstant(1, VT); // X >= MIN --> true + return DAG.getConstant(1, dl, VT); // X >= MIN --> true if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getConstant(0, VT); // X > MAX --> false + return DAG.getConstant(0, dl, VT); // X > MAX --> false if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) - return DAG.getConstant(1, VT); // X <= MAX --> true + return DAG.getConstant(1, dl, VT); // X <= MAX --> true // Canonicalize setgt X, Min --> setne X, Min if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) @@ -1624,12 +1646,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If we have setult X, 1, turn it into seteq X, 0 if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, N0.getValueType()), + DAG.getConstant(MinVal, dl, N0.getValueType()), ISD::SETEQ); // If we have setugt X, Max-1, turn it into seteq X, Max if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, N0.getValueType()), + DAG.getConstant(MaxVal, dl, N0.getValueType()), ISD::SETEQ); // If we have "setcc X, C0", check to see if we can shrink the immediate @@ -1639,14 +1661,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETUGT && C1 == APInt::getSignedMaxValue(OperandBitSize)) return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, N1.getValueType()), + DAG.getConstant(0, dl, N1.getValueType()), ISD::SETLT); // SETULT X, SINTMIN -> SETGT X, -1 if (Cond == ISD::SETULT && C1 == APInt::getSignedMinValue(OperandBitSize)) { SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, N1.getValueType()); return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); } @@ -1665,7 +1687,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (AndRHS->getAPIntValue().isPowerOf2()) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy))); + DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, + ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // 
(X & 8) == 8 --> (X & 8) >> 3 @@ -1673,7 +1696,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (C1.isPowerOf2()) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(C1.logBase2(), ShiftTy))); + DAG.getConstant(C1.logBase2(), dl, + ShiftTy))); } } } @@ -1692,8 +1716,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), - DAG.getConstant(ShiftBits, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy); + DAG.getConstant(ShiftBits, dl, + ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); } } @@ -1715,13 +1740,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ShiftBits = C1.countTrailingZeros(); } NewC = NewC.lshr(ShiftBits); - if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { + if (ShiftBits && NewC.getMinSignedBits() <= 64 && + isLegalICmpImmediate(NewC.getSExtValue())) { EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, - DAG.getConstant(ShiftBits, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(NewC, CmpTy); + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); } } @@ -1740,9 +1766,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (ISD::getUnorderedFlavor(Cond)) { default: llvm_unreachable("Unknown flavor!"); case 0: // Known false. - return DAG.getConstant(0, VT); + return DAG.getConstant(0, dl, VT); case 1: // Known true. - return DAG.getConstant(1, VT); + return DAG.getConstant(1, dl, VT); case 2: // Undefined. return DAG.getUNDEF(VT); } @@ -1809,13 +1835,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // We can always fold X == X for integer setcc's. if (N0.getValueType().isInteger()) { - return DAG.getConstant(EqVal, VT); + return DAG.getConstant(EqVal, dl, VT); } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. - return DAG.getConstant(EqVal, VT); + return DAG.getConstant(EqVal, dl, VT); if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) - return DAG.getConstant(EqVal, VT); + return DAG.getConstant(EqVal, dl, VT); // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; @@ -1856,7 +1882,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(RHSC->getAPIntValue()- LHSR->getAPIntValue(), - N0.getValueType()), Cond); + dl, N0.getValueType()), Cond); } // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. 
@@ -1868,7 +1894,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(), - N0.getValueType()), + dl, N0.getValueType()), Cond); } @@ -1879,7 +1905,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getSetCC(dl, VT, N0.getOperand(1), DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(), - N0.getValueType()), + dl, N0.getValueType()), Cond); } } @@ -1896,16 +1922,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!LegalRHSImm || N0.getNode()->hasOneUse()) { if (N0.getOperand(0) == N1) return DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(0, N0.getValueType()), Cond); + DAG.getConstant(0, dl, N0.getValueType()), Cond); if (N0.getOperand(1) == N1) { if (DAG.isCommutativeBinOp(N0.getOpcode())) return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(0, N0.getValueType()), Cond); + DAG.getConstant(0, dl, N0.getValueType()), + Cond); if (N0.getNode()->hasOneUse()) { assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); // (Z-X) == X --> Z == X<<1 SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, - DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(1, dl, + getShiftAmountTy(N1.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -1919,16 +1947,17 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Simplify X == (X+Z) --> Z == 0 if (N1.getOperand(0) == N0) return DAG.getSetCC(dl, VT, N1.getOperand(1), - DAG.getConstant(0, N1.getValueType()), Cond); + DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getOperand(1) == N0) { if (DAG.isCommutativeBinOp(N1.getOpcode())) return DAG.getSetCC(dl, VT, N1.getOperand(0), - DAG.getConstant(0, N1.getValueType()), Cond); + DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getNode()->hasOneUse()) { assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); // X == (Z-X) --> X<<1 == Z SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, getShiftAmountTy(N0.getValueType()))); + DAG.getConstant(1, dl, + getShiftAmountTy(N0.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -1946,7 +1975,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N0.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, N1.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, N1.getValueType()); return DAG.getSetCC(dl, VT, N0, Zero, Cond); } } @@ -1957,7 +1986,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N1.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, N0.getValueType()); + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); return DAG.getSetCC(dl, VT, N1, Zero, Cond); } } @@ -2172,7 +2201,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // now; without this it would get ZExt'd later in // ScheduleDAGSDNodes::EmitNode, which is very generic. 
Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), - MVT::i64)); + SDLoc(C), MVT::i64)); return; } } @@ -2181,9 +2210,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } -std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { +std::pair<unsigned, const TargetRegisterClass *> +TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, + const std::string &Constraint, + MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2195,8 +2225,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint, std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); // Figure out which register class contains this reg. - const TargetRegisterInfo *RI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; @@ -2249,8 +2277,9 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { /// and also tie in the associated operand values. /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. -TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( - ImmutableCallSite CS) const { +TargetLowering::AsmOperandInfoVector +TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, + ImmutableCallSite CS) const { /// ConstraintOperands - Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); @@ -2341,7 +2370,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( } // If we have multiple alternative constraints, select the best alternative. - if (ConstraintOperands.size()) { + if (!ConstraintOperands.empty()) { if (maCount) { unsigned bestMAIndex = 0; int bestWeight = -1; @@ -2412,12 +2441,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -2640,7 +2669,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, unsigned ShAmt = d.countTrailingZeros(); if (ShAmt) { // TODO: For UDIV use SRL instead of SRA. 
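For orientation inside this BuildExactSDIV hunk: the SRA just below shifts out the divisor's trailing zero bits, and the `xn *= 2 - d*xn` loop that follows is Newton's iteration for the multiplicative inverse of the remaining odd factor modulo 2^bits; each step doubles the number of correct low bits, so the loop terminates quickly and an exact division becomes a single multiply. A standalone editorial sketch on fixed 32-bit values, including the magic-multiply form that the BuildUDIV hunk further below selects for the non-exact case (all names illustrative):

#include <cassert>
#include <cstdint>

// Inverse of an odd d modulo 2^32, mirroring the loop in BuildExactSDIV.
uint32_t inverse_mod_2_32(uint32_t d) {
  assert((d & 1) && "only odd values are invertible modulo 2^32");
  uint32_t x = 1; // correct to 1 low bit; each iteration doubles that
  while (d * x != 1)
    x *= 2 - d * x;
  return x;
}

// Exact division as a multiply; valid only when d_odd divides n, which is
// what the 'exact' flag on the original division guarantees. The same
// product works for signed operands because everything is modulo 2^32.
uint32_t exact_div(uint32_t n, uint32_t d_odd) {
  return n * inverse_mod_2_32(d_odd);
}

// Non-exact unsigned division instead uses the high half of a widened
// multiply (MULHU) with a precomputed magic constant, e.g. n/5 via
// 0xCCCCCCCD = ceil(2^34 / 5) followed by a shift of 2.
uint32_t udiv5(uint32_t n) {
  return static_cast<uint32_t>((static_cast<uint64_t>(n) * 0xCCCCCCCDu) >> 32) >> 2;
}

int main() {
  assert(exact_div(35, 7) == 5);
  assert(udiv5(12345) == 2469);
  return 0;
}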
- SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); + SDValue Amt = + DAG.getConstant(ShAmt, dl, getShiftAmountTy(Op1.getValueType())); Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false, true); d = d.ashr(ShAmt); @@ -2651,7 +2681,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, while ((t = d*xn) != 1) xn *= APInt(d.getBitWidth(), 2) - t; - Op2 = DAG.getConstant(xn, Op1.getValueType()); + Op2 = DAG.getConstant(xn, dl, Op1.getValueType()); return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); } @@ -2680,12 +2710,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : isOperationLegalOrCustom(ISD::MULHS, VT)) Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), - DAG.getConstant(magics.m, VT)); + DAG.getConstant(magics.m, dl, VT)); else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N->getOperand(0), - DAG.getConstant(magics.m, VT)).getNode(), 1); + DAG.getConstant(magics.m, dl, VT)).getNode(), 1); else return SDValue(); // No mulhs or equvialent // If d > 0 and m < 0, add the numerator @@ -2701,12 +2731,13 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, // Shift right algebraic if shift value is nonzero if (magics.s > 0) { Q = DAG.getNode(ISD::SRA, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); + DAG.getConstant(magics.s, dl, + getShiftAmountTy(Q.getValueType()))); Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(VT.getScalarSizeInBits() - 1, + DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, getShiftAmountTy(Q.getValueType()))); Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); @@ -2740,7 +2771,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, if (magics.a != 0 && !Divisor[0]) { unsigned Shift = Divisor.countTrailingZeros(); Q = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); + DAG.getConstant(Shift, dl, + getShiftAmountTy(Q.getValueType()))); Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. @@ -2752,11 +2784,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, // FIXME: We should support doing a MUL in a wider type if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : isOperationLegalOrCustom(ISD::MULHU, VT)) - Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); + Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT)); else if (IsAfterLegalization ? 
isOperationLegal(ISD::UMUL_LOHI, VT) : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, - DAG.getConstant(magics.m, VT)).getNode(), 1); + DAG.getConstant(magics.m, dl, VT)).getNode(), 1); else return SDValue(); // No mulhu or equvialent @@ -2766,17 +2798,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, assert(magics.s < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); + DAG.getConstant(magics.s, dl, + getShiftAmountTy(Q.getValueType()))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType()))); + DAG.getConstant(1, dl, + getShiftAmountTy(NPQ.getValueType()))); Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); Created->push_back(NPQ.getNode()); return DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); + DAG.getConstant(magics.s - 1, dl, + getShiftAmountTy(NPQ.getValueType()))); } } @@ -2863,7 +2898,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, isOperationLegalOrCustom(ISD::SRL, VT) && isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); - SDValue Shift = DAG.getConstant(ShiftAmt, getShiftAmountTy(VT)); + SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT)); LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); @@ -2913,13 +2948,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); - SDValue ExponentLoBit = DAG.getConstant(23, IntVT); - SDValue Bias = DAG.getConstant(127, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); + SDValue Bias = DAG.getConstant(127, dl, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl, IntVT); - SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); - SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); @@ -2935,7 +2970,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SDValue R = DAG.getNode(ISD::OR, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), - DAG.getConstant(0x00800000, IntVT)); + DAG.getConstant(0x00800000, dl, IntVT)); R = DAG.getZExtOrTrunc(R, dl, NVT); @@ -2955,7 +2990,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, DAG.getNode(ISD::XOR, dl, NVT, R, Sign), Sign); - Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), - DAG.getConstant(0, NVT), Ret, ISD::SETLT); + Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT), + 
DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); return true; } diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp index 0be00f0..b12e943 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp @@ -38,416 +38,18 @@ using namespace llvm; #define DEBUG_TYPE "shadowstackgc" namespace { - - class ShadowStackGC : public GCStrategy { - /// RootChain - This is the global linked-list that contains the chain of GC - /// roots. - GlobalVariable *Head; - - /// StackEntryTy - Abstract type of a link in the shadow stack. - /// - StructType *StackEntryTy; - StructType *FrameMapTy; - - /// Roots - GC roots in the current function. Each is a pair of the - /// intrinsic call and its corresponding alloca. - std::vector<std::pair<CallInst*,AllocaInst*> > Roots; - - public: - ShadowStackGC(); - - bool initializeCustomLowering(Module &M) override; - bool performCustomLowering(Function &F) override; - - private: - bool IsNullValue(Value *V); - Constant *GetFrameMap(Function &F); - Type* GetConcreteStackEntryType(Function &F); - void CollectRoots(Function &F); - static GetElementPtrInst *CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, - int Idx1, const char *Name); - static GetElementPtrInst *CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, - int Idx1, int Idx2, const char *Name); - }; - +class ShadowStackGC : public GCStrategy { +public: + ShadowStackGC(); +}; } static GCRegistry::Add<ShadowStackGC> -X("shadow-stack", "Very portable GC for uncooperative code generators"); - -namespace { - /// EscapeEnumerator - This is a little algorithm to find all escape points - /// from a function so that "finally"-style code can be inserted. In addition - /// to finding the existing return and unwind instructions, it also (if - /// necessary) transforms any call instructions into invokes and sends them to - /// a landing pad. - /// - /// It's wrapped up in a state machine using the same transform C# uses for - /// 'yield return' enumerators, This transform allows it to be non-allocating. - class EscapeEnumerator { - Function &F; - const char *CleanupBBName; - - // State. - int State; - Function::iterator StateBB, StateE; - IRBuilder<> Builder; - - public: - EscapeEnumerator(Function &F, const char *N = "cleanup") - : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} - - IRBuilder<> *Next() { - switch (State) { - default: - return nullptr; - - case 0: - StateBB = F.begin(); - StateE = F.end(); - State = 1; - - case 1: - // Find all 'return', 'resume', and 'unwind' instructions. - while (StateBB != StateE) { - BasicBlock *CurBB = StateBB++; - - // Branches and invokes do not escape, only unwind, resume, and return - // do. - TerminatorInst *TI = CurBB->getTerminator(); - if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) - continue; - - Builder.SetInsertPoint(TI->getParent(), TI); - return &Builder; - } - - State = 2; - - // Find all 'call' instructions. - SmallVector<Instruction*,16> Calls; - for (Function::iterator BB = F.begin(), - E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), - EE = BB->end(); II != EE; ++II) - if (CallInst *CI = dyn_cast<CallInst>(II)) - if (!CI->getCalledFunction() || - !CI->getCalledFunction()->getIntrinsicID()) - Calls.push_back(CI); - - if (Calls.empty()) - return nullptr; + X("shadow-stack", "Very portable GC for uncooperative code generators"); - // Create a cleanup block. 
- LLVMContext &C = F.getContext(); - BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); - Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), - Type::getInt32Ty(C), nullptr); - Constant *PersFn = - F.getParent()-> - getOrInsertFunction("__gcc_personality_v0", - FunctionType::get(Type::getInt32Ty(C), true)); - LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1, - "cleanup.lpad", - CleanupBB); - LPad->setCleanup(true); - ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); +void llvm::linkShadowStackGC() {} - // Transform the 'call' instructions into 'invoke's branching to the - // cleanup block. Go in reverse order to make prettier BB names. - SmallVector<Value*,16> Args; - for (unsigned I = Calls.size(); I != 0; ) { - CallInst *CI = cast<CallInst>(Calls[--I]); - - // Split the basic block containing the function call. - BasicBlock *CallBB = CI->getParent(); - BasicBlock *NewBB = - CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); - - // Remove the unconditional branch inserted at the end of CallBB. - CallBB->getInstList().pop_back(); - NewBB->getInstList().remove(CI); - - // Create a new invoke instruction. - Args.clear(); - CallSite CS(CI); - Args.append(CS.arg_begin(), CS.arg_end()); - - InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), - NewBB, CleanupBB, - Args, CI->getName(), CallBB); - II->setCallingConv(CI->getCallingConv()); - II->setAttributes(CI->getAttributes()); - CI->replaceAllUsesWith(II); - delete CI; - } - - Builder.SetInsertPoint(RI->getParent(), RI); - return &Builder; - } - } - }; -} - -// ----------------------------------------------------------------------------- - -void llvm::linkShadowStackGC() { } - -ShadowStackGC::ShadowStackGC() : Head(nullptr), StackEntryTy(nullptr) { +ShadowStackGC::ShadowStackGC() { InitRoots = true; CustomRoots = true; } - -Constant *ShadowStackGC::GetFrameMap(Function &F) { - // doInitialization creates the abstract type of this value. - Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); - - // Truncate the ShadowStackDescriptor if some metadata is null. - unsigned NumMeta = 0; - SmallVector<Constant*, 16> Metadata; - for (unsigned I = 0; I != Roots.size(); ++I) { - Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); - if (!C->isNullValue()) - NumMeta = I + 1; - Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); - } - Metadata.resize(NumMeta); - - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - - Constant *BaseElts[] = { - ConstantInt::get(Int32Ty, Roots.size(), false), - ConstantInt::get(Int32Ty, NumMeta, false), - }; - - Constant *DescriptorElts[] = { - ConstantStruct::get(FrameMapTy, BaseElts), - ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata) - }; - - Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()}; - StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta)); - - Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); - - // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems - // that, short of multithreaded LLVM, it should be safe; all that is - // necessary is that a simple Module::iterator loop not be invalidated. - // Appending to the GlobalVariable list is safe in that sense. - // - // All of the output passes emit globals last. The ExecutionEngine - // explicitly supports adding globals to the module after - // initialization. 
- // - // Still, if it isn't deemed acceptable, then this transformation needs - // to be a ModulePass (which means it cannot be in the 'llc' pipeline - // (which uses a FunctionPassManager (which segfaults (not asserts) if - // provided a ModulePass))). - Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, - GlobalVariable::InternalLinkage, - FrameMap, "__gc_" + F.getName()); - - Constant *GEPIndices[2] = { - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) - }; - return ConstantExpr::getGetElementPtr(GV, GEPIndices); -} - -Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { - // doInitialization creates the generic version of this type. - std::vector<Type*> EltTys; - EltTys.push_back(StackEntryTy); - for (size_t I = 0; I != Roots.size(); I++) - EltTys.push_back(Roots[I].second->getAllocatedType()); - - return StructType::create(EltTys, "gc_stackentry."+F.getName().str()); -} - -/// doInitialization - If this module uses the GC intrinsics, find them now. If -/// not, exit fast. -bool ShadowStackGC::initializeCustomLowering(Module &M) { - // struct FrameMap { - // int32_t NumRoots; // Number of roots in stack frame. - // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. - // void *Meta[]; // May be absent for roots without metadata. - // }; - std::vector<Type*> EltTys; - // 32 bits is ok up to a 32GB stack frame. :) - EltTys.push_back(Type::getInt32Ty(M.getContext())); - // Specifies length of variable length array. - EltTys.push_back(Type::getInt32Ty(M.getContext())); - FrameMapTy = StructType::create(EltTys, "gc_map"); - PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); - - // struct StackEntry { - // ShadowStackEntry *Next; // Caller's stack entry. - // FrameMap *Map; // Pointer to constant FrameMap. - // void *Roots[]; // Stack roots (in-place array, so we pretend). - // }; - - StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); - - EltTys.clear(); - EltTys.push_back(PointerType::getUnqual(StackEntryTy)); - EltTys.push_back(FrameMapPtrTy); - StackEntryTy->setBody(EltTys); - PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); - - // Get the root chain if it already exists. - Head = M.getGlobalVariable("llvm_gc_root_chain"); - if (!Head) { - // If the root chain does not exist, insert a new one with linkonce - // linkage! - Head = new GlobalVariable(M, StackEntryPtrTy, false, - GlobalValue::LinkOnceAnyLinkage, - Constant::getNullValue(StackEntryPtrTy), - "llvm_gc_root_chain"); - } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { - Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); - Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); - } - - return true; -} - -bool ShadowStackGC::IsNullValue(Value *V) { - if (Constant *C = dyn_cast<Constant>(V)) - return C->isNullValue(); - return false; -} - -void ShadowStackGC::CollectRoots(Function &F) { - // FIXME: Account for original alignment. Could fragment the root array. - // Approach 1: Null initialize empty slots at runtime. Yuck. - // Approach 2: Emit a map of the array instead of just a count. 
- - assert(Roots.empty() && "Not cleaned up?"); - - SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots; - - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) - if (Function *F = CI->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::gcroot) { - std::pair<CallInst*, AllocaInst*> Pair = std::make_pair( - CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); - if (IsNullValue(CI->getArgOperand(1))) - Roots.push_back(Pair); - else - MetaRoots.push_back(Pair); - } - - // Number roots with metadata (usually empty) at the beginning, so that the - // FrameMap::Meta array can be elided. - Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); -} - -GetElementPtrInst * -ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, - int Idx, int Idx2, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), - ConstantInt::get(Type::getInt32Ty(Context), Idx), - ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; - Value* Val = B.CreateGEP(BasePtr, Indices, Name); - - assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); - - return dyn_cast<GetElementPtrInst>(Val); -} - -GetElementPtrInst * -ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, - int Idx, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), - ConstantInt::get(Type::getInt32Ty(Context), Idx) }; - Value *Val = B.CreateGEP(BasePtr, Indices, Name); - - assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); - - return dyn_cast<GetElementPtrInst>(Val); -} - -/// runOnFunction - Insert code to maintain the shadow stack. -bool ShadowStackGC::performCustomLowering(Function &F) { - LLVMContext &Context = F.getContext(); - - // Find calls to llvm.gcroot. - CollectRoots(F); - - // If there are no roots in this function, then there is no need to add a - // stack map entry for it. - if (Roots.empty()) - return false; - - // Build the constant map and figure the type of the shadow stack entry. - Value *FrameMap = GetFrameMap(F); - Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); - - // Build the shadow stack entry at the very start of the function. - BasicBlock::iterator IP = F.getEntryBlock().begin(); - IRBuilder<> AtEntry(IP->getParent(), IP); - - Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, - "gc_frame"); - - while (isa<AllocaInst>(IP)) ++IP; - AtEntry.SetInsertPoint(IP->getParent(), IP); - - // Initialize the map pointer and load the current head of the shadow stack. - Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); - Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, - 0,1,"gc_frame.map"); - AtEntry.CreateStore(FrameMap, EntryMapPtr); - - // After all the allocas... - for (unsigned I = 0, E = Roots.size(); I != E; ++I) { - // For each root, find the corresponding slot in the aggregate... - Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); - - // And use it in lieu of the alloca. - AllocaInst *OriginalAlloca = Roots[I].second; - SlotPtr->takeName(OriginalAlloca); - OriginalAlloca->replaceAllUsesWith(SlotPtr); - } - - // Move past the original stores inserted by GCStrategy::InitRoots. 
This isn't - // really necessary (the collector would never see the intermediate state at - // runtime), but it's nicer not to push the half-initialized entry onto the - // shadow stack. - while (isa<StoreInst>(IP)) ++IP; - AtEntry.SetInsertPoint(IP->getParent(), IP); - - // Push the entry onto the shadow stack. - Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, - StackEntry,0,0,"gc_frame.next"); - Instruction *NewHeadVal = CreateGEP(Context, AtEntry, - StackEntry, 0, "gc_newhead"); - AtEntry.CreateStore(CurrentHead, EntryNextPtr); - AtEntry.CreateStore(NewHeadVal, Head); - - // For each instruction that escapes... - EscapeEnumerator EE(F, "gc_cleanup"); - while (IRBuilder<> *AtExit = EE.Next()) { - // Pop the entry from the shadow stack. Don't reuse CurrentHead from - // AtEntry, since that would make the value live for the entire function. - Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0, - "gc_frame.next"); - Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); - AtExit->CreateStore(SavedHead, Head); - } - - // Delete the original allocas (which are no longer used) and the intrinsic - // calls (which are no longer valid). Doing this last avoids invalidating - // iterators. - for (unsigned I = 0, E = Roots.size(); I != E; ++I) { - Roots[I].first->eraseFromParent(); - Roots[I].second->eraseFromParent(); - } - - Roots.clear(); - return true; -} diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp new file mode 100644 index 0000000..7c0b2bb --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -0,0 +1,460 @@ +//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the custom lowering code required by the shadow-stack GC +// strategy. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +#define DEBUG_TYPE "shadowstackgclowering" + +namespace { + +class ShadowStackGCLowering : public FunctionPass { + /// RootChain - This is the global linked-list that contains the chain of GC + /// roots. + GlobalVariable *Head; + + /// StackEntryTy - Abstract type of a link in the shadow stack. + /// + StructType *StackEntryTy; + StructType *FrameMapTy; + + /// Roots - GC roots in the current function. Each is a pair of the + /// intrinsic call and its corresponding alloca. 
+ std::vector<std::pair<CallInst *, AllocaInst *>> Roots; + +public: + static char ID; + ShadowStackGCLowering(); + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + +private: + bool IsNullValue(Value *V); + Constant *GetFrameMap(Function &F); + Type *GetConcreteStackEntryType(Function &F); + void CollectRoots(Function &F); + static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, + Type *Ty, Value *BasePtr, int Idx1, + const char *Name); + static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, + Type *Ty, Value *BasePtr, int Idx1, int Idx2, + const char *Name); +}; +} + +INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering", + "Shadow Stack GC Lowering", false, false) +INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering", + "Shadow Stack GC Lowering", false, false) + +FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); } + +char ShadowStackGCLowering::ID = 0; + +ShadowStackGCLowering::ShadowStackGCLowering() + : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr), + FrameMapTy(nullptr) { + initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry()); +} + +namespace { +/// EscapeEnumerator - This is a little algorithm to find all escape points +/// from a function so that "finally"-style code can be inserted. In addition +/// to finding the existing return and unwind instructions, it also (if +/// necessary) transforms any call instructions into invokes and sends them to +/// a landing pad. +/// +/// It's wrapped up in a state machine using the same transform C# uses for +/// 'yield return' enumerators, This transform allows it to be non-allocating. +class EscapeEnumerator { + Function &F; + const char *CleanupBBName; + + // State. + int State; + Function::iterator StateBB, StateE; + IRBuilder<> Builder; + +public: + EscapeEnumerator(Function &F, const char *N = "cleanup") + : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} + + IRBuilder<> *Next() { + switch (State) { + default: + return nullptr; + + case 0: + StateBB = F.begin(); + StateE = F.end(); + State = 1; + + case 1: + // Find all 'return', 'resume', and 'unwind' instructions. + while (StateBB != StateE) { + BasicBlock *CurBB = StateBB++; + + // Branches and invokes do not escape, only unwind, resume, and return + // do. + TerminatorInst *TI = CurBB->getTerminator(); + if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) + continue; + + Builder.SetInsertPoint(TI->getParent(), TI); + return &Builder; + } + + State = 2; + + // Find all 'call' instructions. + SmallVector<Instruction *, 16> Calls; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE; + ++II) + if (CallInst *CI = dyn_cast<CallInst>(II)) + if (!CI->getCalledFunction() || + !CI->getCalledFunction()->getIntrinsicID()) + Calls.push_back(CI); + + if (Calls.empty()) + return nullptr; + + // Create a cleanup block. 
+ LLVMContext &C = F.getContext(); + BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); + Type *ExnTy = + StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr); + Constant *PersFn = F.getParent()->getOrInsertFunction( + "__gcc_personality_v0", FunctionType::get(Type::getInt32Ty(C), true)); + LandingPadInst *LPad = + LandingPadInst::Create(ExnTy, PersFn, 1, "cleanup.lpad", CleanupBB); + LPad->setCleanup(true); + ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); + + // Transform the 'call' instructions into 'invoke's branching to the + // cleanup block. Go in reverse order to make prettier BB names. + SmallVector<Value *, 16> Args; + for (unsigned I = Calls.size(); I != 0;) { + CallInst *CI = cast<CallInst>(Calls[--I]); + + // Split the basic block containing the function call. + BasicBlock *CallBB = CI->getParent(); + BasicBlock *NewBB = + CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); + + // Remove the unconditional branch inserted at the end of CallBB. + CallBB->getInstList().pop_back(); + NewBB->getInstList().remove(CI); + + // Create a new invoke instruction. + Args.clear(); + CallSite CS(CI); + Args.append(CS.arg_begin(), CS.arg_end()); + + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args, + CI->getName(), CallBB); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + CI->replaceAllUsesWith(II); + delete CI; + } + + Builder.SetInsertPoint(RI->getParent(), RI); + return &Builder; + } + } +}; +} + + +Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { + // doInitialization creates the abstract type of this value. + Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); + + // Truncate the ShadowStackDescriptor if some metadata is null. + unsigned NumMeta = 0; + SmallVector<Constant *, 16> Metadata; + for (unsigned I = 0; I != Roots.size(); ++I) { + Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); + if (!C->isNullValue()) + NumMeta = I + 1; + Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); + } + Metadata.resize(NumMeta); + + Type *Int32Ty = Type::getInt32Ty(F.getContext()); + + Constant *BaseElts[] = { + ConstantInt::get(Int32Ty, Roots.size(), false), + ConstantInt::get(Int32Ty, NumMeta, false), + }; + + Constant *DescriptorElts[] = { + ConstantStruct::get(FrameMapTy, BaseElts), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)}; + + Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()}; + StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta)); + + Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); + + // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems + // that, short of multithreaded LLVM, it should be safe; all that is + // necessary is that a simple Module::iterator loop not be invalidated. + // Appending to the GlobalVariable list is safe in that sense. + // + // All of the output passes emit globals last. The ExecutionEngine + // explicitly supports adding globals to the module after + // initialization. + // + // Still, if it isn't deemed acceptable, then this transformation needs + // to be a ModulePass (which means it cannot be in the 'llc' pipeline + // (which uses a FunctionPassManager (which segfaults (not asserts) if + // provided a ModulePass))). 
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, + GlobalVariable::InternalLinkage, FrameMap, + "__gc_" + F.getName()); + + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}; + return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices); +} + +Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { + // doInitialization creates the generic version of this type. + std::vector<Type *> EltTys; + EltTys.push_back(StackEntryTy); + for (size_t I = 0; I != Roots.size(); I++) + EltTys.push_back(Roots[I].second->getAllocatedType()); + + return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str()); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. If +/// not, exit fast. +bool ShadowStackGCLowering::doInitialization(Module &M) { + bool Active = false; + for (Function &F : M) { + if (F.hasGC() && F.getGC() == std::string("shadow-stack")) { + Active = true; + break; + } + } + if (!Active) + return false; + + // struct FrameMap { + // int32_t NumRoots; // Number of roots in stack frame. + // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. + // void *Meta[]; // May be absent for roots without metadata. + // }; + std::vector<Type *> EltTys; + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + FrameMapTy = StructType::create(EltTys, "gc_map"); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). + // }; + + StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(StackEntryTy)); + EltTys.push_back(FrameMapPtrTy); + StackEntryTy->setBody(EltTys); + PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable( + M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain"); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); + } + + return true; +} + +bool ShadowStackGCLowering::IsNullValue(Value *V) { + if (Constant *C = dyn_cast<Constant>(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackGCLowering::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. 
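The struct FrameMap / struct StackEntry comments in doInitialization above are the whole contract with the collector's runtime: the lowered code maintains a linked list of frames rooted at `llvm_gc_root_chain`. A sketch of that runtime side, modeled on the shadow-stack walker in LLVM's GC documentation; treat it as an assumption-labeled mirror of those IR types (the flexible array members are the C99-ism Clang and GCC also accept in C++), not code from this tree:

#include <cstdint>

struct FrameMap {
  int32_t NumRoots;   // Number of roots in the stack frame.
  int32_t NumMeta;    // Number of metadata entries; may be < NumRoots.
  const void *Meta[]; // Metadata for the first NumMeta roots.
};

struct StackEntry {
  StackEntry *Next;    // Caller's stack entry.
  const FrameMap *Map; // Pointer to the constant FrameMap.
  void *Roots[];       // Stack roots, allocated in-place.
};

extern "C" StackEntry *llvm_gc_root_chain;

void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
  for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
    unsigned i = 0;
    // Roots with metadata come first; CollectRoots below orders them that
    // way precisely so the Meta array can be truncated.
    for (unsigned e = R->Map->NumMeta; i != e; ++i)
      Visitor(&R->Roots[i], R->Map->Meta[i]);
    for (unsigned e = R->Map->NumRoots; i != e; ++i)
      Visitor(&R->Roots[i], nullptr);
  }
}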
+ + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair<CallInst *, AllocaInst *> Pair = std::make_pair( + CI, + cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. + Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Type *Ty, + Value *BasePtr, int Idx, + int Idx2, + const char *Name) { + Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2)}; + Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Type *Ty, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx)}; + Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackGCLowering::runOnFunction(Function &F) { + // Quick exit for functions that do not use the shadow stack GC. + if (!F.hasGC() || + F.getGC() != std::string("shadow-stack")) + return false; + + LLVMContext &Context = F.getContext(); + + // Find calls to llvm.gcroot. + CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + IRBuilder<> AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = + AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame"); + + while (isa<AllocaInst>(IP)) + ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 1, "gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. 
+ AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by GCStrategy::InitRoots. This isn't + // really necessary (the collector would never see the intermediate state at + // runtime), but it's nicer not to push the half-initialized entry onto the + // shadow stack. + while (isa<StoreInst>(IP)) + ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Push the entry onto the shadow stack. + Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 0, "gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); + + // For each instruction that escapes... + EscapeEnumerator EE(F, "gc_cleanup"); + while (IRBuilder<> *AtExit = EE.Next()) { + // Pop the entry from the shadow stack. Don't reuse CurrentHead from + // AtEntry, since that would make the value live for the entire function. + Instruction *EntryNextPtr2 = + CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0, + "gc_frame.next"); + Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); + AtExit->CreateStore(SavedHead, Head); + } + + // Delete the original allocas (which are no longer used) and the intrinsic + // calls (which are no longer valid). Doing this last avoids invalidating + // iterators. + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + Roots[I].first->eraseFromParent(); + Roots[I].second->eraseFromParent(); + } + + Roots.clear(); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp new file mode 100644 index 0000000..4463cc7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -0,0 +1,388 @@ +//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass looks for safe points where the prologue and epilogue can be +// inserted. +// The safe point for the prologue (resp. epilogue) is called Save +// (resp. Restore). +// A point is safe for prologue (resp. epilogue) if and only if +// it 1) dominates (resp. post-dominates) all the frame-related operations and +// between 2) two executions of the Save (resp. Restore) point there is an +// execution of the Restore (resp. Save) point. +// +// For instance, the following points are safe: +// for (int i = 0; i < 10; ++i) { +// Save +// ... +// Restore +// } +// Indeed, the execution looks like Save -> Restore -> Save -> Restore ... +// And the following points are not: +// for (int i = 0; i < 10; ++i) { +// Save +// ... +// } +// for (int i = 0; i < 10; ++i) { +// ... +// Restore +// } +// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore. +// +// This pass also ensures that the safe points are 3) cheaper than the regular +// entry and exit blocks. +// +// Property #1 is ensured via the use of MachineDominatorTree and +// MachinePostDominatorTree. +// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both +// points must be in the same loop. +// Property #3 is ensured via the MachineBlockFrequencyInfo. 
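A concrete reading of those three properties, as a hypothetical C++ function annotated with where Save and Restore could legally sit (`heavy` is an assumed helper that clobbers callee-saved registers; none of this is code from the tree):

extern int heavy(int *p); // assumed to use callee-saved registers / stack

int foo(int *p) {
  if (!p)        // entry block touches no CSR or frame index, so this
    return -1;   // early exit never executes a prologue
  // Save goes here: it dominates every frame-related instruction (#1).
  int r = heavy(p);
  // Restore goes here: it post-dominates them and executions alternate
  // Save -> Restore (#2); both blocks run no more often than entry (#3).
  return r;
}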
+// +// If this pass found points matching all these properties, then +// MachineFrameInfo is updated with that information. +//===----------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" +// To check for profitability. +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +// For property #1 for Save. +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +// To record the result of the analysis. +#include "llvm/CodeGen/MachineFrameInfo.h" +// For property #2. +#include "llvm/CodeGen/MachineLoopInfo.h" +// For property #1 for Restore. +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +// To know about callee-saved. +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Support/Debug.h" +// To query the target about frame lowering. +#include "llvm/Target/TargetFrameLowering.h" +// To know about frame setup operation. +#include "llvm/Target/TargetInstrInfo.h" +// To access TargetInstrInfo. +#include "llvm/Target/TargetSubtargetInfo.h" + +#define DEBUG_TYPE "shrink-wrap" + +using namespace llvm; + +STATISTIC(NumFunc, "Number of functions"); +STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); +STATISTIC(NumCandidatesDropped, + "Number of shrink-wrapping candidates dropped because of frequency"); + +namespace { +/// \brief Class to determine where the safe points to insert the +/// prologue and epilogue are. +/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the +/// shrink-wrapping term for prologue/epilogue placement, this pass +/// does not rely on expensive data-flow analysis. Instead we use the +/// dominance properties and loop information to decide which points +/// are safe for such insertion. +class ShrinkWrap : public MachineFunctionPass { + /// Hold callee-saved information. + RegisterClassInfo RCI; + MachineDominatorTree *MDT; + MachinePostDominatorTree *MPDT; + /// Current safe point found for the prologue. + /// The prologue will be inserted before the first instruction + /// in this basic block. + MachineBasicBlock *Save; + /// Current safe point found for the epilogue. + /// The epilogue will be inserted before the first terminator instruction + /// in this basic block. + MachineBasicBlock *Restore; + /// Hold the information of the basic block frequency. + /// Used to check the profitability of the new points. + MachineBlockFrequencyInfo *MBFI; + /// Hold the loop information. Used to determine if Save and Restore + /// are in the same loop. + MachineLoopInfo *MLI; + /// Frequency of the Entry block. + uint64_t EntryFreq; + /// Current opcode for frame setup. + unsigned FrameSetupOpcode; + /// Current opcode for frame destroy. + unsigned FrameDestroyOpcode; + /// Entry block. + const MachineBasicBlock *Entry; + + /// \brief Check if \p MI uses or defines a callee-saved register or + /// a frame index. If this is the case, this means \p MI must happen + /// after Save and before Restore. + bool useOrDefCSROrFI(const MachineInstr &MI) const; + + /// \brief Update the Save and Restore points such that \p MBB is in + /// the region that is dominated by Save and post-dominated by Restore + /// and Save and Restore still match the safe point definition. + /// Such points may not exist and Save and/or Restore may be null after + /// this call. + void updateSaveRestorePoints(MachineBasicBlock &MBB); + + /// \brief Initialize the pass for \p MF. 
+ void init(MachineFunction &MF) { + RCI.runOnMachineFunction(MF); + MDT = &getAnalysis<MachineDominatorTree>(); + MPDT = &getAnalysis<MachinePostDominatorTree>(); + Save = nullptr; + Restore = nullptr; + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MLI = &getAnalysis<MachineLoopInfo>(); + EntryFreq = MBFI->getEntryFreq(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + Entry = &MF.front(); + + ++NumFunc; + } + + /// Check whether or not Save and Restore points are still interesting for + /// shrink-wrapping. + bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } + +public: + static char ID; + + ShrinkWrap() : MachineFunctionPass(ID) { + initializeShrinkWrapPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachinePostDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const override { + return "Shrink Wrapping analysis"; + } + + /// \brief Perform the shrink-wrapping analysis and update + /// the MachineFrameInfo attached to \p MF with the results. + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. + +char ShrinkWrap::ID = 0; +char &llvm::ShrinkWrapID = ShrinkWrap::ID; + +INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, + false) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) + +bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const { + if (MI.getOpcode() == FrameSetupOpcode || + MI.getOpcode() == FrameDestroyOpcode) { + DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); + return true; + } + for (const MachineOperand &MO : MI.operands()) { + bool UseCSR = false; + if (MO.isReg()) { + unsigned PhysReg = MO.getReg(); + if (!PhysReg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Unallocated register?!"); + UseCSR = RCI.getLastCalleeSavedAlias(PhysReg); + } + // TODO: Handle regmask more accurately. + // For now, be conservative about them. + if (UseCSR || MO.isFI() || MO.isRegMask()) { + DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI() + << "): " << MI << '\n'); + return true; + } + } + return false; +} + +/// \brief Helper function to find the immediate (post) dominator. +template <typename ListOfBBs, typename DominanceAnalysis> +MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, + DominanceAnalysis &Dom) { + MachineBasicBlock *IDom = &Block; + for (MachineBasicBlock *BB : BBs) { + IDom = Dom.findNearestCommonDominator(IDom, BB); + if (!IDom) + break; + } + return IDom; +} + +void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { + // Get rid of the easy cases first. 
+ if (!Save) + Save = &MBB; + else + Save = MDT->findNearestCommonDominator(Save, &MBB); + + if (!Save) { + DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); + return; + } + + if (!Restore) + Restore = &MBB; + else + Restore = MPDT->findNearestCommonDominator(Restore, &MBB); + + // Make sure we would be able to insert the restore code before the + // terminator. + if (Restore == &MBB) { + for (const MachineInstr &Terminator : MBB.terminators()) { + if (!useOrDefCSROrFI(Terminator)) + continue; + // One of the terminators needs to happen before the restore point. + if (MBB.succ_empty()) { + Restore = nullptr; + break; + } + // Look for a restore point that post-dominates all the successors. + // The immediate post-dominator is what we are looking for. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + break; + } + } + + if (!Restore) { + DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n"); + return; + } + + // Make sure Save and Restore are suitable for shrink-wrapping: + // 1. all paths from Save need to lead to Restore before exiting. + // 2. all paths to Restore need to go through Save from Entry. + // We achieve that by making sure that: + // A. Save dominates Restore. + // B. Restore post-dominates Save. + // C. Save and Restore are in the same loop. + bool SaveDominatesRestore = false; + bool RestorePostDominatesSave = false; + while (Save && Restore && + (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || + !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || + MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) { + // Fix (A). + if (!SaveDominatesRestore) { + Save = MDT->findNearestCommonDominator(Save, Restore); + continue; + } + // Fix (B). + if (!RestorePostDominatesSave) + Restore = MPDT->findNearestCommonDominator(Restore, Save); + + // Fix (C). + if (Save && Restore && Save != Restore && + MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) { + if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) + // Push Save outside of this loop. + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + else + // Push Restore outside of this loop. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + } + } +} + +bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { + if (MF.empty()) + return false; + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + + init(MF); + + for (MachineBasicBlock &MBB : MF) { + DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() + << '\n'); + + for (const MachineInstr &MI : MBB) { + if (!useOrDefCSROrFI(MI)) + continue; + // Save (resp. restore) point must dominate (resp. post-dominate) + // MI. Look for the proper basic block for those. + updateSaveRestorePoints(MBB); + // If we are at a point where we cannot improve the placement of + // save/restore instructions, just give up. + if (!ArePointsInteresting()) { + DEBUG(dbgs() << "No Shrink wrap candidate found\n"); + return false; + } + // No need to look for other instructions; this basic block + // will already be part of the handled region. + break; + } + } + if (!ArePointsInteresting()) { + // If the points are not interesting at this point, then they must be null + // because it means we did not encounter any frame/CSR related code. + // Otherwise, we would have returned from the previous loop. 
+ assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!"); + DEBUG(dbgs() << "Nothing to shrink-wrap\n"); + return false; + } + + DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq + << '\n'); + + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + do { + DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " + << Save->getNumber() << ' ' << Save->getName() << ' ' + << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: " + << Restore->getNumber() << ' ' << Restore->getName() << ' ' + << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); + + bool IsSaveCheap, TargetCanUseSaveAsPrologue = false; + if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) && + EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) && + ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) && + TFI->canUseAsEpilogue(*Restore))) + break; + DEBUG(dbgs() << "New points are too expensive or invalid for the target\n"); + MachineBasicBlock *NewBB; + if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) { + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (!Save) + break; + NewBB = Save; + } else { + // Restore is expensive. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + if (!Restore) + break; + NewBB = Restore; + } + updateSaveRestorePoints(*NewBB); + } while (Save && Restore); + + if (!ArePointsInteresting()) { + ++NumCandidatesDropped; + return false; + } + + DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber() + << ' ' << Save->getName() << "\nRestore: " + << Restore->getNumber() << ' ' << Restore->getName() << '\n'); + + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setSavePoint(Save); + MFI->setRestorePoint(Restore); + ++NumCandidates; + return false; +} diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 7fd8107..42d277e 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -46,6 +46,8 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPrepare : public FunctionPass { const TargetMachine *TM; + Type *doubleUnderDataTy; + Type *doubleUnderJBufTy; Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; @@ -93,12 +95,14 @@ bool SjLjEHPrepare::doInitialization(Module &M) { // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); Type *Int32Ty = Type::getInt32Ty(M.getContext()); - FunctionContextTy = StructType::get(VoidPtrTy, // __prev - Int32Ty, // call_site - ArrayType::get(Int32Ty, 4), // __data - VoidPtrTy, // __personality - VoidPtrTy, // __lsda - ArrayType::get(VoidPtrTy, 5), // __jbuf + doubleUnderDataTy = ArrayType::get(Int32Ty, 4); + doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5); + FunctionContextTy = StructType::get(VoidPtrTy, // __prev + Int32Ty, // call_site + doubleUnderDataTy, // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + doubleUnderJBufTy, // __jbuf nullptr); RegisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), @@ -128,7 +132,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { Value *Zero = ConstantInt::get(Int32Ty, 0); Value *One = ConstantInt::get(Int32Ty, 1); Value *Idxs[2] = { Zero, One }; - Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); + Value *CallSite = + Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site"); // Insert a store of 
the call-site number ConstantInt *CallSiteNoC = @@ -191,7 +196,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", @@ -203,16 +208,17 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. - Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); + Value *FCData = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. - Value *ExceptionAddr = - Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep"); + Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 0, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Value *SelectorAddr = - Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep"); + Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 1, "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); @@ -222,15 +228,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = - Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32( + FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep"); Builder.CreateStore( Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()), PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address - Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); - Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep"); + Value *LSDA = Builder.CreateCall(LSDAAddrFn, {}, "lsda_addr"); + Value *LSDAFieldPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 4, "lsda_gep"); Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); return FuncCtx; @@ -399,18 +406,21 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. - Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); + Value *JBufPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. - Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep"); + Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0, + "jbuf_fp_gep"); Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); // Save the stack pointer. 
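The GEP calls in this SjLjEHPrepare hunk track LLVM's move to explicitly typed GEPs: IRBuilder::CreateGEP and CreateConstGEP2_32 now take the pointee type as their first argument instead of inferring it from the pointer operand, which is why each call gains FunctionContextTy, doubleUnderDataTy, or doubleUnderJBufTy. A minimal sketch of the new form (the helper itself is illustrative, not part of the patch); the stack-pointer save that follows shows the same before/after pair:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Form a pointer to field 2 (__data) of the function context with the new
// explicitly typed two-index GEP; the old form omitted FuncCtxTy and read the
// element type out of FuncCtx's pointer type instead.
Value *getDataFieldPtr(IRBuilder<> &Builder, StructType *FuncCtxTy,
                       Value *FuncCtx) {
  return Builder.CreateConstGEP2_32(FuncCtxTy, FuncCtx, 0, 2, "__data");
}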
- Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep"); + Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2, + "jbuf_sp_gep"); - Val = Builder.CreateCall(StackAddrFn, "sp"); + Val = Builder.CreateCall(StackAddrFn, {}, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); // Call the setjmp intrinsic. It fills in the rest of the jmpbuf. diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index d46621d..025ae70 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -127,7 +127,7 @@ void SlotIndexes::renumberIndexes() { void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { // Number indexes with half the default spacing so we can catch up quickly. const unsigned Space = SlotIndex::InstrDist/2; - assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); + static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM"); IndexList::iterator startItr = std::prev(curItr); unsigned index = startItr->getIndex(); diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h index 622361e..03dd58d 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.h +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h @@ -70,7 +70,7 @@ public: static char ID; // Pass identification, replacement for typeid. SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {} - ~SpillPlacement() { releaseMemory(); } + ~SpillPlacement() override { releaseMemory(); } /// BorderConstraint - A basic block has separate constraints for entry and /// exit. diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 4c8801a..dab1dfe 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -623,8 +623,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { AssignI.setMap(RegAssign); for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - VNInfo *VNI = Copies[i]; - SlotIndex Def = VNI->def; + SlotIndex Def = Copies[i]->def; MachineInstr *MI = LIS.getInstructionFromIndex(Def); assert(MI && "No instruction for back-copy"); @@ -635,13 +634,12 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { while (!AtBegin && (--MBBI)->isDebugValue()); DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); - LI->removeValNo(VNI); + LIS.removeVRegDefAt(*LI, Def); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); - // Adjust RegAssign if a register assignment is killed at VNI->def. We - // want to avoid calculating the live range of the source register if - // possible. + // Adjust RegAssign if a register assignment is killed at Def. We want to + // avoid calculating the live range of the source register if possible. AssignI.find(Def.getPrevSlot()); if (!AssignI.valid() || AssignI.start() >= Def) continue; diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index 2e60c14..a062763 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -419,7 +419,7 @@ public: /// There may be extra indices created by dead code elimination. void finish(SmallVectorImpl<unsigned> *LRMap = nullptr); - /// dump - print the current interval maping to dbgs(). + /// dump - print the current interval mapping to dbgs(). 
void dump() const; // ===--- High level methods ---=== diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index faf94b6..3541b33 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -48,7 +48,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -364,7 +363,7 @@ void StackColoring::calculateLocalLiveness() { } } - BBSet = NextBBSet; + BBSet = std::move(NextBBSet); }// while changed. } @@ -464,7 +463,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { continue; if (SlotRemap.count(VI.Slot)) { DEBUG(dbgs() << "Remapping debug info for [" - << DIVariable(VI.Var).getName() << "].\n"); + << cast<DILocalVariable>(VI.Var)->getName() << "].\n"); VI.Slot = SlotRemap[VI.Slot]; FixedDbg++; } diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 767f43a..d88be57 100644 --- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -14,24 +14,24 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/StackMapLivenessAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "stackmaps" -namespace llvm { -cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness", - cl::Hidden, cl::init(true), - cl::desc("Enable PatchPoint Liveness Analysis Pass")); -} +static cl::opt<bool> EnablePatchPointLiveness( + "enable-patchpoint-liveness", cl::Hidden, cl::init(true), + cl::desc("Enable PatchPoint Liveness Analysis Pass")); STATISTIC(NumStackMapFuncVisited, "Number of functions visited"); STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped"); @@ -39,6 +39,46 @@ STATISTIC(NumBBsVisited, "Number of basic blocks visited"); STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap"); STATISTIC(NumStackMaps, "Number of StackMaps visited"); +namespace { +/// \brief This pass calculates the liveness information for each basic block in +/// a function and attaches the register live-out information to a patchpoint +/// intrinsic if present. +/// +/// This pass can be disabled via the -enable-patchpoint-liveness=false flag. +/// The pass skips functions that don't have any patchpoint intrinsics. The +/// information provided by this pass is optional and not required by the +/// aforementioned intrinsic to function. +class StackMapLiveness : public MachineFunctionPass { + MachineFunction *MF; + const TargetRegisterInfo *TRI; + LivePhysRegs LiveRegs; + +public: + static char ID; + + /// \brief Default construct and initialize the pass. + StackMapLiveness(); + + /// \brief Tell the pass manager which passes we depend on and what + /// information we preserve. 
+ void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// \brief Calculate the liveness information for the given machine function. + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + /// \brief Performs the actual liveness calculation for the function. + bool calculateLiveness(); + + /// \brief Add the current register live set to the instruction. + void addLiveOutSetToMI(MachineInstr &MI); + + /// \brief Create a register mask and initialize it with the registers from + /// the register live set. + uint32_t *createRegisterMask() const; +}; +} // namespace + char StackMapLiveness::ID = 0; char &llvm::StackMapLivenessID = StackMapLiveness::ID; INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness", @@ -60,18 +100,18 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { } /// Calculate the liveness information for the given machine function. -bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) { +bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { if (!EnablePatchPointLiveness) return false; - DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " - << _MF.getName() << " **********\n"); - MF = &_MF; - TRI = MF->getSubtarget().getRegisterInfo(); + DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName() + << " **********\n"); + this->MF = &MF; + TRI = MF.getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; // Skip this function if there are no patchpoints to process. - if (!MF->getFrameInfo()->hasPatchPoint()) { + if (!MF.getFrameInfo()->hasPatchPoint()) { ++NumStackMapFuncSkipped; return false; } diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index f1d1160..ffe59c1 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -19,8 +19,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -76,21 +74,32 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { llvm_unreachable("Unsupported stackmap version!"); } +/// Go up the super-register chain until we hit a valid dwarf register number. 
+static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { + int RegNo = TRI->getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) + RegNo = TRI->getDwarfRegNum(*SR, false); + + assert(RegNo >= 0 && "Invalid Dwarf register number."); + return (unsigned) RegNo; +} + MachineInstr::const_mop_iterator StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, LocationVec &Locs, LiveOutVec &LiveOuts) const { + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); if (MOI->isImm()) { switch (MOI->getImm()) { default: llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { - unsigned Size = - AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits(); + unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm)); + Locs.push_back(Location(StackMaps::Location::Direct, Size, + getDwarfRegNum(Reg, TRI), Imm)); break; } case StackMaps::IndirectMemRefOp: { @@ -98,7 +107,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(Size > 0 && "Need a valid size for indirect memory locations."); unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm)); + Locs.push_back(Location(StackMaps::Location::Indirect, Size, + getDwarfRegNum(Reg, TRI), Imm)); break; } case StackMaps::ConstantOp: { @@ -123,12 +133,18 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); - const TargetRegisterClass *RC = - AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass( - MOI->getReg()); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); + + unsigned Offset = 0; + unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI); + unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg()); + if (SubRegIdx) + Offset = TRI->getSubRegIdxOffset(SubRegIdx); + Locs.push_back( - Location(Location::Register, RC->getSize(), MOI->getReg(), 0)); + Location(Location::Register, RC->getSize(), RegNo, Offset)); return ++MOI; } @@ -138,14 +154,74 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, return ++MOI; } -/// Go up the super-register chain until we hit a valid dwarf register number. -static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { - int RegNo = TRI->getDwarfRegNum(Reg, false); - for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) - RegNo = TRI->getDwarfRegNum(*SR, false); +void StackMaps::print(raw_ostream &OS) { + const TargetRegisterInfo *TRI = + AP.MF ? 
AP.MF->getSubtarget().getRegisterInfo() : nullptr; + OS << WSMP << "callsites:\n"; + for (const auto &CSI : CSInfos) { + const LocationVec &CSLocs = CSI.Locations; + const LiveOutVec &LiveOuts = CSI.LiveOuts; - assert(RegNo >= 0 && "Invalid Dwarf register number."); - return (unsigned) RegNo; + OS << WSMP << "callsite " << CSI.ID << "\n"; + OS << WSMP << " has " << CSLocs.size() << " locations\n"; + + unsigned OperIdx = 0; + for (const auto &Loc : CSLocs) { + OS << WSMP << " Loc " << OperIdx << ": "; + switch (Loc.LocType) { + case Location::Unprocessed: + OS << "<Unprocessed operand>"; + break; + case Location::Register: + OS << "Register "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + break; + case Location::Direct: + OS << "Direct "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + if (Loc.Offset) + OS << " + " << Loc.Offset; + break; + case Location::Indirect: + OS << "Indirect "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + OS << "+" << Loc.Offset; + break; + case Location::Constant: + OS << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + OS << "Constant Index " << Loc.Offset; + break; + } + OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size + << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; + OperIdx++; + } + + OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n"; + + OperIdx = 0; + for (const auto &LO : LiveOuts) { + OS << WSMP << " LO " << OperIdx << ": "; + if (TRI) + OS << TRI->getName(LO.Reg); + else + OS << LO.Reg; + OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte " + << LO.Size << "]\n"; + OperIdx++; + } + } } /// Create a live-out register record for the given register Reg. @@ -161,7 +237,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { StackMaps::LiveOutVec StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { assert(Mask && "No register mask specified"); - const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); LiveOutVec LiveOuts; // Create a LiveOutReg for each bit that is set in the register mask. @@ -197,9 +273,9 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, MachineInstr::const_mop_iterator MOE, bool recordResult) { - MCContext &OutContext = AP.OutStreamer.getContext(); - MCSymbol *MILabel = OutContext.CreateTempSymbol(); - AP.OutStreamer.EmitLabel(MILabel); + MCContext &OutContext = AP.OutStreamer->getContext(); + MCSymbol *MILabel = OutContext.createTempSymbol(); + AP.OutStreamer->EmitLabel(MILabel); LocationVec Locations; LiveOutVec LiveOuts; @@ -294,9 +370,8 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) { // Record all the deopt and gc operands (they're contiguous and run from the // initial index to the end of the operand list) const unsigned StartIdx = opers.getVarIdx(); - recordStackMapOpers(MI, 0xABCDEF00, - MI.operands_begin() + StartIdx, MI.operands_end(), - false); + recordStackMapOpers(MI, opers.getID(), MI.operands_begin() + StartIdx, + MI.operands_end(), false); } /// Emit the stackmap header. 
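The print() routine added above and the emitter below agree on a fixed record layout for each location (.byte Type, .byte Size, .short DwarfRegNum, .int Offset) and each live-out (.short DwarfRegNum, .byte 0, .byte Size). A self-contained little-endian sketch of that serialization, using plain structs as stand-ins for StackMaps::Location and LiveOutReg:

#include <cstdint>
#include <vector>

// Plain stand-ins for StackMaps::Location and StackMaps::LiveOutReg.
struct LocationRec { uint8_t Type; uint8_t Size; uint16_t DwarfRegNum; int32_t Offset; };
struct LiveOutRec { uint16_t DwarfRegNum; uint8_t Size; };

// Append V to Out as Bytes little-endian bytes, least significant first.
void emitLE(std::vector<uint8_t> &Out, uint64_t V, unsigned Bytes) {
  for (unsigned I = 0; I != Bytes; ++I)
    Out.push_back(uint8_t(V >> (8 * I)));
}

// .byte Type, .byte Size, .short DwarfRegNum, .int Offset
void emitLocation(std::vector<uint8_t> &Out, const LocationRec &L) {
  emitLE(Out, L.Type, 1);
  emitLE(Out, L.Size, 1);
  emitLE(Out, L.DwarfRegNum, 2);
  emitLE(Out, uint32_t(L.Offset), 4);
}

// .short DwarfRegNum, .byte 0 (reserved), .byte Size
void emitLiveOut(std::vector<uint8_t> &Out, const LiveOutRec &LO) {
  emitLE(Out, LO.DwarfRegNum, 2);
  emitLE(Out, 0, 1);
  emitLE(Out, LO.Size, 1);
}

Since parseOperand now resolves registers to DWARF numbering when the map is built, the emission loop in the next hunk can stream Loc.Reg and Loc.Offset without further translation.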
@@ -384,16 +459,13 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { /// 0x3, Indirect, [Reg + Offset] (spilled value) /// 0x4, Constant, Offset (small constant) /// 0x5, ConstIndex, Constants[Offset] (large constant) -void StackMaps::emitCallsiteEntries(MCStreamer &OS, - const TargetRegisterInfo *TRI) { +void StackMaps::emitCallsiteEntries(MCStreamer &OS) { + DEBUG(print(dbgs())); // Callsite entries. - DEBUG(dbgs() << WSMP << "callsites:\n"); for (const auto &CSI : CSInfos) { const LocationVec &CSLocs = CSI.Locations; const LiveOutVec &LiveOuts = CSI.LiveOuts; - DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n"); - // Verify stack map entry. It's better to communicate a problem to the // runtime than crash in case of in-process compilation. Currently, we do // simple overflow checks, but we may eventually communicate other @@ -414,83 +486,20 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS, // Reserved for flags. OS.EmitIntValue(0, 2); - - DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n"); - OS.EmitIntValue(CSLocs.size(), 2); - unsigned OperIdx = 0; for (const auto &Loc : CSLocs) { - unsigned RegNo = 0; - int Offset = Loc.Offset; - if(Loc.Reg) { - RegNo = getDwarfRegNum(Loc.Reg, TRI); - - // If this is a register location, put the subregister byte offset in - // the location offset. - if (Loc.LocType == Location::Register) { - assert(!Loc.Offset && "Register location should have zero offset"); - unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); - unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg); - if (SubRegIdx) - Offset = TRI->getSubRegIdxOffset(SubRegIdx); - } - } - else { - assert(Loc.LocType != Location::Register && - "Missing location register"); - } - - DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": "; - switch (Loc.LocType) { - case Location::Unprocessed: - dbgs() << "<Unprocessed operand>"; - break; - case Location::Register: - dbgs() << "Register " << TRI->getName(Loc.Reg); - break; - case Location::Direct: - dbgs() << "Direct " << TRI->getName(Loc.Reg); - if (Loc.Offset) - dbgs() << " + " << Loc.Offset; - break; - case Location::Indirect: - dbgs() << "Indirect " << TRI->getName(Loc.Reg) - << " + " << Loc.Offset; - break; - case Location::Constant: - dbgs() << "Constant " << Loc.Offset; - break; - case Location::ConstantIndex: - dbgs() << "Constant Index " << Loc.Offset; - break; - } - dbgs() << " [encoding: .byte " << Loc.LocType - << ", .byte " << Loc.Size - << ", .short " << RegNo - << ", .int " << Offset << "]\n"; - ); - OS.EmitIntValue(Loc.LocType, 1); OS.EmitIntValue(Loc.Size, 1); - OS.EmitIntValue(RegNo, 2); - OS.EmitIntValue(Offset, 4); - OperIdx++; + OS.EmitIntValue(Loc.Reg, 2); + OS.EmitIntValue(Loc.Offset, 4); } - DEBUG(dbgs() << WSMP << " has " << LiveOuts.size() - << " live-out registers\n"); - // Num live-out registers and padding to align to 4 byte. 
OS.EmitIntValue(0, 2); OS.EmitIntValue(LiveOuts.size(), 2); - OperIdx = 0; for (const auto &LO : LiveOuts) { - DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": " - << TRI->getName(LO.Reg) - << " [encoding: .short " << LO.RegNo - << ", .byte 0, .byte " << LO.Size << "]\n"); OS.EmitIntValue(LO.RegNo, 2); OS.EmitIntValue(0, 1); OS.EmitIntValue(LO.Size, 1); @@ -511,24 +520,23 @@ void StackMaps::serializeToStackMapSection() { if (CSInfos.empty()) return; - MCContext &OutContext = AP.OutStreamer.getContext(); - MCStreamer &OS = AP.OutStreamer; - const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); + MCContext &OutContext = AP.OutStreamer->getContext(); + MCStreamer &OS = *AP.OutStreamer; // Create the section. - const MCSection *StackMapSection = - OutContext.getObjectFileInfo()->getStackMapSection(); + MCSection *StackMapSection = + OutContext.getObjectFileInfo()->getStackMapSection(); OS.SwitchSection(StackMapSection); // Emit a dummy symbol to force section inclusion. - OS.EmitLabel(OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps"))); + OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps"))); // Serialize data. DEBUG(dbgs() << "********** Stack Map Output **********\n"); emitStackmapHeader(OS); emitFunctionFrameRecords(OS); emitConstantPoolEntries(OS); - emitCallsiteEntries(OS, TRI); + emitCallsiteEntries(OS); OS.AddBlankLine(); // Clean up. diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index a132805..0824d6f 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -88,10 +88,9 @@ bool StackProtector::runOnFunction(Function &Fn) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? 
&DTWP->getDomTree() : nullptr; - TLI = TM->getSubtargetImpl()->getTargetLowering(); + TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); - Attribute Attr = Fn.getAttributes().getAttribute( - AttributeSet::FunctionIndex, "stack-protector-buffer-size"); + Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size"); if (Attr.isStringAttribute() && Attr.getValueAsString().getAsInteger(10, SSPBufferSize)) return false; // Invalid integer string @@ -201,15 +200,12 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { bool StackProtector::RequiresStackProtector() { bool Strong = false; bool NeedsProtector = false; - if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) { + if (F->hasFnAttribute(Attribute::StackProtectReq)) { NeedsProtector = true; Strong = true; // Use the same heuristic as strong to determine SSPLayout - } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectStrong)) + } else if (F->hasFnAttribute(Attribute::StackProtectStrong)) Strong = true; - else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtect)) + else if (!F->hasFnAttribute(Attribute::StackProtect)) return false; for (const BasicBlock &BB : *F) { @@ -357,8 +353,8 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, IRBuilder<> B(&F->getEntryBlock().front()); AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot"); LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard"); - B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI, - AI); + B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + {LI, AI}); return SupportsSelectionDAGSP; } @@ -492,7 +488,7 @@ BasicBlock *StackProtector::CreateFailBB() { Constant *StackChkFail = M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context), nullptr); - B.CreateCall(StackChkFail); + B.CreateCall(StackChkFail, {}); } B.CreateUnreachable(); return FailBB; diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index cc72e5e..a5a175f 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -184,10 +184,18 @@ void StackSlotColoring::InitializeSlots() { UsedColors.resize(LastFI); Assignments.resize(LastFI); + typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair; + SmallVector<Pair *, 16> Intervals; + Intervals.reserve(LS->getNumIntervals()); + for (auto &I : *LS) + Intervals.push_back(&I); + std::sort(Intervals.begin(), Intervals.end(), + [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); + // Gather all spill slots into a list. 
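The InitializeSlots() change above is about determinism: LiveStacks hashes its intervals, so iterating it directly visits spill slots in an unspecified order. Collecting pointers to the entries and sorting them by frame index makes every compilation walk them identically; the loop that follows consumes the sorted list. The same pattern, sketched with ordinary standard-library stand-ins:

#include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  // Stand-in for LiveStacks: iteration order over the map is unspecified.
  std::unordered_map<int, std::string> Slots = {{3, "c"}, {1, "a"}, {2, "b"}};

  // Take stable pointers to the entries and order them by key, as
  // InitializeSlots() now does with its spill-slot intervals.
  using Pair = std::pair<const int, std::string>;
  std::vector<Pair *> Ordered;
  Ordered.reserve(Slots.size());
  for (auto &Entry : Slots)
    Ordered.push_back(&Entry);
  std::sort(Ordered.begin(), Ordered.end(),
            [](Pair *L, Pair *R) { return L->first < R->first; });

  // Ordered now visits keys 1, 2, 3 on every run, independent of hashing.
  return 0;
}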
DEBUG(dbgs() << "Spill slot intervals:\n"); - for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { - LiveInterval &li = i->second; + for (auto *I : Intervals) { + LiveInterval &li = I->second; DEBUG(li.dump()); int FI = TargetRegisterInfo::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp index 802cf13..95dfd75 100644 --- a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp +++ b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp @@ -12,7 +12,7 @@ // suitable as a default implementation usable with any collector which can // consume the standard stackmap format generated by statepoints, uses the // default addrespace to distinguish between gc managed and non-gc managed -// pointers, and has reasonable relocation semantics. +// pointers, and has reasonable relocation semantics. // //===----------------------------------------------------------------------===// @@ -33,21 +33,22 @@ public: NeededSafePoints = 0; UsesMetadata = false; CustomRoots = false; - CustomSafePoints = false; } Optional<bool> isGCManagedPointer(const Value *V) const override { // Method is only valid on pointer typed values. PointerType *PT = cast<PointerType>(V->getType()); // For the sake of this example GC, we arbitrarily pick addrspace(1) as our // GC managed heap. We know that a pointer into this heap needs to be - // updated and that no other pointer does. + // updated and that no other pointer does. Note that addrspace(1) is used + // only as an example, it has no special meaning, and is not reserved for + // GC usage. return (1 == PT->getAddressSpace()); } }; } -static GCRegistry::Add<StatepointGC> -X("statepoint-example", "an example strategy for statepoint"); +static GCRegistry::Add<StatepointGC> X("statepoint-example", + "an example strategy for statepoint"); namespace llvm { void linkStatepointExampleGC() {} diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 4377236..23f41c8 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -449,6 +449,9 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); if (VI != LocalVRMap.end()) { MO.setReg(VI->second); + // Clear any kill flags from this operand. The new register could have + // uses after this one, so kills are not valid here. + MO.setIsKill(false); MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); } } @@ -560,8 +563,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && - MF.getFunction()->getAttributes(). 
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) + MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index e3f0191..5638324 100644 --- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cstdlib> @@ -22,6 +23,12 @@ using namespace llvm; TargetFrameLowering::~TargetFrameLowering() { } +/// The default implementation just looks at attribute "no-frame-pointer-elim". +bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { + auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim"); + return Attr.getValueAsString() == "true"; +} + /// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. This is the default implementation /// which is overridden for some targets. diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index 608b806..92488de 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -142,6 +142,10 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); bool Reg1IsKill = MI->getOperand(Idx1).isKill(); bool Reg2IsKill = MI->getOperand(Idx2).isKill(); + bool Reg1IsUndef = MI->getOperand(Idx1).isUndef(); + bool Reg2IsUndef = MI->getOperand(Idx2).isUndef(); + bool Reg1IsInternal = MI->getOperand(Idx1).isInternalRead(); + bool Reg2IsInternal = MI->getOperand(Idx2).isInternalRead(); // If destination is tied to either of the commuted source register, then // it must be updated. if (HasDef && Reg0 == Reg1 && @@ -172,6 +176,10 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, MI->getOperand(Idx1).setSubReg(SubReg2); MI->getOperand(Idx2).setIsKill(Reg1IsKill); MI->getOperand(Idx1).setIsKill(Reg2IsKill); + MI->getOperand(Idx2).setIsUndef(Reg1IsUndef); + MI->getOperand(Idx1).setIsUndef(Reg2IsUndef); + MI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal); + MI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); return MI; } @@ -285,21 +293,20 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, - const TargetMachine *TM) const { + const MachineFunction &MF) const { if (!SubIdx) { Size = RC->getSize(); Offset = 0; return true; } - unsigned BitSize = - TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); // Convert bit size to byte size to be consistent with // MCRegisterClass::getSize(). 
if (BitSize % 8) return false; - int BitOffset = - TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx); + int BitOffset = TRI->getSubRegIdxOffset(SubIdx); if (BitOffset < 0 || BitOffset % 8) return false; @@ -308,7 +315,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!TM->getSubtargetImpl()->getDataLayout()->isLittleEndian()) { + if (!MF.getTarget().getDataLayout()->isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -377,16 +384,13 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { llvm_unreachable("Not a MachO target"); } -bool TargetInstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { +bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + ArrayRef<unsigned> Ops) const { return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); } -static MachineInstr* foldPatchpoint(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex, +static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex, const TargetInstrInfo &TII) { unsigned StartIdx = 0; switch (MI->getOpcode()) { @@ -405,9 +409,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, // Return false if any operands requested for folding are not foldable (not // part of the stackmap's live values). - for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end(); - I != E; ++I) { - if (*I < StartIdx) + for (unsigned Op : Ops) { + if (Op < StartIdx) return nullptr; } @@ -427,8 +430,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, // Compute the spill slot size and offset. const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg()); - bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, - SpillOffset, &MF.getTarget()); + bool Valid = + TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF); if (!Valid) report_fatal_error("cannot spill patchpoint subregister operand"); MIB.addImm(StackMaps::IndirectMemRefOp); @@ -448,10 +451,9 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, /// operand folded, otherwise NULL is returned. The client is responsible for /// removing the old instruction and adding the new one in the instruction /// stream. -MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - int FI) const { +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + int FI) const { unsigned Flags = 0; for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (MI->getOperand(Ops[i]).isDef()) @@ -517,10 +519,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. 
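Several TargetInstrInfo interfaces in these hunks, including the overload rewritten just below, switch from const SmallVectorImpl<unsigned> & to ArrayRef<unsigned>. An ArrayRef is a non-owning (pointer, length) view that binds to a SmallVector, a C array, or a braced list alike, so callers no longer need to build a SmallVector first. A hedged sketch of why that widens the set of acceptable callers (countOps and its body are illustrative, not part of the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// ArrayRef<unsigned> is a non-owning (pointer, length) view of the operand
// indices; the body here is a placeholder.
unsigned countOps(ArrayRef<unsigned> Ops) { return Ops.size(); }

void demo() {
  SmallVector<unsigned, 4> FoldOps = {0, 2};
  countOps(FoldOps);          // binds to a SmallVector without copying
  countOps({1});              // or to a braced initializer list
  unsigned Raw[] = {0, 1, 2};
  countOps(Raw);              // or to a plain C array
}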
-MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const { assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG for (unsigned i = 0, e = Ops.size(); i != e; ++i) @@ -651,8 +652,8 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; - int FrameSetupOpcode = getCallFrameSetupOpcode(); - int FrameDestroyOpcode = getCallFrameDestroyOpcode(); + unsigned FrameSetupOpcode = getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode(); if (MI->getOpcode() != FrameSetupOpcode && MI->getOpcode() != FrameDestroyOpcode) diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index 9b2fdff..b7f1db6 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -420,6 +420,14 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { // These are generally not available. Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr; } + + // For f16/f32 conversions, Darwin uses the standard naming scheme, instead + // of the gnueabi-style __gnu_*_ieee. + // FIXME: What about other targets? + if (TT.isOSDarwin()) { + Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2"; + Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2"; + } } /// InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -664,6 +672,44 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { return UNKNOWN_LIBCALL; } +RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) { +#define OP_TO_LIBCALL(Name, Enum) \ + case Name: \ + switch (VT.SimpleTy) { \ + default: \ + return UNKNOWN_LIBCALL; \ + case MVT::i8: \ + return Enum##_1; \ + case MVT::i16: \ + return Enum##_2; \ + case MVT::i32: \ + return Enum##_4; \ + case MVT::i64: \ + return Enum##_8; \ + case MVT::i128: \ + return Enum##_16; \ + } + + switch (Opc) { + OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET) + OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN) + } + +#undef OP_TO_LIBCALL + + return UNKNOWN_LIBCALL; +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. /// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { @@ -695,12 +741,11 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { } /// NOTE: The TargetMachine owns TLOF. -TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) - : TM(tm), DL(TM.getSubtargetImpl()->getDataLayout()) { +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { initActions(); // Perform these initializations only once. 
- IsLittleEndian = DL->isLittleEndian(); + IsLittleEndian = getDataLayout()->isLittleEndian(); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; @@ -765,6 +810,19 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); setOperationAction(ISD::FMINNUM, VT, Expand); setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FMAD, VT, Expand); + setOperationAction(ISD::SMIN, VT, Expand); + setOperationAction(ISD::SMAX, VT, Expand); + setOperationAction(ISD::UMIN, VT, Expand); + setOperationAction(ISD::UMAX, VT, Expand); + + // Overflow operations default to expand + setOperationAction(ISD::SADDO, VT, Expand); + setOperationAction(ISD::SSUBO, VT, Expand); + setOperationAction(ISD::UADDO, VT, Expand); + setOperationAction(ISD::USUBO, VT, Expand); + setOperationAction(ISD::SMULO, VT, Expand); + setOperationAction(ISD::UMULO, VT, Expand); // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); @@ -791,58 +849,21 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ConstantFP, MVT::f128, Expand); // These library functions default to expand. - setOperationAction(ISD::FLOG , MVT::f16, Expand); - setOperationAction(ISD::FLOG2, MVT::f16, Expand); - setOperationAction(ISD::FLOG10, MVT::f16, Expand); - setOperationAction(ISD::FEXP , MVT::f16, Expand); - setOperationAction(ISD::FEXP2, MVT::f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::f16, Expand); - setOperationAction(ISD::FMINNUM, MVT::f16, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); - setOperationAction(ISD::FCEIL, MVT::f16, Expand); - setOperationAction(ISD::FRINT, MVT::f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::f16, Expand); - setOperationAction(ISD::FROUND, MVT::f16, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FFLOOR, MVT::f32, Expand); - setOperationAction(ISD::FMINNUM, MVT::f32, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f32, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); - setOperationAction(ISD::FCEIL, MVT::f32, Expand); - setOperationAction(ISD::FRINT, MVT::f32, Expand); - setOperationAction(ISD::FTRUNC, MVT::f32, Expand); - setOperationAction(ISD::FROUND, MVT::f32, Expand); - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::FMINNUM, MVT::f64, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); - setOperationAction(ISD::FROUND, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f128, Expand); - setOperationAction(ISD::FLOG2, MVT::f128, Expand); - setOperationAction(ISD::FLOG10, MVT::f128, Expand); - setOperationAction(ISD::FEXP , MVT::f128, Expand); - 
setOperationAction(ISD::FEXP2, MVT::f128, Expand); - setOperationAction(ISD::FFLOOR, MVT::f128, Expand); - setOperationAction(ISD::FMINNUM, MVT::f128, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f128, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); - setOperationAction(ISD::FCEIL, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::FROUND, MVT::f128, Expand); + for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { + setOperationAction(ISD::FLOG , VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FEXP , VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::FROUND, VT, Expand); + } // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -858,7 +879,7 @@ MVT TargetLoweringBase::getPointerTy(uint32_t AS) const { } unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const { - return DL->getPointerSizeInBits(AS); + return getDataLayout()->getPointerSizeInBits(AS); } unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { @@ -867,7 +888,7 @@ unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { } MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*DL->getPointerSize(0)); + return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0)); } EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { @@ -894,6 +915,138 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { } } +TargetLoweringBase::LegalizeKind +TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { + // If this is a simple type, use the ComputeRegisterProp mechanism. + if (VT.isSimple()) { + MVT SVT = VT.getSimpleVT(); + assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType)); + MVT NVT = TransformToType[SVT.SimpleTy]; + LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); + + assert((LA == TypeLegal || LA == TypeSoftenFloat || + ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) && + "Promote may not follow Expand or Promote"); + + if (LA == TypeSplitVector) + return LegalizeKind(LA, + EVT::getVectorVT(Context, SVT.getVectorElementType(), + SVT.getVectorNumElements() / 2)); + if (LA == TypeScalarizeVector) + return LegalizeKind(LA, SVT.getVectorElementType()); + return LegalizeKind(LA, NVT); + } + + // Handle Extended Scalar Types. + if (!VT.isVector()) { + assert(VT.isInteger() && "Float types must be simple"); + unsigned BitSize = VT.getSizeInBits(); + // First promote to a power-of-two size, then expand if necessary. + if (BitSize < 8 || !isPowerOf2_32(BitSize)) { + EVT NVT = VT.getRoundIntegerType(Context); + assert(NVT != VT && "Unable to round integer VT"); + LegalizeKind NextStep = getTypeConversion(Context, NVT); + // Avoid multi-step promotion. + if (NextStep.first == TypePromoteInteger) + return NextStep; + // Return rounded integer type. 
+ return LegalizeKind(TypePromoteInteger, NVT); + } + + return LegalizeKind(TypeExpandInteger, + EVT::getIntegerVT(Context, VT.getSizeInBits() / 2)); + } + + // Handle vector types. + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + + // Vectors with only one element are always scalarized. + if (NumElts == 1) + return LegalizeKind(TypeScalarizeVector, EltVT); + + // Try to widen vector elements until the element type is a power of two and + // promote it to a legal type later on, for example: + // <3 x i8> -> <4 x i8> -> <4 x i32> + if (EltVT.isInteger()) { + // Vectors with a number of elements that is not a power of two are always + // widened, for example <3 x i8> -> <4 x i8>. + if (!VT.isPow2VectorType()) { + NumElts = (unsigned)NextPowerOf2(NumElts); + EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts); + return LegalizeKind(TypeWidenVector, NVT); + } + + // Examine the element type. + LegalizeKind LK = getTypeConversion(Context, EltVT); + + // If type is to be expanded, split the vector. + // <4 x i140> -> <2 x i140> + if (LK.first == TypeExpandInteger) + return LegalizeKind(TypeSplitVector, + EVT::getVectorVT(Context, EltVT, NumElts / 2)); + + // Promote the integer element types until a legal vector type is found + // or until the element integer type is too big. If a legal type was not + // found, fallback to the usual mechanism of widening/splitting the + // vector. + EVT OldEltVT = EltVT; + while (1) { + // Increase the bitwidth of the element to the next pow-of-two + // (which is greater than 8 bits). + EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits()) + .getRoundIntegerType(Context); + + // Stop trying when getting a non-simple element type. + // Note that vector elements may be greater than legal vector element + // types. Example: X86 XMM registers hold 64bit element on 32bit + // systems. + if (!EltVT.isSimple()) + break; + + // Build a new vector type and check if it is legal. + MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + // Found a legal promoted vector type. + if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal) + return LegalizeKind(TypePromoteInteger, + EVT::getVectorVT(Context, EltVT, NumElts)); + } + + // Reset the type to the unexpanded type if we did not find a legal vector + // type with a promoted vector element type. + EltVT = OldEltVT; + } + + // Try to widen the vector until a legal type is found. + // If there is no wider legal type, split the vector. + while (1) { + // Round up to the next power of 2. + NumElts = (unsigned)NextPowerOf2(NumElts); + + // If there is no simple vector type with this many elements then there + // cannot be a larger legal vector type. Note that this assumes that + // there are no skipped intermediate vector types in the simple types. + if (!EltVT.isSimple()) + break; + MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + if (LargerVector == MVT()) + break; + + // If this type is legal then widen the vector. + if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal) + return LegalizeKind(TypeWidenVector, LargerVector); + } + + // Widen odd vectors to next power of two. + if (!VT.isPow2VectorType()) { + EVT NVT = VT.getPow2VectorType(Context); + return LegalizeKind(TypeWidenVector, NVT); + } + + // Vectors with illegal element types are expanded. 
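Taken together, these getTypeConversion hunks implement a small legalization ladder: scalar integers are rounded up to a power-of-two width (promote) and then halved once no wider legal type exists (expand); vectors are widened to a power-of-two element count, their elements promoted, and, as the return just below shows, split in half when all else fails (e.g. <3 x i8> -> <4 x i8> -> <4 x i32>). A simplified standalone model of the scalar half, assuming an illustrative set of legal widths rather than a real target's:

#include <cstdio>
#include <set>

// Round a bit width up to the next power of two, minimum 8.
unsigned roundIntegerBits(unsigned Bits) {
  unsigned R = 8;
  while (R < Bits)
    R *= 2;
  return R;
}

// Print the promote/expand steps an illegal iN would take, mimicking the
// scalar path of getTypeConversion. The legal widths here are made up.
void legalizeScalar(unsigned Bits) {
  const std::set<unsigned> Legal = {8, 16, 32, 64};
  while (!Legal.count(Bits)) {
    unsigned Rounded = roundIntegerBits(Bits);
    if (Rounded != Bits) {
      std::printf("i%u: promote to i%u\n", Bits, Rounded); // TypePromoteInteger
      Bits = Rounded;
    } else {
      std::printf("i%u: expand to 2 x i%u\n", Bits, Bits / 2); // TypeExpandInteger
      Bits /= 2;
    }
  }
  std::printf("i%u is legal\n", Bits);
}

int main() { legalizeScalar(33); } // i33 -> i64 (promote), which is legal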
+ EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2); + return LegalizeKind(TypeSplitVector, NVT); +} static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, unsigned &NumIntermediates, @@ -997,8 +1150,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, } MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(FI), Flags, - TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), - MFI.getObjectAlignment(FI)); + TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI)); MIB->addMemOperand(MF, MMO); // Replace the instruction and update the operand index. @@ -1012,10 +1164,13 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". -std::pair<const TargetRegisterClass*, uint8_t> -TargetLoweringBase::findRepresentativeClass(MVT VT) const { - const TargetRegisterInfo *TRI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); +// This function is in TargetLowering because it uses RegClassForVT which would +// need to be moved to TargetRegisterInfo and would necessitate moving +// isTypeLegal over as well - a massive change that would just require +// TargetLowering having a TargetRegisterInfo class member that it would use. +std::pair<const TargetRegisterClass *, uint8_t> +TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; if (!RC) return std::make_pair(RC, 0); @@ -1041,7 +1196,8 @@ TargetLoweringBase::findRepresentativeClass(MVT VT) const { /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. -void TargetLoweringBase::computeRegisterProperties() { +void TargetLoweringBase::computeRegisterProperties( + const TargetRegisterInfo *TRI) { static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, "Too many value types for ValueTypeActions to hold!"); @@ -1110,27 +1266,29 @@ void TargetLoweringBase::computeRegisterProperties() { ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); } - // Decide how to handle f32. If the target does not have native support for - // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + // Decide how to handle f32. If the target does not have native f32 support, + // expand it to i32 and we will be generating soft float library calls. 
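The computeRegisterProperties hunk that follows encodes two new policies: an illegal f32 is softened to i32 (soft-float libcalls) instead of being promoted to f64, and an illegal f16 is promoted to f32 where possible, softening to i16 only when f32 is also unsupported; on Darwin the resulting f16<->f32 conversions then use the __extendhfsf2/__truncsfhf2 libcall names registered earlier in InitLibcallNames. A compact model of the decision, with invented names in place of the LegalizeTypeAction machinery:

// Invented names; the real code records LegalizeTypeAction values.
enum class FloatAction { Legal, SoftenToInt, PromoteToF32 };

FloatAction classifyF32(bool F32Legal) {
  // New rule: no promotion to f64; an unsupported f32 is done in i32 libcalls.
  return F32Legal ? FloatAction::Legal : FloatAction::SoftenToInt;
}

FloatAction classifyF16(bool F16Legal, bool F32Legal) {
  if (F16Legal)
    return FloatAction::Legal;
  // New rule: do f16 arithmetic in f32 (TypePromoteFloat) when f32 is legal;
  // only soften to i16 when f32 is unsupported as well.
  return F32Legal ? FloatAction::PromoteToF32 : FloatAction::SoftenToInt;
}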
if (!isTypeLegal(MVT::f32)) { - if (isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; - TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); - } else { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; - TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); - } + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } if (!isTypeLegal(MVT::f16)) { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; - TransformToType[MVT::f16] = MVT::i16; - ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); + // If the target has native f32 support, promote f16 operations to f32. If + // f32 is not supported, generate soft float library calls. + if (isTypeLegal(MVT::f32)) { + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); + } else { + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; + TransformToType[MVT::f16] = MVT::i16; + ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); + } } // Loop over all of the vector value types to see which need transformations. @@ -1223,7 +1381,7 @@ void TargetLoweringBase::computeRegisterProperties() { for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { const TargetRegisterClass* RRC; uint8_t Cost; - std::tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); + std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i); RepRegClassForVT[i] = RRC; RepRegClassCostForVT[i] = Cost; } @@ -1366,7 +1524,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. 
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const { - return DL->getABITypeAlignment(Ty); + return getDataLayout()->getABITypeAlignment(Ty); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 9f1e06b..a32bdf8 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -31,6 +31,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -50,7 +51,7 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( MachineModuleInfo *MMI) const { unsigned Encoding = getPersonalityEncoding(); if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect) - return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + + return getContext().getOrCreateSymbol(StringRef("DW.ref.") + TM.getSymbol(GV, Mang)->getName()); if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr) return TM.getSymbol(GV, Mang); @@ -62,21 +63,17 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); NameData += Sym->getName(); - MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); + MCSymbol *Label = getContext().getOrCreateSymbol(NameData); Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); Streamer.EmitSymbolAttribute(Label, MCSA_Weak); StringRef Prefix = ".data."; NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end()); unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; - const MCSection *Sec = getContext().getELFSection(NameData, - ELF::SHT_PROGBITS, - Flags, - SectionKind::getDataRel(), - 0, Label->getName()); - unsigned Size = TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS, + Flags, 0, Label->getName()); + unsigned Size = TM.getDataLayout()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment( - TM.getSubtargetImpl()->getDataLayout()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); @@ -166,9 +163,7 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) { return ELF::SHT_PROGBITS; } - -static unsigned -getELFSectionFlags(SectionKind K) { +static unsigned getELFSectionFlags(SectionKind K) { unsigned Flags = 0; if (!K.isMetadata()) @@ -183,9 +178,7 @@ getELFSectionFlags(SectionKind K) { if (K.isThreadLocal()) Flags |= ELF::SHF_TLS; - // K.isMergeableConst() is left out to honour PR4650 - if (K.isMergeableCString() || K.isMergeableConst4() || - K.isMergeableConst8() || K.isMergeableConst16()) + if (K.isMergeableCString() || K.isMergeableConst()) Flags |= ELF::SHF_MERGE; if (K.isMergeableCString()) @@ -206,7 +199,7 @@ static const Comdat *getELFComdat(const GlobalValue *GV) { return C; } -const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( +MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { StringRef SectionName = GV->getSection(); @@ 
-222,126 +215,141 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( } return getContext().getELFSection(SectionName, getELFSectionType(SectionName, Kind), Flags, - Kind, /*EntrySize=*/0, Group); + /*EntrySize=*/0, Group); } -/// getSectionPrefixForGlobal - Return the section prefix name used by options -FunctionsSections and DataSections. +/// Return the section prefix name used by options FunctionSections and +DataSections. static StringRef getSectionPrefixForGlobal(SectionKind Kind) { - if (Kind.isText()) return ".text."; - if (Kind.isReadOnly()) return ".rodata."; - if (Kind.isBSS()) return ".bss."; - - if (Kind.isThreadData()) return ".tdata."; - if (Kind.isThreadBSS()) return ".tbss."; - - if (Kind.isDataNoRel()) return ".data."; - if (Kind.isDataRelLocal()) return ".data.rel.local."; - if (Kind.isDataRel()) return ".data.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local."; - + if (Kind.isText()) + return ".text"; + if (Kind.isReadOnly()) + return ".rodata"; + if (Kind.isBSS()) + return ".bss"; + if (Kind.isThreadData()) + return ".tdata"; + if (Kind.isThreadBSS()) + return ".tbss"; + if (Kind.isDataNoRel()) + return ".data"; + if (Kind.isDataRelLocal()) + return ".data.rel.local"; + if (Kind.isDataRel()) + return ".data.rel"; + if (Kind.isReadOnlyWithRelLocal()) + return ".data.rel.ro.local"; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".data.rel.ro."; + return ".data.rel.ro"; } -const MCSection *TargetLoweringObjectFileELF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, const TargetMachine &TM) const { - // If we have -ffunction-section or -fdata-section then we should emit the - // global value to a uniqued section specifically for it. - bool EmitUniquedSection; - if (Kind.isText()) - EmitUniquedSection = TM.getFunctionSections(); - else - EmitUniquedSection = TM.getDataSections(); - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now.
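For illustration: with -ffunction-sections, a function named foo (a hypothetical, already-mangled name) lands in ".text.foo", i.e. the kind prefix above joined to the symbol name. A minimal standalone sketch of the naming scheme, not part of the patch:

    #include <string>

    // Hypothetical helper mirroring the uniqued-section naming used here.
    std::string uniquedSectionName(const std::string &Prefix,
                                   const std::string &MangledName) {
      return Prefix + "." + MangledName; // ".text" + "foo" -> ".text.foo"
    }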
- if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && - !Kind.isCommon()) { - StringRef Prefix = getSectionPrefixForGlobal(Kind); - - SmallString<128> Name(Prefix); - TM.getNameWithPrefix(Name, GV, Mang, true); - - StringRef Group = ""; - unsigned Flags = getELFSectionFlags(Kind); - if (GV->isWeakForLinker() || GV->hasComdat()) { - if (const Comdat *C = getELFComdat(GV)) - Group = C->getName(); - else - Group = Name.substr(Prefix.size()); - Flags |= ELF::SHF_GROUP; +static MCSectionELF * +selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM, bool EmitUniqueSection, + unsigned Flags, unsigned *NextUniqueID) { + unsigned EntrySize = 0; + if (Kind.isMergeableCString()) { + if (Kind.isMergeable2ByteCString()) { + EntrySize = 2; + } else if (Kind.isMergeable4ByteCString()) { + EntrySize = 4; + } else { + EntrySize = 1; + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + } + } else if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) { + EntrySize = 4; + } else if (Kind.isMergeableConst8()) { + EntrySize = 8; + } else { + assert(Kind.isMergeableConst16() && "unknown data width"); + EntrySize = 16; } - - return getContext().getELFSection(Name.str(), - getELFSectionType(Name.str(), Kind), - Flags, Kind, 0, Group); } - if (Kind.isText()) return TextSection; - - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString() || - Kind.isMergeable4ByteCString()) { + StringRef Group = ""; + if (const Comdat *C = getELFComdat(GV)) { + Flags |= ELF::SHF_GROUP; + Group = C->getName(); + } + bool UniqueSectionNames = TM.getUniqueSectionNames(); + SmallString<128> Name; + if (Kind.isMergeableCString()) { // We also need alignment here. // FIXME: this is getting the alignment of the character, not the // alignment of the global! 
unsigned Align = - TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( - cast<GlobalVariable>(GV)); - - const char *SizeSpec = ".rodata.str1."; - if (Kind.isMergeable2ByteCString()) - SizeSpec = ".rodata.str2."; - else if (Kind.isMergeable4ByteCString()) - SizeSpec = ".rodata.str4."; - else - assert(Kind.isMergeable1ByteCString() && "unknown string width"); - + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)); - std::string Name = SizeSpec + utostr(Align); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | - ELF::SHF_MERGE | - ELF::SHF_STRINGS, - Kind); + std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; + Name = SizeSpec + utostr(Align); + } else if (Kind.isMergeableConst()) { + Name = ".rodata.cst"; + Name += utostr(EntrySize); + } else { + Name = getSectionPrefixForGlobal(Kind); } - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - return ReadOnlySection; // .const + if (EmitUniqueSection && UniqueSectionNames) { + Name.push_back('.'); + TM.getNameWithPrefix(Name, GV, Mang, true); + } + unsigned UniqueID = ~0; + if (EmitUniqueSection && !UniqueSectionNames) { + UniqueID = *NextUniqueID; + (*NextUniqueID)++; } + return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, + EntrySize, Group, UniqueID); +} - if (Kind.isReadOnly()) return ReadOnlySection; +MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + unsigned Flags = getELFSectionFlags(Kind); - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isThreadBSS()) return TLSBSSSection; + // If we have -ffunction-sections or -fdata-sections then we should emit the + // global value to a uniqued section specifically for it. + bool EmitUniqueSection = false; + if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) { + if (Kind.isText()) + EmitUniqueSection = TM.getFunctionSections(); + else + EmitUniqueSection = TM.getDataSections(); + } + EmitUniqueSection |= GV->hasComdat(); - // Note: we claim that common symbols are put in BSSSection, but they are - // really emitted with the magic .comm directive, which creates a symbol table - // entry but not a section. - if (Kind.isBSS() || Kind.isCommon()) return BSSSection; + return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM, + EmitUniqueSection, Flags, &NextUniqueID); +} - if (Kind.isDataNoRel()) return DataSection; - if (Kind.isDataRelLocal()) return DataRelLocalSection; - if (Kind.isDataRel()) return DataRelSection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; +MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( + const Function &F, Mangler &Mang, const TargetMachine &TM) const { + // If the function can be removed, produce a unique section so that + // the table doesn't prevent the removal.
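For example, a function foo (hypothetical) built with -ffunction-sections keeps its body in ".text.foo"; giving its jump table a matching uniqued read-only section lets the linker discard the table together with the function, e.g. under --gc-sections or COMDAT folding. A rough paraphrase of the decision made just below, under those assumptions:

    // The table must be discardable together with its function.
    bool WantsUniqueJumpTableSection = TM.getFunctionSections() || F.hasComdat();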
+ const Comdat *C = F.getComdat(); + bool EmitUniqueSection = TM.getFunctionSections() || C; + if (!EmitUniqueSection) + return ReadOnlySection; - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; + return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(), + Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC, + &NextUniqueID); +} + +bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( + bool UsesLabelDifference, const Function &F) const { + // We can always create relative relocations, so use another section + // that can be marked non-executable. + return false; } -/// getSectionForConstant - Given a mergeable constant with the -/// specified size and relocation information, return a section that it -/// should be placed in. -const MCSection * +/// Given a mergeable constant with the specified size and relocation +/// information, return a section that it should be placed in. +MCSection * TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, const Constant *C) const { if (Kind.isMergeableConst4() && MergeableConst4Section) @@ -358,15 +366,12 @@ TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, return DataRelROSection; } -static const MCSectionELF *getStaticStructorSection(MCContext &Ctx, - bool UseInitArray, - bool IsCtor, - unsigned Priority, - const MCSymbol *KeySym) { +static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray, + bool IsCtor, unsigned Priority, + const MCSymbol *KeySym) { std::string Name; unsigned Type; unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; - SectionKind Kind = SectionKind::getDataRel(); StringRef COMDAT = KeySym ? KeySym->getName() : ""; if (KeySym) @@ -398,16 +403,16 @@ static const MCSectionELF *getStaticStructorSection(MCContext &Ctx, Type = ELF::SHT_PROGBITS; } - return Ctx.getELFSection(Name, Type, Flags, Kind, 0, COMDAT); + return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT); } -const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( +MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getStaticStructorSection(getContext(), UseInitArray, true, Priority, KeySym); } -const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( +MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getStaticStructorSection(getContext(), UseInitArray, false, Priority, KeySym); @@ -419,22 +424,21 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { if (!UseInitArray) return; - StaticCtorSection = - getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); - StaticDtorSection = - getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); + StaticCtorSection = getContext().getELFSection( + ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); + StaticDtorSection = getContext().getELFSection( + ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); } //===----------------------------------------------------------------------===// // MachO //===----------------------------------------------------------------------===// +TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() + : TargetLoweringObjectFile() { + SupportIndirectSymViaGOTPCRel = true; +} + /// getDepLibFromLinkerOpt - Extract the dependent 
library name from a linker /// option string. Returns StringRef() if the option does not specify a library. StringRef TargetLoweringObjectFileMachO:: @@ -511,12 +515,11 @@ emitModuleFlags(MCStreamer &Streamer, ErrorCode + "."); // Get the section. - const MCSectionMachO *S = - getContext().getMachOSection(Segment, Section, TAA, StubSize, - SectionKind::getDataNoRel()); + MCSectionMachO *S = getContext().getMachOSection( + Segment, Section, TAA, StubSize, SectionKind::getDataNoRel()); Streamer.SwitchSection(S); Streamer.EmitLabel(getContext(). - GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); + getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); Streamer.EmitIntValue(VersionVal, 4); Streamer.EmitIntValue(ImageInfoFlags, 4); Streamer.AddBlankLine(); @@ -531,7 +534,7 @@ static void checkMachOComdat(const GlobalValue *GV) { "' cannot be lowered."); } -const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( +MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { // Parse the section specifier and create it if valid. @@ -552,8 +555,8 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( } // Get the section. - const MCSectionMachO *S = - getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); + MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); // If TAA wasn't set by ParseSectionSpecifier() above, // use the value returned by getMachOSection() as a default. @@ -573,9 +576,9 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( return S; } -const MCSection *TargetLoweringObjectFileMachO:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, const TargetMachine &TM) const { +MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { checkMachOComdat(GV); // Handle thread local data. @@ -595,16 +598,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. 
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or @@ -642,7 +643,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return DataSection; } -const MCSection * +MCSection * TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind, const Constant *C) const { // If this constant requires a relocation, we have to put it in the data @@ -711,6 +712,66 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( return SSym; } +const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { + // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation + // as 64-bit do, we replace the GOT equivalent by accessing the final symbol + // through a non_lazy_ptr stub instead. One advantage is that it allows the + // computation of deltas to final external symbols. Example: + // + // _extgotequiv: + // .long _extfoo + // + // _delta: + // .long _extgotequiv-_delta + // + // is transformed to: + // + // _delta: + // .long L_extfoo$non_lazy_ptr-(_delta+0) + // + // .section __IMPORT,__pointers,non_lazy_symbol_pointers + // L_extfoo$non_lazy_ptr: + // .indirect_symbol _extfoo + // .long 0 + // + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + MCContext &Ctx = getContext(); + + // The offset must consider the original displacement from the base symbol + // since 32-bit targets don't have a GOTPCREL to fold the PC displacement. + Offset = -MV.getConstant(); + const MCSymbol *BaseSym = &MV.getSymB()->getSymbol(); + + // Access the final symbol via sym$non_lazy_ptr and generate the appropriate + // non_lazy_ptr stubs.
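Concretely, reusing the names from the example above: for _extfoo the stub symbol built below becomes "L_extfoo$non_lazy_ptr" (the data layout's private-global prefix, "L" on Darwin, plus the symbol name plus the suffix), and for a non-zero addend the final expression is

    L_extfoo$non_lazy_ptr - (_delta + Offset)

while a zero addend yields the simpler L_extfoo$non_lazy_ptr - _delta.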
+ SmallString<128> Name; + StringRef Suffix = "$non_lazy_ptr"; + Name += DL->getPrivateGlobalPrefix(); + Name += Sym->getName(); + Name += Suffix; + MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); + + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub); + if (!StubSym.getPointer()) + StubSym = MachineModuleInfoImpl:: + StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */); + + const MCExpr *BSymExpr = + MCSymbolRefExpr::Create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); + const MCExpr *LHS = + MCSymbolRefExpr::Create(Stub, MCSymbolRefExpr::VK_None, Ctx); + + if (!Offset) + return MCBinaryExpr::CreateSub(LHS, BSymExpr, Ctx); + + const MCExpr *RHS = + MCBinaryExpr::CreateAdd(BSymExpr, MCConstantExpr::Create(Offset, Ctx), Ctx); + return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); +} + //===----------------------------------------------------------------------===// // COFF //===----------------------------------------------------------------------===// @@ -794,14 +855,14 @@ static int getSelectionForCOFF(const GlobalValue *GV) { return 0; } -const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( +MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind); StringRef Name = GV->getSection(); StringRef COMDATSymName = ""; - if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) { + if (GV->hasComdat()) { Selection = getSelectionForCOFF(GV); const GlobalValue *ComdatGV; if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) @@ -836,10 +897,9 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { return ".data"; } - -const MCSection *TargetLoweringObjectFileCOFF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, const TargetMachine &TM) const { +MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { // If we have -ffunction-sections then we should emit the global value to a // uniqued section specifically for it. bool EmitUniquedSection; @@ -848,12 +908,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, else EmitUniquedSection = TM.getDataSections(); - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - // Section names depend on the name of the symbol which is not feasible if the - // symbol has private linkage. 
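Unlike ELF, COFF does not splice the symbol into the section name; a uniqued global keeps a fixed section name per kind and is distinguished by a COMDAT symbol plus a selection mode. A rough sketch of the resulting key, with a hypothetical mangled name:

    // Illustrative only: the triple that identifies a uniqued COFF section.
    struct COFFUniqueKey {
      const char *SectionName; // fixed per kind, e.g. ".text"
      const char *COMDATSym;   // e.g. "?foo@@YAXXZ" (hypothetical)
      unsigned Selection;      // e.g. COFF::IMAGE_COMDAT_SELECT_ANY
    };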
- if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && - !Kind.isCommon()) { + if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) { const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind); @@ -872,6 +927,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, StringRef COMDATSymName = Sym->getName(); return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection); + } else { + SmallString<256> TmpData; + getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM); + return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, + Selection); } } @@ -893,6 +953,42 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return DataSection; } +void TargetLoweringObjectFileCOFF::getNameWithPrefix( + SmallVectorImpl<char> &OutName, const GlobalValue *GV, + bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const { + if (GV->hasPrivateLinkage() && + ((isa<Function>(GV) && TM.getFunctionSections()) || + (isa<GlobalVariable>(GV) && TM.getDataSections()))) + CannotUsePrivateLabel = true; + + Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); +} + +MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( + const Function &F, Mangler &Mang, const TargetMachine &TM) const { + // If the function can be removed, produce a unique section so that + // the table doesn't prevent the removal. + const Comdat *C = F.getComdat(); + bool EmitUniqueSection = TM.getFunctionSections() || C; + if (!EmitUniqueSection) + return ReadOnlySection; + + // FIXME: we should produce a symbol for F instead. + if (F.hasPrivateLinkage()) + return ReadOnlySection; + + MCSymbol *Sym = TM.getSymbol(&F, Mang); + StringRef COMDATSymName = Sym->getName(); + + SectionKind Kind = SectionKind::getReadOnly(); + const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); + unsigned Characteristics = getCOFFSectionFlags(Kind); + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + + return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); +} + StringRef TargetLoweringObjectFileCOFF:: getDepLibFromLinkerOpt(StringRef LinkerOption) const { const char *LibCmd = "/DEFAULTLIB:"; @@ -923,38 +1019,27 @@ emitModuleFlags(MCStreamer &Streamer, // Emit the linker options to the linker .drectve section. According to the // spec, this section is a space-separated string containing flags for linker. - const MCSection *Sec = getDrectveSection(); + MCSection *Sec = getDrectveSection(); Streamer.SwitchSection(Sec); for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); - StringRef Op = MDOption->getString(); // Lead with a space for consistency with our dllexport implementation. - std::string Escaped(" "); - if (!Op.startswith("\"") && (Op.find(" ") != StringRef::npos)) { - // The PE-COFF spec says args with spaces must be quoted. 
It doesn't say - // how to escape quotes, but it probably uses this algorithm: - // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx - // FIXME: Reuse escaping code from Support/Windows/Program.inc - Escaped.push_back('\"'); - Escaped.append(Op); - Escaped.push_back('\"'); - } else { - Escaped.append(Op); - } - Streamer.EmitBytes(Escaped); + std::string Directive(" "); + Directive.append(MDOption->getString()); + Streamer.EmitBytes(Directive); } } } -const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( +MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getContext().getAssociativeCOFFSection( cast<MCSectionCOFF>(StaticCtorSection), KeySym); } -const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( +MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getContext().getAssociativeCOFFSection( cast<MCSectionCOFF>(StaticDtorSection), KeySym); diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 618d903..f4926cb 100644 --- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -12,23 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; /// DisableFramePointerElim - This returns true if frame pointer elimination /// optimization should be disabled for the given machine function. bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { - // Check to see if we should eliminate non-leaf frame pointers and then - // check to see if we should eliminate all frame pointers. - if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf") && - !NoFramePointerElim) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->hasCalls(); - } + // Check to see if we should eliminate all frame pointers. + if (MF.getSubtarget().getFrameLowering()->noFramePointerElim(MF)) + return true; - return NoFramePointerElim; + // Check to see if we should eliminate non-leaf frame pointers. + if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf")) + return MF.getFrameInfo()->hasCalls(); + + return false; } /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option @@ -51,10 +54,3 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const { StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } - -/// getCFIFuncName - If this returns a non-empty string, then it is the name of -/// the function that gets called on CFI violations in CFI non-enforcing mode -/// (!TargetOptions::CFIEnforcing). -StringRef TargetOptions::getCFIFuncName() const { - return CFIFuncName; -} diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index ef2dab1..299380d 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -224,26 +224,29 @@ unsigned TargetSchedModel::computeOperandLatency( return DefMI->isTransient() ? 
0 : TII->defaultDefLatency(SchedModel, DefMI); } +unsigned +TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const { + unsigned Latency = 0; + for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(&SCDesc, DefIdx); + Latency = std::max(Latency, capLatency(WLEntry->Cycles)); + } + return Latency; +} + unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const { assert(hasInstrSchedModel() && "Only call this function with a SchedModel"); unsigned SCIdx = TII->get(Opcode).getSchedClass(); const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx); - unsigned Latency = 0; - if (SCDesc->isValid() && !SCDesc->isVariant()) { - for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; - DefIdx != DefEnd; ++DefIdx) { - // Lookup the definition's write latency in SubtargetInfo. - const MCWriteLatencyEntry *WLEntry = - STI->getWriteLatencyEntry(SCDesc, DefIdx); - Latency = std::max(Latency, capLatency(WLEntry->Cycles)); - } - return Latency; - } + if (SCDesc->isValid() && !SCDesc->isVariant()) + return computeInstrLatency(*SCDesc); - assert(Latency && "No MI sched latency"); - return 0; + llvm_unreachable("No MI sched latency"); } unsigned @@ -257,17 +260,8 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI, if (hasInstrSchedModel()) { const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); - if (SCDesc->isValid()) { - unsigned Latency = 0; - for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; - DefIdx != DefEnd; ++DefIdx) { - // Lookup the definition's write latency in SubtargetInfo. - const MCWriteLatencyEntry *WLEntry = - STI->getWriteLatencyEntry(SCDesc, DefIdx); - Latency = std::max(Latency, capLatency(WLEntry->Cycles)); - } - return Latency; - } + if (SCDesc->isValid()) + return computeInstrLatency(*SCDesc); } return TII->defaultDefLatency(SchedModel, MI); } diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index e218a83..6bceccc 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -102,6 +103,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, MachineBasicBlock::iterator OldPos); + bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen); + bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, @@ -186,7 +189,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, // Check if it's safe to move this instruction. bool SeenStore = true; // Be conservative. - if (!MI->isSafeToMove(TII, AA, SeenStore)) + if (!MI->isSafeToMove(AA, SeenStore)) return false; unsigned DefReg = 0; @@ -309,6 +312,45 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, return true; } +/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg +/// in current BB. 
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + MachineInstr *Ret = nullptr; + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { + if (DefMI.getParent() != BB || DefMI.isDebugValue()) + continue; + if (!Ret) + Ret = &DefMI; + else if (Ret != &DefMI) + return nullptr; + } + return Ret; +} + +/// Check if there is a reversed copy chain from FromReg to ToReg: +/// %Tmp1 = copy %Tmp2; +/// %FromReg = copy %Tmp1; +/// %ToReg = add %FromReg ... +/// %Tmp2 = copy %ToReg; +/// MaxLen specifies the maximum length of the copy chain the function +/// can walk through. +bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg, + int Maxlen) { + unsigned TmpReg = FromReg; + for (int i = 0; i < Maxlen; i++) { + MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI); + if (!Def || !Def->isCopy()) + return false; + + TmpReg = Def->getOperand(1).getReg(); + + if (TmpReg == ToReg) + return true; + } + return false; +} + /// noUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. It also returns the last @@ -574,6 +616,27 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, if (!noUseAfterLastDef(regB, Dist, LastDefB)) return true; + // Look for a situation like this: + // %reg101 = MOV %reg100 + // %reg102 = ... + // %reg103 = ADD %reg102, %reg101 + // ... = %reg103 ... + // %reg100 = MOV %reg103 + // If there is a reversed copy chain from reg101 to reg103, commute the ADD + // to eliminate an otherwise unavoidable copy. + // FIXME: + // We can extend the logic further: If a pair of operands in an insn has + // been merged, the insn could be regarded as a virtual copy, and the virtual + // copy could also be used to construct a copy chain. + // To more generally minimize register copies, ideally the logic of the + // two-address instruction pass should be integrated with the register + // allocation pass, where the interference graph is available. + if (isRevCopyChain(regC, regA, 3)) + return true; + + if (isRevCopyChain(regB, regA, 3)) + return false; + // Since there are no intervening uses for both registers, then commute // if the def of regC is closer. Its live interval is shorter.
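For instance, with hypothetical distances LastDefB = 10 and LastDefC = 14, regC is defined nearer the two-address instruction, its live interval is the shorter one, and the comparison below reports that commuting is profitable:

    // Hypothetical values plugged into the comparison below.
    unsigned LastDefB = 10, LastDefC = 14;
    bool Commute = LastDefB && LastDefC && LastDefC > LastDefB; // true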
return LastDefB && LastDefC && LastDefC > LastDefB; @@ -798,7 +861,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, return false; bool SeenStore = true; - if (!MI->isSafeToMove(TII, AA, SeenStore)) + if (!MI->isSafeToMove(AA, SeenStore)) return false; if (TII->getInstrLatency(InstrItins, MI) > 1) @@ -985,7 +1048,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, return false; bool SeenStore = true; - if (!KillMI->isSafeToMove(TII, AA, SeenStore)) + if (!KillMI->isSafeToMove(AA, SeenStore)) return false; SmallSet<unsigned, 2> Uses; @@ -1515,9 +1578,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const TargetMachine &TM = MF->getTarget(); MRI = &MF->getRegInfo(); - TII = TM.getSubtargetImpl()->getInstrInfo(); - TRI = TM.getSubtargetImpl()->getRegisterInfo(); - InstrItins = TM.getSubtargetImpl()->getInstrItineraryData(); + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + InstrItins = MF->getSubtarget().getInstrItineraryData(); LV = getAnalysisIfAvailable<LiveVariables>(); LIS = getAnalysisIfAvailable<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 7824f92..d393e10 100644 --- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -88,7 +88,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { DeadBlocks[i]->eraseFromParent(); } - return DeadBlocks.size(); + return !DeadBlocks.empty(); } @@ -204,5 +204,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { F.RenumberBlocks(); - return (DeadBlocks.size() || ModifiedPHI); + return (!DeadBlocks.empty() || ModifiedPHI); } diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 7d3b0ce..9fb1b5b 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -264,8 +264,7 @@ void VirtRegRewriter::addMBBLiveIns() { if ((SubRegLaneMask & S.LaneMask) == 0) continue; for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - if (!LiveIn[i]->isLiveIn(SubReg)) - LiveIn[i]->addLiveIn(SubReg); + LiveIn[i]->addLiveIn(SubReg); } } LiveIn.clear(); @@ -277,16 +276,20 @@ void VirtRegRewriter::addMBBLiveIns() { if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) continue; for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) - if (!LiveIn[i]->isLiveIn(PhysReg)) - LiveIn[i]->addLiveIn(PhysReg); + LiveIn[i]->addLiveIn(PhysReg); LiveIn.clear(); } } } + + // Sort and unique MBB LiveIns as we've not checked if SubReg/PhysReg were in + // each MBB's LiveIns set before calling addLiveIn on them. + for (MachineBasicBlock &MBB : *MF) + MBB.sortUniqueLiveIns(); } void VirtRegRewriter::rewrite() { - bool NoSubRegLiveness = !MRI->tracksSubRegLiveness(); + bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp new file mode 100644 index 0000000..7246e1c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -0,0 +1,2482 @@ +//===-- WinEHPrepare - Prepare exception handling for code generation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass lowers LLVM IR exception handling into something closer to what the +// backend wants for functions using a personality function from a runtime +// provided by MSVC. Functions with other personality functions are left alone +// and may be prepared by other passes. In particular, all supported MSVC +// personality functions require cleanup code to be outlined, and the C++ +// personality requires catch handler code to be outlined. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include <memory> + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "winehprepare" + +namespace { + +// This map is used to model frame variable usage during outlining, to +// construct a structure type to hold the frame variables in a frame +// allocation block, and to remap the frame variable allocas (including +// spill locations as needed) to GEPs that get the variable from the +// frame allocation structure. +typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap; + +// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't +// quite null. +AllocaInst *getCatchObjectSentinel() { + return static_cast<AllocaInst *>(nullptr) + 1; +} + +typedef SmallSet<BasicBlock *, 4> VisitedBlockSet; + +class LandingPadActions; +class LandingPadMap; + +typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy; +typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy; + +class WinEHPrepare : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid. 
+ WinEHPrepare(const TargetMachine *TM = nullptr) + : FunctionPass(ID) { + if (TM) + TheTriple = Triple(TM->getTargetTriple()); + } + + bool runOnFunction(Function &Fn) override; + + bool doFinalization(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + const char *getPassName() const override { + return "Windows exception handling preparation"; + } + +private: + bool prepareExceptionHandlers(Function &F, + SmallVectorImpl<LandingPadInst *> &LPads); + void identifyEHBlocks(Function &F, SmallVectorImpl<LandingPadInst *> &LPads); + void promoteLandingPadValues(LandingPadInst *LPad); + void demoteValuesLiveAcrossHandlers(Function &F, + SmallVectorImpl<LandingPadInst *> &LPads); + void findSEHEHReturnPoints(Function &F, + SetVector<BasicBlock *> &EHReturnBlocks); + void findCXXEHReturnPoints(Function &F, + SetVector<BasicBlock *> &EHReturnBlocks); + void getPossibleReturnTargets(Function *ParentF, Function *HandlerF, + SetVector<BasicBlock*> &Targets); + void completeNestedLandingPad(Function *ParentFn, + LandingPadInst *OutlinedLPad, + const LandingPadInst *OriginalLPad, + FrameVarInfoMap &VarInfo); + Function *createHandlerFunc(Type *RetTy, const Twine &Name, Module *M, + Value *&ParentFP); + bool outlineHandler(ActionHandler *Action, Function *SrcFn, + LandingPadInst *LPad, BasicBlock *StartBB, + FrameVarInfoMap &VarInfo); + void addStubInvokeToHandlerIfNeeded(Function *Handler, Value *PersonalityFn); + + void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions); + CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB, + VisitedBlockSet &VisitedBlocks); + void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB, + BasicBlock *EndBB); + + void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB); + + Triple TheTriple; + + // All fields are reset by runOnFunction. + DominatorTree *DT = nullptr; + EHPersonality Personality = EHPersonality::Unknown; + CatchHandlerMapTy CatchHandlerMap; + CleanupHandlerMapTy CleanupHandlerMap; + DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps; + SmallPtrSet<BasicBlock *, 4> NormalBlocks; + SmallPtrSet<BasicBlock *, 4> EHBlocks; + SetVector<BasicBlock *> EHReturnBlocks; + + // This maps landing pad instructions found in outlined handlers to + // the landing pad instruction in the parent function from which they + // were cloned. The cloned/nested landing pad is used as the key + // because the landing pad may be cloned into multiple handlers. + // This map will be used to add the llvm.eh.actions call to the nested + // landing pads after all handlers have been outlined. + DenseMap<LandingPadInst *, const LandingPadInst *> NestedLPtoOriginalLP; + + // This maps blocks in the parent function which are destinations of + // catch handlers to cloned blocks in (other) outlined handlers. This + // handles the case where a nested landing pad has a catch handler that + // returns to a handler function rather than the parent function. + // The original block is used as the key here because there should only + // ever be one handler function from which the cloned block is not pruned. + // The original block will be pruned from the parent function after all + // handlers have been outlined. This map will be used to adjust the + // return instructions of handlers which return to the block that was + // outlined into a handler. This is done after all handlers have been + // outlined but before the outlined code is pruned from the parent function.
+ DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks; + + // Map from outlined handler to call to llvm.frameaddress(1). Only used for + // 32-bit EH. + DenseMap<Function *, Value *> HandlerToParentFP; + + AllocaInst *SEHExceptionCodeSlot = nullptr; +}; + +class WinEHFrameVariableMaterializer : public ValueMaterializer { +public: + WinEHFrameVariableMaterializer(Function *OutlinedFn, Value *ParentFP, + FrameVarInfoMap &FrameVarInfo); + ~WinEHFrameVariableMaterializer() override {} + + Value *materializeValueFor(Value *V) override; + + void escapeCatchObject(Value *V); + +private: + FrameVarInfoMap &FrameVarInfo; + IRBuilder<> Builder; +}; + +class LandingPadMap { +public: + LandingPadMap() : OriginLPad(nullptr) {} + void mapLandingPad(const LandingPadInst *LPad); + + bool isInitialized() { return OriginLPad != nullptr; } + + bool isOriginLandingPadBlock(const BasicBlock *BB) const; + bool isLandingPadSpecificInst(const Instruction *Inst) const; + + void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, + Value *SelectorValue) const; + +private: + const LandingPadInst *OriginLPad; + // We will normally only see one of each of these instructions, but + // if more than one occurs for some reason we can handle that. + TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs; + TinyPtrVector<const ExtractValueInst *> ExtractedSelectors; +}; + +class WinEHCloningDirectorBase : public CloningDirector { +public: + WinEHCloningDirectorBase(Function *HandlerFn, Value *ParentFP, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + : Materializer(HandlerFn, ParentFP, VarInfo), + SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())), + Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())), + LPadMap(LPadMap), ParentFP(ParentFP) {} + + CloningAction handleInstruction(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + + virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleIndirectBr(ValueToValueMapTy &VMap, + const IndirectBrInst *IBr, + BasicBlock *NewBB) = 0; + virtual CloningAction handleInvoke(ValueToValueMapTy &VMap, + const InvokeInst *Invoke, + BasicBlock *NewBB) = 0; + virtual CloningAction handleResume(ValueToValueMapTy &VMap, + const ResumeInst *Resume, + BasicBlock *NewBB) = 0; + virtual CloningAction handleCompare(ValueToValueMapTy &VMap, + const CmpInst *Compare, + BasicBlock *NewBB) = 0; + virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) = 0; + + ValueMaterializer *getValueMaterializer() override { return &Materializer; } + +protected: + WinEHFrameVariableMaterializer Materializer; + Type *SelectorIDType; + Type *Int8PtrType; + LandingPadMap &LPadMap; + + /// The value representing the parent frame pointer. 
+ Value *ParentFP; +}; + +class WinEHCatchDirector : public WinEHCloningDirectorBase { +public: + WinEHCatchDirector( + Function *CatchFn, Value *ParentFP, Value *Selector, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap, + DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPads, + DominatorTree *DT, SmallPtrSetImpl<BasicBlock *> &EHBlocks) + : WinEHCloningDirectorBase(CatchFn, ParentFP, VarInfo, LPadMap), + CurrentSelector(Selector->stripPointerCasts()), + ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads), + DT(DT), EHBlocks(EHBlocks) {} + + CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleIndirectBr(ValueToValueMapTy &VMap, + const IndirectBrInst *IBr, + BasicBlock *NewBB) override; + CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, + BasicBlock *NewBB) override; + CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, + BasicBlock *NewBB) override; + CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, + BasicBlock *NewBB) override; + CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) override; + + Value *getExceptionVar() { return ExceptionObjectVar; } + TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } + +private: + Value *CurrentSelector; + + Value *ExceptionObjectVar; + TinyPtrVector<BasicBlock *> ReturnTargets; + + // This will be a reference to the field of the same name in the WinEHPrepare + // object which instantiates this WinEHCatchDirector object. 
+ DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPtoOriginalLP; + DominatorTree *DT; + SmallPtrSetImpl<BasicBlock *> &EHBlocks; +}; + +class WinEHCleanupDirector : public WinEHCloningDirectorBase { +public: + WinEHCleanupDirector(Function *CleanupFn, Value *ParentFP, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + : WinEHCloningDirectorBase(CleanupFn, ParentFP, VarInfo, + LPadMap) {} + + CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleIndirectBr(ValueToValueMapTy &VMap, + const IndirectBrInst *IBr, + BasicBlock *NewBB) override; + CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, + BasicBlock *NewBB) override; + CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, + BasicBlock *NewBB) override; + CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, + BasicBlock *NewBB) override; + CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) override; +}; + +class LandingPadActions { +public: + LandingPadActions() : HasCleanupHandlers(false) {} + + void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); } + void insertCleanupHandler(CleanupHandler *Action) { + Actions.push_back(Action); + HasCleanupHandlers = true; + } + + bool includesCleanup() const { return HasCleanupHandlers; } + + SmallVectorImpl<ActionHandler *> &actions() { return Actions; } + SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); } + SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); } + +private: + // Note that this class does not own the ActionHandler objects in this vector. + // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap + // in the WinEHPrepare class. + SmallVector<ActionHandler *, 4> Actions; + bool HasCleanupHandlers; +}; + +} // end anonymous namespace + +char WinEHPrepare::ID = 0; +INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions", + false, false) + +FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { + return new WinEHPrepare(TM); +} + +bool WinEHPrepare::runOnFunction(Function &Fn) { + // No need to prepare outlined handlers. + if (Fn.hasFnAttribute("wineh-parent")) + return false; + + SmallVector<LandingPadInst *, 4> LPads; + SmallVector<ResumeInst *, 4> Resumes; + for (BasicBlock &BB : Fn) { + if (auto *LP = BB.getLandingPadInst()) + LPads.push_back(LP); + if (auto *Resume = dyn_cast<ResumeInst>(BB.getTerminator())) + Resumes.push_back(Resume); + } + + // No need to prepare functions that lack landing pads. + if (LPads.empty()) + return false; + + // Classify the personality to see what kind of preparation we need. + Personality = classifyEHPersonality(LPads.back()->getPersonalityFn()); + + // Do nothing if this is not an MSVC personality. + if (!isMSVCEHPersonality(Personality)) + return false; + + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + // If there were any landing pads, prepareExceptionHandlers will make changes. 
+ prepareExceptionHandlers(Fn, LPads); + return true; +} + +bool WinEHPrepare::doFinalization(Module &M) { return false; } + +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); +} + +static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, + Constant *&Selector, BasicBlock *&NextBB); + +// Finds blocks reachable from the starting set Worklist. Does not follow unwind +// edges or blocks listed in StopPoints. +static void findReachableBlocks(SmallPtrSetImpl<BasicBlock *> &ReachableBBs, + SetVector<BasicBlock *> &Worklist, + const SetVector<BasicBlock *> *StopPoints) { + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + + // Don't cross blocks that we should stop at. + if (StopPoints && StopPoints->count(BB)) + continue; + + if (!ReachableBBs.insert(BB).second) + continue; // Already visited. + + // Don't follow unwind edges of invokes. + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Worklist.insert(II->getNormalDest()); + continue; + } + + // Otherwise, follow all successors. + Worklist.insert(succ_begin(BB), succ_end(BB)); + } +} + +// Attempt to find an instruction where a block can be split before +// a call to llvm.eh.begincatch and its operands. If the block +// begins with the begincatch call or one of its adjacent operands +// the block will not be split. +static Instruction *findBeginCatchSplitPoint(BasicBlock *BB, + IntrinsicInst *II) { + // If the begincatch call is already the first instruction in the block, + // don't split. + Instruction *FirstNonPHI = BB->getFirstNonPHI(); + if (II == FirstNonPHI) + return nullptr; + + // If either operand is in the same basic block as the instruction and + // isn't used by another instruction before the begincatch call, include it + // in the split block. + auto *Op0 = dyn_cast<Instruction>(II->getOperand(0)); + auto *Op1 = dyn_cast<Instruction>(II->getOperand(1)); + + Instruction *I = II->getPrevNode(); + Instruction *LastI = II; + + while (I == Op0 || I == Op1) { + // If the block begins with one of the operands and there are no other + // instructions between the operand and the begincatch call, don't split. + if (I == FirstNonPHI) + return nullptr; + + LastI = I; + I = I->getPrevNode(); + } + + // If there is at least one instruction in the block before the begincatch + // call and its operands, split the block at either the begincatch or + // its operand. + return LastI; +} + +/// Find all points where exceptional control rejoins normal control flow via +/// llvm.eh.endcatch. Add them to the normal bb reachability worklist. +void WinEHPrepare::findCXXEHReturnPoints( + Function &F, SetVector<BasicBlock *> &EHReturnBlocks) { + for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { + BasicBlock *BB = BBI; + for (Instruction &I : *BB) { + if (match(&I, m_Intrinsic<Intrinsic::eh_begincatch>())) { + Instruction *SplitPt = + findBeginCatchSplitPoint(BB, cast<IntrinsicInst>(&I)); + if (SplitPt) { + // Split the block before the llvm.eh.begincatch call to allow + // cleanup and catch code to be distinguished later. + // Do not update BBI because we still need to process the + // portion of the block that we are splitting off. + SplitBlock(BB, SplitPt, DT); + break; + } + } + if (match(&I, m_Intrinsic<Intrinsic::eh_endcatch>())) { + // Split the block after the call to llvm.eh.endcatch if there is + // anything other than an unconditional branch, or if the successor + // starts with a phi. 
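For example (hypothetical IR), given

    call void @llvm.eh.endcatch()
    br label %cont

where %cont begins with a phi, the block is split immediately after the endcatch call; the new block, which still branches to %cont, becomes the recorded EH return point, so the return point itself carries no phi (the assertion in identifyEHBlocks relies on this).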
+ auto *Br = dyn_cast<BranchInst>(I.getNextNode()); + if (!Br || !Br->isUnconditional() || + isa<PHINode>(Br->getSuccessor(0)->begin())) { + DEBUG(dbgs() << "splitting block " << BB->getName() + << " with llvm.eh.endcatch\n"); + BBI = SplitBlock(BB, I.getNextNode(), DT); + } + // The next BB is normal control flow. + EHReturnBlocks.insert(BB->getTerminator()->getSuccessor(0)); + break; + } + } + } +} + +static bool isCatchAllLandingPad(const BasicBlock *BB) { + const LandingPadInst *LP = BB->getLandingPadInst(); + if (!LP) + return false; + unsigned N = LP->getNumClauses(); + return (N > 0 && LP->isCatch(N - 1) && + isa<ConstantPointerNull>(LP->getClause(N - 1))); +} + +/// Find all points where exceptional control rejoins normal control flow via +/// selector dispatch. +void WinEHPrepare::findSEHEHReturnPoints( + Function &F, SetVector<BasicBlock *> &EHReturnBlocks) { + for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { + BasicBlock *BB = BBI; + // If the landingpad is a catch-all, treat the whole lpad as if it is + // reachable from normal control flow. + // FIXME: This is imprecise. We need a better way of identifying where a + // catch-all starts and cleanups stop. As far as LLVM is concerned, there + // is no difference. + if (isCatchAllLandingPad(BB)) { + EHReturnBlocks.insert(BB); + continue; + } + + BasicBlock *CatchHandler; + BasicBlock *NextBB; + Constant *Selector; + if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) { + // Split the edge if there is a phi node. Returning from EH to a phi node + // is just as impossible as having a phi after an indirectbr. + if (isa<PHINode>(CatchHandler->begin())) { + DEBUG(dbgs() << "splitting EH return edge from " << BB->getName() + << " to " << CatchHandler->getName() << '\n'); + BBI = CatchHandler = SplitCriticalEdge( + BB, std::find(succ_begin(BB), succ_end(BB), CatchHandler)); + } + EHReturnBlocks.insert(CatchHandler); + } + } +} + +void WinEHPrepare::identifyEHBlocks(Function &F, + SmallVectorImpl<LandingPadInst *> &LPads) { + DEBUG(dbgs() << "Demoting values live across exception handlers in function " + << F.getName() << '\n'); + + // Build a set of all non-exceptional blocks and exceptional blocks. + // - Non-exceptional blocks are blocks reachable from the entry block while + // not following invoke unwind edges. + // - Exceptional blocks are blocks reachable from landingpads. Analysis does + // not follow llvm.eh.endcatch blocks, which mark a transition from + // exceptional to normal control. + + if (Personality == EHPersonality::MSVC_CXX) + findCXXEHReturnPoints(F, EHReturnBlocks); + else + findSEHEHReturnPoints(F, EHReturnBlocks); + + DEBUG({ + dbgs() << "identified the following blocks as EH return points:\n"; + for (BasicBlock *BB : EHReturnBlocks) + dbgs() << " " << BB->getName() << '\n'; + }); + +// Join points should not have phis at this point, unless they are +// landingpads, in which case we will demote their phis later. +#ifndef NDEBUG + for (BasicBlock *BB : EHReturnBlocks) + assert((BB->isLandingPad() || !isa<PHINode>(BB->begin())) && + "non-lpad EH return block has phi"); +#endif + + // Normal blocks are the blocks reachable from the entry block and all EH + // return points.
+ SetVector<BasicBlock *> Worklist; + Worklist = EHReturnBlocks; + Worklist.insert(&F.getEntryBlock()); + findReachableBlocks(NormalBlocks, Worklist, nullptr); + DEBUG({ + dbgs() << "marked the following blocks as normal:\n"; + for (BasicBlock *BB : NormalBlocks) + dbgs() << " " << BB->getName() << '\n'; + }); + + // Exceptional blocks are the blocks reachable from landingpads that don't + // cross EH return points. + Worklist.clear(); + for (auto *LPI : LPads) + Worklist.insert(LPI->getParent()); + findReachableBlocks(EHBlocks, Worklist, &EHReturnBlocks); + DEBUG({ + dbgs() << "marked the following blocks as exceptional:\n"; + for (BasicBlock *BB : EHBlocks) + dbgs() << " " << BB->getName() << '\n'; + }); + +} + +/// Ensure that all values live into and out of exception handlers are stored +/// in memory. +/// FIXME: This falls down when values are defined in one handler and live into +/// another handler. For example, a cleanup defines a value used only by a +/// catch handler. +void WinEHPrepare::demoteValuesLiveAcrossHandlers( + Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { + DEBUG(dbgs() << "Demoting values live across exception handlers in function " + << F.getName() << '\n'); + + // identifyEHBlocks() should have been called before this function. + assert(!NormalBlocks.empty()); + + SetVector<Argument *> ArgsToDemote; + SetVector<Instruction *> InstrsToDemote; + for (BasicBlock &BB : F) { + bool IsNormalBB = NormalBlocks.count(&BB); + bool IsEHBB = EHBlocks.count(&BB); + if (!IsNormalBB && !IsEHBB) + continue; // Blocks that are neither normal nor EH are unreachable. + for (Instruction &I : BB) { + for (Value *Op : I.operands()) { + // Don't demote static allocas, constants, and labels. + if (isa<Constant>(Op) || isa<BasicBlock>(Op) || isa<InlineAsm>(Op)) + continue; + auto *AI = dyn_cast<AllocaInst>(Op); + if (AI && AI->isStaticAlloca()) + continue; + + if (auto *Arg = dyn_cast<Argument>(Op)) { + if (IsEHBB) { + DEBUG(dbgs() << "Demoting argument " << *Arg + << " used by EH instr: " << I << "\n"); + ArgsToDemote.insert(Arg); + } + continue; + } + + auto *OpI = cast<Instruction>(Op); + BasicBlock *OpBB = OpI->getParent(); + // If a value is produced and consumed in the same BB, we don't need to + // demote it. + if (OpBB == &BB) + continue; + bool IsOpNormalBB = NormalBlocks.count(OpBB); + bool IsOpEHBB = EHBlocks.count(OpBB); + if (IsNormalBB != IsOpNormalBB || IsEHBB != IsOpEHBB) { + DEBUG({ + dbgs() << "Demoting instruction live in-out from EH:\n"; + dbgs() << "Instr: " << *OpI << '\n'; + dbgs() << "User: " << I << '\n'; + }); + InstrsToDemote.insert(OpI); + } + } + } + } + + // Demote values live into and out of handlers. + // FIXME: This demotion is inefficient. We should insert spills at the point + // of definition, insert one reload in each handler that uses the value, and + // insert reloads in the BB used to rejoin normal control flow. + Instruction *AllocaInsertPt = F.getEntryBlock().getFirstInsertionPt(); + for (Instruction *I : InstrsToDemote) + DemoteRegToStack(*I, false, AllocaInsertPt); + + // Demote arguments separately, and only for uses in EH blocks. 
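In IR terms, the loop below performs roughly this rewrite for a hypothetical i32 argument %x with a use in a handler block (names follow the ".reg2mem"/".reload" suffixes used by the code; non-EH uses keep the original argument):

    entry:
      %x.reg2mem = alloca i32
      store i32 %x, i32* %x.reg2mem
      ...
    ehblock:
      %x.reload = load i32, i32* %x.reg2mem
      ; the EH use of %x is rewritten to use %x.reload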
+ for (Argument *Arg : ArgsToDemote) { + auto *Slot = new AllocaInst(Arg->getType(), nullptr, + Arg->getName() + ".reg2mem", AllocaInsertPt); + SmallVector<User *, 4> Users(Arg->user_begin(), Arg->user_end()); + for (User *U : Users) { + auto *I = dyn_cast<Instruction>(U); + if (I && EHBlocks.count(I->getParent())) { + auto *Reload = new LoadInst(Slot, Arg->getName() + ".reload", false, I); + U->replaceUsesOfWith(Arg, Reload); + } + } + new StoreInst(Arg, Slot, AllocaInsertPt); + } + + // Demote landingpad phis, as the landingpad will be removed from the machine + // CFG. + for (LandingPadInst *LPI : LPads) { + BasicBlock *BB = LPI->getParent(); + while (auto *Phi = dyn_cast<PHINode>(BB->begin())) + DemotePHIToStack(Phi, AllocaInsertPt); + } + + DEBUG(dbgs() << "Demoted " << InstrsToDemote.size() << " instructions and " + << ArgsToDemote.size() << " arguments for WinEHPrepare\n\n"); +} + +bool WinEHPrepare::prepareExceptionHandlers( + Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { + // Don't run on functions that are already prepared. + for (LandingPadInst *LPad : LPads) { + BasicBlock *LPadBB = LPad->getParent(); + for (Instruction &Inst : *LPadBB) + if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>())) + return false; + } + + identifyEHBlocks(F, LPads); + demoteValuesLiveAcrossHandlers(F, LPads); + + // These containers are used to re-map frame variables that are used in + // outlined catch and cleanup handlers. They will be populated as the + // handlers are outlined. + FrameVarInfoMap FrameVarInfo; + + bool HandlersOutlined = false; + + Module *M = F.getParent(); + LLVMContext &Context = M->getContext(); + + // Create a new function to receive the handler contents. + PointerType *Int8PtrType = Type::getInt8PtrTy(Context); + Type *Int32Type = Type::getInt32Ty(Context); + Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions); + + if (isAsynchronousEHPersonality(Personality)) { + // FIXME: Switch the ehptr type to i32 and then switch this. + SEHExceptionCodeSlot = + new AllocaInst(Int8PtrType, nullptr, "seh_exception_code", + F.getEntryBlock().getFirstInsertionPt()); + } + + // In order to handle the case where one outlined catch handler returns + // to a block within another outlined catch handler that would otherwise + // be unreachable, we need to outline the nested landing pad before we + // outline the landing pad which encloses it. + if (!isAsynchronousEHPersonality(Personality)) + std::sort(LPads.begin(), LPads.end(), + [this](LandingPadInst *const &L, LandingPadInst *const &R) { + return DT->properlyDominates(R->getParent(), L->getParent()); + }); + + // This container stores the llvm.eh.recover and IndirectBr instructions + // that make up the body of each landing pad after it has been outlined. + // We need to defer the population of the target list for the indirectbr + // until all landing pads have been outlined so that we can handle the + // case of blocks in the target that are reached only from nested + // landing pads. + SmallVector<std::pair<CallInst*, IndirectBrInst *>, 4> LPadImpls; + + for (LandingPadInst *LPad : LPads) { + // Look for evidence that this landingpad has already been processed. + bool LPadHasActionList = false; + BasicBlock *LPadBB = LPad->getParent(); + for (Instruction &Inst : *LPadBB) { + if (match(&Inst, m_Intrinsic<Intrinsic::eh_actions>())) { + LPadHasActionList = true; + break; + } + } + + // If we've already outlined the handlers for this landingpad, + // there's nothing more to do here. 
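
The rewrite performed here is the classic reg-to-memory demotion that DemoteRegToStack implements: one stack slot per value, one store at the definition (for arguments, in the entry block), and a reload in front of each use inside a handler. A string-based sketch of just the bookkeeping, with toy names standing in for IR:

    #include <string>
    #include <vector>

    // Toy model of reg-to-stack demotion: the three artifacts it produces.
    struct Demotion {
      std::string Slot;                 // the alloca standing in for the value
      std::string SpillAfterDef;        // single store at the definition point
      std::vector<std::string> Reloads; // one load per use in a handler block
    };

    Demotion demote(const std::string &V, const std::vector<std::string> &EHUses) {
      Demotion D;
      D.Slot = V + ".slot";
      D.SpillAfterDef = "store " + V + " -> " + D.Slot;
      for (const std::string &UseBB : EHUses)
        D.Reloads.push_back(V + ".reload = load " + D.Slot + " ; in " + UseBB);
      return D;
    }

As the FIXME above concedes, this placement is deliberately naive (spill once, reload everywhere); a sharper scheme would reload only at rejoin points. Back in the patch, prepareExceptionHandlers next skips landing pads it has already processed.
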
+ if (LPadHasActionList) + continue; + + // If either of the values in the aggregate returned by the landing pad is + // extracted and stored to memory, promote the stored value to a register. + promoteLandingPadValues(LPad); + + LandingPadActions Actions; + mapLandingPadBlocks(LPad, Actions); + + HandlersOutlined |= !Actions.actions().empty(); + for (ActionHandler *Action : Actions) { + if (Action->hasBeenProcessed()) + continue; + BasicBlock *StartBB = Action->getStartBlock(); + + // SEH doesn't do any outlining for catches. Instead, pass the handler + // basic block addr to llvm.eh.actions and list the block as a return + // target. + if (isAsynchronousEHPersonality(Personality)) { + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + processSEHCatchHandler(CatchAction, StartBB); + continue; + } + } + + outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo); + } + + // Split the block after the landingpad instruction so that it is just a + // call to llvm.eh.actions followed by indirectbr. + assert(!isa<PHINode>(LPadBB->begin()) && "lpad phi not removed"); + SplitBlock(LPadBB, LPad->getNextNode(), DT); + // Erase the branch inserted by the split so we can insert indirectbr. + LPadBB->getTerminator()->eraseFromParent(); + + // Replace all extracted values with undef and ultimately replace the + // landingpad with undef. + SmallVector<Instruction *, 4> SEHCodeUses; + SmallVector<Instruction *, 4> EHUndefs; + for (User *U : LPad->users()) { + auto *E = dyn_cast<ExtractValueInst>(U); + if (!E) + continue; + assert(E->getNumIndices() == 1 && + "Unexpected operation: extracting both landing pad values"); + unsigned Idx = *E->idx_begin(); + assert((Idx == 0 || Idx == 1) && "unexpected index"); + if (Idx == 0 && isAsynchronousEHPersonality(Personality)) + SEHCodeUses.push_back(E); + else + EHUndefs.push_back(E); + } + for (Instruction *E : EHUndefs) { + E->replaceAllUsesWith(UndefValue::get(E->getType())); + E->eraseFromParent(); + } + LPad->replaceAllUsesWith(UndefValue::get(LPad->getType())); + + // Rewrite uses of the exception pointer to loads of an alloca. + for (Instruction *E : SEHCodeUses) { + SmallVector<Use *, 4> Uses; + for (Use &U : E->uses()) + Uses.push_back(&U); + for (Use *U : Uses) { + auto *I = cast<Instruction>(U->getUser()); + if (isa<ResumeInst>(I)) + continue; + LoadInst *LI; + if (auto *Phi = dyn_cast<PHINode>(I)) + LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, + Phi->getIncomingBlock(*U)); + else + LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I); + U->set(LI); + } + E->replaceAllUsesWith(UndefValue::get(E->getType())); + E->eraseFromParent(); + } + + // Add a call to describe the actions for this landing pad. + std::vector<Value *> ActionArgs; + for (ActionHandler *Action : Actions) { + // Action codes from docs are: 0 cleanup, 1 catch. + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + ActionArgs.push_back(ConstantInt::get(Int32Type, 1)); + ActionArgs.push_back(CatchAction->getSelector()); + // Find the frame escape index of the exception object alloca in the + // parent. 
+ int FrameEscapeIdx = -1;
+ Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar());
+ if (EHObj && !isa<ConstantPointerNull>(EHObj)) {
+ auto I = FrameVarInfo.find(EHObj);
+ assert(I != FrameVarInfo.end() &&
+ "failed to map llvm.eh.begincatch var");
+ FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I);
+ }
+ ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx));
+ } else {
+ ActionArgs.push_back(ConstantInt::get(Int32Type, 0));
+ }
+ ActionArgs.push_back(Action->getHandlerBlockOrFunc());
+ }
+ CallInst *Recover =
+ CallInst::Create(ActionIntrin, ActionArgs, "recover", LPadBB);
+
+ SetVector<BasicBlock *> ReturnTargets;
+ for (ActionHandler *Action : Actions) {
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ const auto &CatchTargets = CatchAction->getReturnTargets();
+ ReturnTargets.insert(CatchTargets.begin(), CatchTargets.end());
+ }
+ }
+ IndirectBrInst *Branch =
+ IndirectBrInst::Create(Recover, ReturnTargets.size(), LPadBB);
+ for (BasicBlock *Target : ReturnTargets)
+ Branch->addDestination(Target);
+
+ if (!isAsynchronousEHPersonality(Personality)) {
+ // C++ EH must repopulate the targets later to handle the case of
+ // targets that are reached indirectly through nested landing pads.
+ LPadImpls.push_back(std::make_pair(Recover, Branch));
+ }
+
+ } // End for each landingpad
+
+ // If nothing got outlined, there is no more processing to be done.
+ if (!HandlersOutlined)
+ return false;
+
+ // Replace any nested landing pad stubs with the correct action handler.
+ // This must be done before we remove unreachable blocks because it
+ // cleans up references to outlined blocks that will be deleted.
+ for (auto &LPadPair : NestedLPtoOriginalLP)
+ completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo);
+ NestedLPtoOriginalLP.clear();
+
+ // Update the indirectbr instructions' target lists if necessary.
+ SetVector<BasicBlock*> CheckedTargets;
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ for (auto &LPadImplPair : LPadImpls) {
+ IntrinsicInst *Recover = cast<IntrinsicInst>(LPadImplPair.first);
+ IndirectBrInst *Branch = LPadImplPair.second;
+
+ // Get a list of the handlers called by this landing pad's llvm.eh.actions.
+ parseEHActions(Recover, ActionList);
+
+ // Add an indirect branch listing possible successors of the catch handlers.
+ SetVector<BasicBlock *> ReturnTargets;
+ for (const auto &Action : ActionList) {
+ if (auto *CA = dyn_cast<CatchHandler>(Action.get())) {
+ Function *Handler = cast<Function>(CA->getHandlerBlockOrFunc());
+ getPossibleReturnTargets(&F, Handler, ReturnTargets);
+ }
+ }
+ ActionList.clear();
+ // Clear any targets we already knew about.
+ for (unsigned int I = 0, E = Branch->getNumDestinations(); I < E; ++I) {
+ BasicBlock *KnownTarget = Branch->getDestination(I);
+ if (ReturnTargets.count(KnownTarget))
+ ReturnTargets.remove(KnownTarget);
+ }
+ for (BasicBlock *Target : ReturnTargets) {
+ Branch->addDestination(Target);
+ // The target may be a block that we expected to get pruned.
+ // If it is, it may contain a call to llvm.eh.endcatch.
+ if (CheckedTargets.insert(Target)) {
+ // Earlier preparations guarantee that all calls to llvm.eh.endcatch
+ // will be followed by an unconditional branch.
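
The ActionArgs list built above is a flat, integer-tagged encoding: each cleanup contributes the pair (0, handler) and each catch the quadruple (1, selector, frame-escape index, handler). A std-only sketch of that flattening, with strings standing in for the real LLVM values:

    #include <string>
    #include <variant>
    #include <vector>

    // Toy stand-ins for the operands of llvm.eh.actions.
    struct CatchAction {
      std::string Selector;  // RTTI descriptor in the real pass
      int FrameEscapeIdx;    // index of the exception object slot, or -1
      std::string Handler;   // outlined .catch function
    };
    struct CleanupAction {
      std::string Handler;   // outlined .cleanup function
    };
    using Action = std::variant<CatchAction, CleanupAction>;

    // Flatten to the tagged argument list: 1,sel,idx,fn for a catch and
    // 0,fn for a cleanup -- mirroring the encoding built in the pass.
    std::vector<std::string> encodeActions(const std::vector<Action> &Actions) {
      std::vector<std::string> Args;
      for (const Action &A : Actions) {
        if (const auto *C = std::get_if<CatchAction>(&A)) {
          Args.push_back("i32 1");
          Args.push_back(C->Selector);
          Args.push_back("i32 " + std::to_string(C->FrameEscapeIdx));
          Args.push_back(C->Handler);
        } else {
          Args.push_back("i32 0");
          Args.push_back(std::get<CleanupAction>(A).Handler);
        }
      }
      return Args;
    }

The unconditional-branch guarantee the comment above mentions is what the next hunk relies on when it strips stray llvm.eh.endcatch calls from resurrected targets.
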
+ auto *Br = dyn_cast<BranchInst>(Target->getTerminator()); + if (Br && Br->isUnconditional() && + Br != Target->getFirstNonPHIOrDbgOrLifetime()) { + Instruction *Prev = Br->getPrevNode(); + if (match(cast<Value>(Prev), m_Intrinsic<Intrinsic::eh_endcatch>())) + Prev->eraseFromParent(); + } + } + } + } + LPadImpls.clear(); + + F.addFnAttr("wineh-parent", F.getName()); + + // Delete any blocks that were only used by handlers that were outlined above. + removeUnreachableBlocks(F); + + BasicBlock *Entry = &F.getEntryBlock(); + IRBuilder<> Builder(F.getParent()->getContext()); + Builder.SetInsertPoint(Entry->getFirstInsertionPt()); + + Function *FrameEscapeFn = + Intrinsic::getDeclaration(M, Intrinsic::frameescape); + Function *RecoverFrameFn = + Intrinsic::getDeclaration(M, Intrinsic::framerecover); + SmallVector<Value *, 8> AllocasToEscape; + + // Scan the entry block for an existing call to llvm.frameescape. We need to + // keep escaping those objects. + for (Instruction &I : F.front()) { + auto *II = dyn_cast<IntrinsicInst>(&I); + if (II && II->getIntrinsicID() == Intrinsic::frameescape) { + auto Args = II->arg_operands(); + AllocasToEscape.append(Args.begin(), Args.end()); + II->eraseFromParent(); + break; + } + } + + // Finally, replace all of the temporary allocas for frame variables used in + // the outlined handlers with calls to llvm.framerecover. + for (auto &VarInfoEntry : FrameVarInfo) { + Value *ParentVal = VarInfoEntry.first; + TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second; + AllocaInst *ParentAlloca = cast<AllocaInst>(ParentVal); + + // FIXME: We should try to sink unescaped allocas from the parent frame into + // the child frame. If the alloca is escaped, we have to use the lifetime + // markers to ensure that the alloca is only live within the child frame. + + // Add this alloca to the list of things to escape. + AllocasToEscape.push_back(ParentAlloca); + + // Next replace all outlined allocas that are mapped to it. + for (AllocaInst *TempAlloca : Allocas) { + if (TempAlloca == getCatchObjectSentinel()) + continue; // Skip catch parameter sentinels. + Function *HandlerFn = TempAlloca->getParent()->getParent(); + llvm::Value *FP = HandlerToParentFP[HandlerFn]; + assert(FP); + + // FIXME: Sink this framerecover into the blocks where it is used. + Builder.SetInsertPoint(TempAlloca); + Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); + Value *RecoverArgs[] = { + Builder.CreateBitCast(&F, Int8PtrType, ""), FP, + llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)}; + Instruction *RecoveredAlloca = + Builder.CreateCall(RecoverFrameFn, RecoverArgs); + + // Add a pointer bitcast if the alloca wasn't an i8. + if (RecoveredAlloca->getType() != TempAlloca->getType()) { + RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8"); + RecoveredAlloca = cast<Instruction>( + Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType())); + } + TempAlloca->replaceAllUsesWith(RecoveredAlloca); + TempAlloca->removeFromParent(); + RecoveredAlloca->takeName(TempAlloca); + delete TempAlloca; + } + } // End for each FrameVarInfo entry. + + // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry + // block. 
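
llvm.frameescape and llvm.framerecover form the contract the loop above dependsds on -- rather: the contract is that the parent publishes an ordered list of its allocas, and a handler, given the parent function, its frame pointer, and an index, gets slot N back. A toy model of that indexing contract (plain C++; in the real lowering there is no run-time table, the recover call becomes a constant offset from the parent frame):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Toy "parent frame": an ordered list of escaped slot addresses.
    struct ParentFrame {
      std::vector<void *> EscapedSlots; // filled by the llvm.frameescape analog
    };

    // llvm.frameescape analog: the parent registers its allocas, in order.
    std::size_t escapeSlot(ParentFrame &F, void *Alloca) {
      F.EscapedSlots.push_back(Alloca);
      return F.EscapedSlots.size() - 1; // this index is what the handler uses
    }

    // llvm.framerecover analog: a handler recovers slot N of the parent frame.
    void *recoverSlot(const ParentFrame &F, std::size_t Idx) {
      assert(Idx < F.EscapedSlots.size() && "bad frame-escape index");
      return F.EscapedSlots[Idx];
    }

This is why the code pushes ParentAlloca onto AllocasToEscape before computing AllocasToEscape.size() - 1: the index baked into each framerecover call must match the alloca's final position in the escape list, which the frameescape call emitted just below fixes in place.
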
+ Builder.SetInsertPoint(&F.getEntryBlock().back()); + Builder.CreateCall(FrameEscapeFn, AllocasToEscape); + + if (SEHExceptionCodeSlot) { + if (SEHExceptionCodeSlot->hasNUses(0)) + SEHExceptionCodeSlot->eraseFromParent(); + else if (isAllocaPromotable(SEHExceptionCodeSlot)) + PromoteMemToReg(SEHExceptionCodeSlot, *DT); + } + + // Clean up the handler action maps we created for this function + DeleteContainerSeconds(CatchHandlerMap); + CatchHandlerMap.clear(); + DeleteContainerSeconds(CleanupHandlerMap); + CleanupHandlerMap.clear(); + HandlerToParentFP.clear(); + DT = nullptr; + SEHExceptionCodeSlot = nullptr; + EHBlocks.clear(); + NormalBlocks.clear(); + EHReturnBlocks.clear(); + + return HandlersOutlined; +} + +void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) { + // If the return values of the landing pad instruction are extracted and + // stored to memory, we want to promote the store locations to reg values. + SmallVector<AllocaInst *, 2> EHAllocas; + + // The landingpad instruction returns an aggregate value. Typically, its + // value will be passed to a pair of extract value instructions and the + // results of those extracts are often passed to store instructions. + // In unoptimized code the stored value will often be loaded and then stored + // again. + for (auto *U : LPad->users()) { + ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); + if (!Extract) + continue; + + for (auto *EU : Extract->users()) { + if (auto *Store = dyn_cast<StoreInst>(EU)) { + auto *AV = cast<AllocaInst>(Store->getPointerOperand()); + EHAllocas.push_back(AV); + } + } + } + + // We can't do this without a dominator tree. + assert(DT); + + if (!EHAllocas.empty()) { + PromoteMemToReg(EHAllocas, *DT); + EHAllocas.clear(); + } + + // After promotion, some extracts may be trivially dead. Remove them. + SmallVector<Value *, 4> Users(LPad->user_begin(), LPad->user_end()); + for (auto *U : Users) + RecursivelyDeleteTriviallyDeadInstructions(U); +} + +void WinEHPrepare::getPossibleReturnTargets(Function *ParentF, + Function *HandlerF, + SetVector<BasicBlock*> &Targets) { + for (BasicBlock &BB : *HandlerF) { + // If the handler contains landing pads, check for any + // handlers that may return directly to a block in the + // parent function. + if (auto *LPI = BB.getLandingPadInst()) { + IntrinsicInst *Recover = cast<IntrinsicInst>(LPI->getNextNode()); + SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; + parseEHActions(Recover, ActionList); + for (const auto &Action : ActionList) { + if (auto *CH = dyn_cast<CatchHandler>(Action.get())) { + Function *NestedF = cast<Function>(CH->getHandlerBlockOrFunc()); + getPossibleReturnTargets(ParentF, NestedF, Targets); + } + } + } + + auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); + if (!Ret) + continue; + + // Handler functions must always return a block address. + BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue()); + + // If this is the handler for a nested landing pad, the + // return address may have been remapped to a block in the + // parent handler. We're not interested in those. + if (BA->getFunction() != ParentF) + continue; + + Targets.insert(BA->getBasicBlock()); + } +} + +void WinEHPrepare::completeNestedLandingPad(Function *ParentFn, + LandingPadInst *OutlinedLPad, + const LandingPadInst *OriginalLPad, + FrameVarInfoMap &FrameVarInfo) { + // Get the nested block and erase the unreachable instruction that was + // temporarily inserted as its terminator. 
+ LLVMContext &Context = ParentFn->getContext();
+ BasicBlock *OutlinedBB = OutlinedLPad->getParent();
+ // If the nested landing pad was outlined before the landing pad that enclosed
+ // it, it will already be in outlined form. In that case, we just need to see
+ // if the returns and the enclosing branch instruction need to be updated.
+ IndirectBrInst *Branch =
+ dyn_cast<IndirectBrInst>(OutlinedBB->getTerminator());
+ if (!Branch) {
+ // If the landing pad wasn't in outlined form, it should be a stub with
+ // an unreachable terminator.
+ assert(isa<UnreachableInst>(OutlinedBB->getTerminator()));
+ OutlinedBB->getTerminator()->eraseFromParent();
+ // That should leave OutlinedLPad as the last instruction in its block.
+ assert(&OutlinedBB->back() == OutlinedLPad);
+ }
+
+ // The original landing pad will have already had its action intrinsic
+ // built by the outlining loop. We need to clone that into the outlined
+ // location. It may also be necessary to add references to the exception
+ // variables to the outlined handler in which this landing pad is nested
+ // and remap return instructions in the nested handlers that should return
+ // to an address in the outlined handler.
+ Function *OutlinedHandlerFn = OutlinedBB->getParent();
+ BasicBlock::const_iterator II = OriginalLPad;
+ ++II;
+ // The instruction after the landing pad should now be a call to eh.actions.
+ const Instruction *Recover = II;
+ assert(match(Recover, m_Intrinsic<Intrinsic::eh_actions>()));
+ const IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover);
+
+ // Remap the return target in the nested handler.
+ SmallVector<BlockAddress *, 4> ActionTargets;
+ SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
+ parseEHActions(EHActions, ActionList);
+ for (const auto &Action : ActionList) {
+ auto *Catch = dyn_cast<CatchHandler>(Action.get());
+ if (!Catch)
+ continue;
+ // The dyn_cast to function here selects C++ catch handlers and skips
+ // SEH catch handlers.
+ auto *Handler = dyn_cast<Function>(Catch->getHandlerBlockOrFunc());
+ if (!Handler)
+ continue;
+ // Visit all the return instructions, looking for places that return
+ // to a location within OutlinedHandlerFn.
+ for (BasicBlock &NestedHandlerBB : *Handler) {
+ auto *Ret = dyn_cast<ReturnInst>(NestedHandlerBB.getTerminator());
+ if (!Ret)
+ continue;
+
+ // Handler functions must always return a block address.
+ BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
+ // The original target will have been in the main parent function,
+ // but if it is the address of a block that has been outlined, it
+ // should be a block that was outlined into OutlinedHandlerFn.
+ assert(BA->getFunction() == ParentFn);
+
+ // Ignore targets that aren't part of an outlined handler function.
+ if (!LPadTargetBlocks.count(BA->getBasicBlock()))
+ continue;
+
+ // If the return value is the address of a block that we
+ // previously outlined into the parent handler function, replace
+ // the return instruction and add the mapped target to the list
+ // of possible return addresses.
+ BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()];
+ assert(MappedBB->getParent() == OutlinedHandlerFn);
+ BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB);
+ Ret->eraseFromParent();
+ ReturnInst::Create(Context, NewBA, &NestedHandlerBB);
+ ActionTargets.push_back(NewBA);
+ }
+ }
+ ActionList.clear();
+
+ if (Branch) {
+ // If the landing pad was already in outlined form, just update its targets.
+ for (unsigned int I = Branch->getNumDestinations(); I > 0; --I)
+ Branch->removeDestination(I - 1);
+ // Add the previously collected action targets.
+ for (auto *Target : ActionTargets)
+ Branch->addDestination(Target->getBasicBlock());
+ } else {
+ // If the landing pad was previously stubbed out, fill in its outlined form.
+ IntrinsicInst *NewEHActions = cast<IntrinsicInst>(EHActions->clone());
+ OutlinedBB->getInstList().push_back(NewEHActions);
+
+ // Insert an indirect branch into the outlined landing pad BB.
+ IndirectBrInst *IBr = IndirectBrInst::Create(NewEHActions, 0, OutlinedBB);
+ // Add the previously collected action targets.
+ for (auto *Target : ActionTargets)
+ IBr->addDestination(Target->getBasicBlock());
+ }
+}
+
+// This function examines a block to determine whether the block ends with a
+// conditional branch to a catch handler based on a selector comparison.
+// It is used by both WinEHPrepare::findSelectorComparison() and
+// WinEHCleanupDirector::handleTypeIdFor().
+static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
+ Constant *&Selector, BasicBlock *&NextBB) {
+ ICmpInst::Predicate Pred;
+ BasicBlock *TBB, *FBB;
+ Value *LHS, *RHS;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB)))
+ return false;
+
+ if (!match(LHS,
+ m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) &&
+ !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))))
+ return false;
+
+ if (Pred == CmpInst::ICMP_EQ) {
+ CatchHandler = TBB;
+ NextBB = FBB;
+ return true;
+ }
+
+ if (Pred == CmpInst::ICMP_NE) {
+ CatchHandler = FBB;
+ NextBB = TBB;
+ return true;
+ }
+
+ return false;
+}
+
+static bool isCatchBlock(BasicBlock *BB) {
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_begincatch>()))
+ return true;
+ }
+ return false;
+}
+
+static BasicBlock *createStubLandingPad(Function *Handler,
+ Value *PersonalityFn) {
+ // FIXME: Finish this!
+ LLVMContext &Context = Handler->getContext();
+ BasicBlock *StubBB = BasicBlock::Create(Context, "stub");
+ Handler->getBasicBlockList().push_back(StubBB);
+ IRBuilder<> Builder(StubBB);
+ LandingPadInst *LPad = Builder.CreateLandingPad(
+ llvm::StructType::get(Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(Context), nullptr),
+ PersonalityFn, 0);
+ // Insert a call to llvm.eh.actions so that we don't try to outline this lpad.
+ Function *ActionIntrin =
+ Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::eh_actions);
+ Builder.CreateCall(ActionIntrin, {}, "recover");
+ LPad->setCleanup(true);
+ Builder.CreateUnreachable();
+ return StubBB;
+}
+
+// Cycles through the blocks in an outlined handler function looking for an
+// invoke instruction and inserts an invoke of llvm.donothing with an empty
+// landing pad if none is found. The code that generates the .xdata tables for
+// the handler needs at least one landing pad to identify the parent function's
+// personality.
+void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler,
+ Value *PersonalityFn) {
+ ReturnInst *Ret = nullptr;
+ UnreachableInst *Unreached = nullptr;
+ for (BasicBlock &BB : *Handler) {
+ TerminatorInst *Terminator = BB.getTerminator();
+ // If we find an invoke, there is nothing to be done.
+ auto *II = dyn_cast<InvokeInst>(Terminator);
+ if (II)
+ return;
+ // If we've already recorded a return instruction, keep looking for invokes.
+ if (!Ret) + Ret = dyn_cast<ReturnInst>(Terminator); + // If we haven't recorded an unreachable instruction, try this terminator. + if (!Unreached) + Unreached = dyn_cast<UnreachableInst>(Terminator); + } + + // If we got this far, the handler contains no invokes. We should have seen + // at least one return or unreachable instruction. We'll insert an invoke of + // llvm.donothing ahead of that instruction. + assert(Ret || Unreached); + TerminatorInst *Term; + if (Ret) + Term = Ret; + else + Term = Unreached; + BasicBlock *OldRetBB = Term->getParent(); + BasicBlock *NewRetBB = SplitBlock(OldRetBB, Term, DT); + // SplitBlock adds an unconditional branch instruction at the end of the + // parent block. We want to replace that with an invoke call, so we can + // erase it now. + OldRetBB->getTerminator()->eraseFromParent(); + BasicBlock *StubLandingPad = createStubLandingPad(Handler, PersonalityFn); + Function *F = + Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing); + InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB); +} + +// FIXME: Consider sinking this into lib/Target/X86 somehow. TargetLowering +// usually doesn't build LLVM IR, so that's probably the wrong place. +Function *WinEHPrepare::createHandlerFunc(Type *RetTy, const Twine &Name, + Module *M, Value *&ParentFP) { + // x64 uses a two-argument prototype where the parent FP is the second + // argument. x86 uses no arguments, just the incoming EBP value. + LLVMContext &Context = M->getContext(); + FunctionType *FnType; + if (TheTriple.getArch() == Triple::x86_64) { + Type *Int8PtrType = Type::getInt8PtrTy(Context); + Type *ArgTys[2] = {Int8PtrType, Int8PtrType}; + FnType = FunctionType::get(RetTy, ArgTys, false); + } else { + FnType = FunctionType::get(RetTy, None, false); + } + + Function *Handler = + Function::Create(FnType, GlobalVariable::InternalLinkage, Name, M); + BasicBlock *Entry = BasicBlock::Create(Context, "entry"); + Handler->getBasicBlockList().push_front(Entry); + if (TheTriple.getArch() == Triple::x86_64) { + ParentFP = &(Handler->getArgumentList().back()); + } else { + assert(M); + Function *FrameAddressFn = + Intrinsic::getDeclaration(M, Intrinsic::frameaddress); + Value *Args[1] = {ConstantInt::get(Type::getInt32Ty(Context), 1)}; + ParentFP = CallInst::Create(FrameAddressFn, Args, "parent_fp", + &Handler->getEntryBlock()); + } + return Handler; +} + +bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, + LandingPadInst *LPad, BasicBlock *StartBB, + FrameVarInfoMap &VarInfo) { + Module *M = SrcFn->getParent(); + LLVMContext &Context = M->getContext(); + Type *Int8PtrType = Type::getInt8PtrTy(Context); + + // Create a new function to receive the handler contents. + Value *ParentFP; + Function *Handler; + if (Action->getType() == Catch) { + Handler = createHandlerFunc(Int8PtrType, SrcFn->getName() + ".catch", M, + ParentFP); + } else { + Handler = createHandlerFunc(Type::getVoidTy(Context), + SrcFn->getName() + ".cleanup", M, ParentFP); + } + HandlerToParentFP[Handler] = ParentFP; + Handler->addFnAttr("wineh-parent", SrcFn->getName()); + BasicBlock *Entry = &Handler->getEntryBlock(); + + // Generate a standard prolog to setup the frame recovery structure. 
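
outlineHandler, continued below, drives the copy through CloneAndPruneIntoFromInst and a CloningDirector: a per-instruction policy hook that can let an instruction be cloned, skip it because its value was pre-mapped, or terminate the new block early. A std-only sketch of that control shape (toy instruction type; the real director also distinguishes a CloneSuccessors action):

    #include <functional>
    #include <string>
    #include <vector>

    enum class CloneAction { Clone, Skip, StopBlock };

    // Toy block cloner: copies instructions one by one, asking a director
    // what to do with each, the way CloneAndPruneIntoFromInst consults a
    // CloningDirector before emitting every instruction.
    std::vector<std::string>
    cloneBlock(const std::vector<std::string> &Insts,
               const std::function<CloneAction(const std::string &)> &Director) {
      std::vector<std::string> Out;
      for (const std::string &I : Insts) {
        switch (Director(I)) {
        case CloneAction::Clone:
          Out.push_back(I); // default path
          break;
        case CloneAction::Skip:
          break;            // value was pre-mapped (e.g. lpad boilerplate)
        case CloneAction::StopBlock:
          return Out;       // director already terminated the new block
        }
      }
      return Out;
    }

Each handleXXX method in this file is one arm of such a director; the outlining code that follows wires one up and runs the clone.
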
+ IRBuilder<> Builder(Context); + Builder.SetInsertPoint(Entry); + Builder.SetCurrentDebugLocation(LPad->getDebugLoc()); + + std::unique_ptr<WinEHCloningDirectorBase> Director; + + ValueToValueMapTy VMap; + + LandingPadMap &LPadMap = LPadMaps[LPad]; + if (!LPadMap.isInitialized()) + LPadMap.mapLandingPad(LPad); + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + Constant *Sel = CatchAction->getSelector(); + Director.reset(new WinEHCatchDirector(Handler, ParentFP, Sel, VarInfo, + LPadMap, NestedLPtoOriginalLP, DT, + EHBlocks)); + LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), + ConstantInt::get(Type::getInt32Ty(Context), 1)); + } else { + Director.reset( + new WinEHCleanupDirector(Handler, ParentFP, VarInfo, LPadMap)); + LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), + UndefValue::get(Type::getInt32Ty(Context))); + } + + SmallVector<ReturnInst *, 8> Returns; + ClonedCodeInfo OutlinedFunctionInfo; + + // If the start block contains PHI nodes, we need to map them. + BasicBlock::iterator II = StartBB->begin(); + while (auto *PN = dyn_cast<PHINode>(II)) { + bool Mapped = false; + // Look for PHI values that we have already mapped (such as the selector). + for (Value *Val : PN->incoming_values()) { + if (VMap.count(Val)) { + VMap[PN] = VMap[Val]; + Mapped = true; + } + } + // If we didn't find a match for this value, map it as an undef. + if (!Mapped) { + VMap[PN] = UndefValue::get(PN->getType()); + } + ++II; + } + + // The landing pad value may be used by PHI nodes. It will ultimately be + // eliminated, but we need it in the map for intermediate handling. + VMap[LPad] = UndefValue::get(LPad->getType()); + + // Skip over PHIs and, if applicable, landingpad instructions. + II = StartBB->getFirstInsertionPt(); + + CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap, + /*ModuleLevelChanges=*/false, Returns, "", + &OutlinedFunctionInfo, Director.get()); + + // Move all the instructions in the cloned "entry" block into our entry block. + // Depending on how the parent function was laid out, the block that will + // correspond to the outlined entry block may not be the first block in the + // list. We can recognize it, however, as the cloned block which has no + // predecessors. Any other block wouldn't have been cloned if it didn't + // have a predecessor which was also cloned. + Function::iterator ClonedIt = std::next(Function::iterator(Entry)); + while (!pred_empty(ClonedIt)) + ++ClonedIt; + BasicBlock *ClonedEntryBB = ClonedIt; + assert(ClonedEntryBB); + Entry->getInstList().splice(Entry->end(), ClonedEntryBB->getInstList()); + ClonedEntryBB->eraseFromParent(); + + // Make sure we can identify the handler's personality later. + addStubInvokeToHandlerIfNeeded(Handler, LPad->getPersonalityFn()); + + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + WinEHCatchDirector *CatchDirector = + reinterpret_cast<WinEHCatchDirector *>(Director.get()); + CatchAction->setExceptionVar(CatchDirector->getExceptionVar()); + CatchAction->setReturnTargets(CatchDirector->getReturnTargets()); + + // Look for blocks that are not part of the landing pad that we just + // outlined but terminate with a call to llvm.eh.endcatch and a + // branch to a block that is in the handler we just outlined. + // These blocks will be part of a nested landing pad that intends to + // return to an address in this handler. This case is best handled + // after both landing pads have been outlined, so for now we'll just + // save the association of the blocks in LPadTargetBlocks. 
+ // The return instructions which are created from these branches will be
+ // replaced after all landing pads have been outlined.
+ for (const auto MapEntry : VMap) {
+ // VMap maps all values and blocks that were just cloned, but dead
+ // blocks which were pruned will map to nullptr.
+ if (!isa<BasicBlock>(MapEntry.first) || MapEntry.second == nullptr)
+ continue;
+ const BasicBlock *MappedBB = cast<BasicBlock>(MapEntry.first);
+ for (auto *Pred : predecessors(const_cast<BasicBlock *>(MappedBB))) {
+ auto *Branch = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1)
+ continue;
+ BasicBlock::iterator II = const_cast<BranchInst *>(Branch);
+ --II;
+ if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_endcatch>())) {
+ // This would indicate that a nested landing pad wants to return
+ // to a block that is outlined into two different handlers.
+ assert(!LPadTargetBlocks.count(MappedBB));
+ LPadTargetBlocks[MappedBB] = cast<BasicBlock>(MapEntry.second);
+ }
+ }
+ }
+ } // End if (CatchAction)
+
+ Action->setHandlerBlockOrFunc(Handler);
+
+ return true;
+}
+
+/// This BB must end in a selector dispatch. All we need to do is pass the
+/// handler block to llvm.eh.actions and list it as a possible indirectbr
+/// target.
+void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction,
+ BasicBlock *StartBB) {
+ BasicBlock *HandlerBB;
+ BasicBlock *NextBB;
+ Constant *Selector;
+ bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB);
+ if (Res) {
+ // If this was EH dispatch, this must be a conditional branch to the handler
+ // block.
+ // FIXME: Handle instructions in the dispatch block. Currently we drop them,
+ // leading to crashes if some optimization hoists stuff here.
+ assert(CatchAction->getSelector() && HandlerBB &&
+ "expected catch EH dispatch");
+ } else {
+ // This must be a catch-all. Split the block after the landingpad.
+ assert(CatchAction->getSelector()->isNullValue() && "expected catch-all");
+ HandlerBB = SplitBlock(StartBB, StartBB->getFirstInsertionPt(), DT);
+ }
+ IRBuilder<> Builder(HandlerBB->getFirstInsertionPt());
+ Function *EHCodeFn = Intrinsic::getDeclaration(
+ StartBB->getParent()->getParent(), Intrinsic::eh_exceptioncode);
+ Value *Code = Builder.CreateCall(EHCodeFn, {}, "sehcode");
+ Code = Builder.CreateIntToPtr(Code, SEHExceptionCodeSlot->getAllocatedType());
+ Builder.CreateStore(Code, SEHExceptionCodeSlot);
+ CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB));
+ TinyPtrVector<BasicBlock *> Targets(HandlerBB);
+ CatchAction->setReturnTargets(Targets);
+}
+
+void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
+ // Each instance of this class should only ever be used to map a single
+ // landing pad.
+ assert(OriginLPad == nullptr || OriginLPad == LPad);
+
+ // If the landing pad has already been mapped, there's nothing more to do.
+ if (OriginLPad == LPad)
+ return;
+
+ OriginLPad = LPad;
+
+ // The landingpad instruction returns an aggregate value. Typically, its
+ // value will be passed to a pair of extract value instructions and the
+ // results of those extracts will have been promoted to reg values before
+ // this routine is called.
+ for (auto *U : LPad->users()) { + const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); + if (!Extract) + continue; + assert(Extract->getNumIndices() == 1 && + "Unexpected operation: extracting both landing pad values"); + unsigned int Idx = *(Extract->idx_begin()); + assert((Idx == 0 || Idx == 1) && + "Unexpected operation: extracting an unknown landing pad element"); + if (Idx == 0) { + ExtractedEHPtrs.push_back(Extract); + } else if (Idx == 1) { + ExtractedSelectors.push_back(Extract); + } + } +} + +bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const { + return BB->getLandingPadInst() == OriginLPad; +} + +bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { + if (Inst == OriginLPad) + return true; + for (auto *Extract : ExtractedEHPtrs) { + if (Inst == Extract) + return true; + } + for (auto *Extract : ExtractedSelectors) { + if (Inst == Extract) + return true; + } + return false; +} + +void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, + Value *SelectorValue) const { + // Remap all landing pad extract instructions to the specified values. + for (auto *Extract : ExtractedEHPtrs) + VMap[Extract] = EHPtrValue; + for (auto *Extract : ExtractedSelectors) + VMap[Extract] = SelectorValue; +} + +static bool isFrameAddressCall(const Value *V) { + return match(const_cast<Value *>(V), + m_Intrinsic<Intrinsic::frameaddress>(m_SpecificInt(0))); +} + +CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // If this is one of the boilerplate landing pad instructions, skip it. + // The instruction will have already been remapped in VMap. + if (LPadMap.isLandingPadSpecificInst(Inst)) + return CloningDirector::SkipInstruction; + + // Nested landing pads that have not already been outlined will be cloned as + // stubs, with just the landingpad instruction and an unreachable instruction. + // When all landingpads have been outlined, we'll replace this with the + // llvm.eh.actions call and indirect branch created when the landing pad was + // outlined. + if (auto *LPad = dyn_cast<LandingPadInst>(Inst)) { + return handleLandingPad(VMap, LPad, NewBB); + } + + // Nested landing pads that have already been outlined will be cloned in their + // outlined form, but we need to intercept the ibr instruction to filter out + // targets that do not return to the handler we are outlining. + if (auto *IBr = dyn_cast<IndirectBrInst>(Inst)) { + return handleIndirectBr(VMap, IBr, NewBB); + } + + if (auto *Invoke = dyn_cast<InvokeInst>(Inst)) + return handleInvoke(VMap, Invoke, NewBB); + + if (auto *Resume = dyn_cast<ResumeInst>(Inst)) + return handleResume(VMap, Resume, NewBB); + + if (auto *Cmp = dyn_cast<CmpInst>(Inst)) + return handleCompare(VMap, Cmp, NewBB); + + if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) + return handleBeginCatch(VMap, Inst, NewBB); + if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) + return handleEndCatch(VMap, Inst, NewBB); + if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) + return handleTypeIdFor(VMap, Inst, NewBB); + + // When outlining llvm.frameaddress(i32 0), remap that to the second argument, + // which is the FP of the parent. + if (isFrameAddressCall(Inst)) { + VMap[Inst] = ParentFP; + return CloningDirector::SkipInstruction; + } + + // Continue with the default cloning behavior. 
+ return CloningDirector::CloneInstruction; +} + +CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad( + ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { + // If the instruction after the landing pad is a call to llvm.eh.actions + // the landing pad has already been outlined. In this case, we should + // clone it because it may return to a block in the handler we are + // outlining now that would otherwise be unreachable. The landing pads + // are sorted before outlining begins to enable this case to work + // properly. + const Instruction *NextI = LPad->getNextNode(); + if (match(NextI, m_Intrinsic<Intrinsic::eh_actions>())) + return CloningDirector::CloneInstruction; + + // If the landing pad hasn't been outlined yet, the landing pad we are + // outlining now does not dominate it and so it cannot return to a block + // in this handler. In that case, we can just insert a stub landing + // pad now and patch it up later. + Instruction *NewInst = LPad->clone(); + if (LPad->hasName()) + NewInst->setName(LPad->getName()); + // Save this correlation for later processing. + NestedLPtoOriginalLP[cast<LandingPadInst>(NewInst)] = LPad; + VMap[LPad] = NewInst; + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(NewInst); + InstList.push_back(new UnreachableInst(NewBB->getContext())); + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // The argument to the call is some form of the first element of the + // landingpad aggregate value, but that doesn't matter. It isn't used + // here. + // The second argument is an outparameter where the exception object will be + // stored. Typically the exception object is a scalar, but it can be an + // aggregate when catching by value. + // FIXME: Leave something behind to indicate where the exception object lives + // for this handler. Should it be part of llvm.eh.actions? + assert(ExceptionObjectVar == nullptr && "Multiple calls to " + "llvm.eh.begincatch found while " + "outlining catch handler."); + ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts(); + if (isa<ConstantPointerNull>(ExceptionObjectVar)) + return CloningDirector::SkipInstruction; + assert(cast<AllocaInst>(ExceptionObjectVar)->isStaticAlloca() && + "catch parameter is not static alloca"); + Materializer.escapeCatchObject(ExceptionObjectVar); + return CloningDirector::SkipInstruction; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, BasicBlock *NewBB) { + auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); + // It might be interesting to track whether or not we are inside a catch + // function, but that might make the algorithm more brittle than it needs + // to be. + + // The end catch call can occur in one of two places: either in a + // landingpad block that is part of the catch handlers exception mechanism, + // or at the end of the catch block. However, a catch-all handler may call + // end catch from the original landing pad. If the call occurs in a nested + // landing pad block, we must skip it and continue so that the landing pad + // gets cloned. 
+ auto *ParentBB = IntrinCall->getParent(); + if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB)) + return CloningDirector::SkipInstruction; + + // If an end catch occurs anywhere else we want to terminate the handler + // with a return to the code that follows the endcatch call. If the + // next instruction is not an unconditional branch, we need to split the + // block to provide a clear target for the return instruction. + BasicBlock *ContinueBB; + auto Next = std::next(BasicBlock::const_iterator(IntrinCall)); + const BranchInst *Branch = dyn_cast<BranchInst>(Next); + if (!Branch || !Branch->isUnconditional()) { + // We're interrupting the cloning process at this location, so the + // const_cast we're doing here will not cause a problem. + ContinueBB = SplitBlock(const_cast<BasicBlock *>(ParentBB), + const_cast<Instruction *>(cast<Instruction>(Next))); + } else { + ContinueBB = Branch->getSuccessor(0); + } + + ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB); + ReturnTargets.push_back(ContinueBB); + + // We just added a terminator to the cloned block. + // Tell the caller to stop processing the current basic block so that + // the branch instruction will be skipped. + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst); + Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); + // This causes a replacement that will collapse the landing pad CFG based + // on the filter function we intend to match. + if (Selector == CurrentSelector) + VMap[Inst] = ConstantInt::get(SelectorIDType, 1); + else + VMap[Inst] = ConstantInt::get(SelectorIDType, 0); + // Tell the caller not to clone this instruction. + return CloningDirector::SkipInstruction; +} + +CloningDirector::CloningAction WinEHCatchDirector::handleIndirectBr( + ValueToValueMapTy &VMap, + const IndirectBrInst *IBr, + BasicBlock *NewBB) { + // If this indirect branch is not part of a landing pad block, just clone it. + const BasicBlock *ParentBB = IBr->getParent(); + if (!ParentBB->isLandingPad()) + return CloningDirector::CloneInstruction; + + // If it is part of a landing pad, we want to filter out target blocks + // that are not part of the handler we are outlining. + const LandingPadInst *LPad = ParentBB->getLandingPadInst(); + + // Save this correlation for later processing. + NestedLPtoOriginalLP[cast<LandingPadInst>(VMap[LPad])] = LPad; + + // We should only get here for landing pads that have already been outlined. + assert(match(LPad->getNextNode(), m_Intrinsic<Intrinsic::eh_actions>())); + + // Copy the indirectbr, but only include targets that were previously + // identified as EH blocks and are dominated by the nested landing pad. 
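
The filter the comment above describes keeps a cloned target only if it passes two tests: membership in the EH block set and domination by the nested landing pad. A minimal std-only sketch of that filter-then-rebuild step, with dominates() standing in for DominatorTree::dominates:

    #include <set>
    #include <string>
    #include <vector>

    // Rebuild a branch's target list, keeping only targets that satisfy
    // both a membership check (EHBlocks) and a dominance check, as the
    // cloned indirectbr handling does. 'dominates' is a placeholder for
    // the real dominance query.
    std::vector<std::string>
    filterTargets(const std::vector<std::string> &Targets,
                  const std::set<std::string> &EHBlocks,
                  const std::string &ParentBB,
                  bool (*dominates)(const std::string &, const std::string &)) {
      std::vector<std::string> Kept;
      for (const std::string &T : Targets)
        if (EHBlocks.count(T) && dominates(ParentBB, T))
          Kept.push_back(T); // becomes a destination of the new indirectbr
      return Kept;
    }

The real loop over the indirectbr's destinations follows.
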
+ SetVector<const BasicBlock *> ReturnTargets; + for (int I = 0, E = IBr->getNumDestinations(); I < E; ++I) { + auto *TargetBB = IBr->getDestination(I); + if (EHBlocks.count(const_cast<BasicBlock*>(TargetBB)) && + DT->dominates(ParentBB, TargetBB)) { + DEBUG(dbgs() << " Adding destination " << TargetBB->getName() << "\n"); + ReturnTargets.insert(TargetBB); + } + } + IndirectBrInst *NewBranch = + IndirectBrInst::Create(const_cast<Value *>(IBr->getAddress()), + ReturnTargets.size(), NewBB); + for (auto *Target : ReturnTargets) + NewBranch->addDestination(const_cast<BasicBlock*>(Target)); + + // The operands and targets of the branch instruction are remapped later + // because it is a terminator. Tell the cloning code to clone the + // blocks we just added to the target list. + return CloningDirector::CloneSuccessors; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap, + const InvokeInst *Invoke, BasicBlock *NewBB) { + return CloningDirector::CloneInstruction; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap, + const ResumeInst *Resume, BasicBlock *NewBB) { + // Resume instructions shouldn't be reachable from catch handlers. + // We still need to handle it, but it will be pruned. + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(new UnreachableInst(NewBB->getContext())); + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction +WinEHCatchDirector::handleCompare(ValueToValueMapTy &VMap, + const CmpInst *Compare, BasicBlock *NewBB) { + const IntrinsicInst *IntrinCall = nullptr; + if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>())) { + IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(0)); + } else if (match(Compare->getOperand(1), + m_Intrinsic<Intrinsic::eh_typeid_for>())) { + IntrinCall = dyn_cast<IntrinsicInst>(Compare->getOperand(1)); + } + if (IntrinCall) { + Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); + // This causes a replacement that will collapse the landing pad CFG based + // on the filter function we intend to match. + if (Selector == CurrentSelector->stripPointerCasts()) { + VMap[Compare] = ConstantInt::get(SelectorIDType, 1); + } else { + VMap[Compare] = ConstantInt::get(SelectorIDType, 0); + } + return CloningDirector::SkipInstruction; + } + return CloningDirector::CloneInstruction; +} + +CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad( + ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { + // The MS runtime will terminate the process if an exception occurs in a + // cleanup handler, so we shouldn't encounter landing pads in the actual + // cleanup code, but they may appear in catch blocks. Depending on where + // we started cloning we may see one, but it will get dropped during dead + // block pruning. + Instruction *NewInst = new UnreachableInst(NewBB->getContext()); + VMap[LPad] = NewInst; + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(NewInst); + return CloningDirector::StopCloningBB; +} + +CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( + ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { + // Cleanup code may flow into catch blocks or the catch block may be part + // of a branch that will be optimized away. We'll insert a return + // instruction now, but it may be pruned before the cloning process is + // complete. 
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Cleanup handlers nested within catch handlers may begin with a call to
+ // eh.endcatch. We can just ignore that instruction.
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // If we encounter a selector comparison while cloning a cleanup handler,
+ // we want to stop cloning immediately. Anything after the dispatch
+ // will be outlined into a different handler.
+ BasicBlock *CatchHandler;
+ Constant *Selector;
+ BasicBlock *NextBB;
+ if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()),
+ CatchHandler, Selector, NextBB)) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+ return CloningDirector::StopCloningBB;
+ }
+ // If eh.typeid.for is called for any other reason, it can be ignored.
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleIndirectBr(
+ ValueToValueMapTy &VMap,
+ const IndirectBrInst *IBr,
+ BasicBlock *NewBB) {
+ // No special handling is required for cleanup cloning.
+ return CloningDirector::CloneInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
+ ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) {
+ // All invokes in cleanup handlers can be replaced with calls.
+ SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3);
+ // Insert a normal call instruction...
+ CallInst *NewCall =
+ CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs,
+ Invoke->getName(), NewBB);
+ NewCall->setCallingConv(Invoke->getCallingConv());
+ NewCall->setAttributes(Invoke->getAttributes());
+ NewCall->setDebugLoc(Invoke->getDebugLoc());
+ VMap[Invoke] = NewCall;
+
+ // Remap the operands.
+ llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer);
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(Invoke->getNormalDest(), NewBB);
+
+ // The unwind destination won't be cloned into the new function, so
+ // we don't need to clean up its phi nodes.
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block.
+ return CloningDirector::CloneSuccessors;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
+ ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction
+WinEHCleanupDirector::handleCompare(ValueToValueMapTy &VMap,
+ const CmpInst *Compare, BasicBlock *NewBB) {
+ if (match(Compare->getOperand(0), m_Intrinsic<Intrinsic::eh_typeid_for>()) ||
+ match(Compare->getOperand(1), m_Intrinsic<Intrinsic::eh_typeid_for>())) {
+ VMap[Compare] = ConstantInt::get(SelectorIDType, 1);
+ return CloningDirector::SkipInstruction;
+ }
+ return CloningDirector::CloneInstruction;
+}
+
+WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
+ Function *OutlinedFn, Value *ParentFP, FrameVarInfoMap &FrameVarInfo)
+ : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
+ BasicBlock *EntryBB = &OutlinedFn->getEntryBlock();
+
+ // New allocas should be inserted in the entry block, but after the parent FP
+ // is established if it is an instruction.
+ Instruction *InsertPoint = EntryBB->getFirstInsertionPt();
+ if (auto *FPInst = dyn_cast<Instruction>(ParentFP))
+ InsertPoint = FPInst->getNextNode();
+ Builder.SetInsertPoint(EntryBB, InsertPoint);
+}
+
+Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
+ // If we're asked to materialize a static alloca, we temporarily create an
+ // alloca in the outlined function and add this to the FrameVarInfo map. When
+ // all the outlining is complete, we'll replace these temporary allocas with
+ // calls to llvm.framerecover.
+ if (auto *AV = dyn_cast<AllocaInst>(V)) {
+ assert(AV->isStaticAlloca() &&
+ "cannot materialize un-demoted dynamic alloca");
+ AllocaInst *NewAlloca = dyn_cast<AllocaInst>(AV->clone());
+ Builder.Insert(NewAlloca, AV->getName());
+ FrameVarInfo[AV].push_back(NewAlloca);
+ return NewAlloca;
+ }
+
+ if (isa<Instruction>(V) || isa<Argument>(V)) {
+ Function *Parent = isa<Instruction>(V)
+ ? cast<Instruction>(V)->getParent()->getParent()
+ : cast<Argument>(V)->getParent();
+ errs()
+ << "Failed to demote instruction used in exception handler of function "
+ << GlobalValue::getRealLinkageName(Parent->getName()) << ":\n";
+ errs() << " " << *V << '\n';
+ report_fatal_error("WinEHPrepare failed to demote instruction");
+ }
+
+ // Don't materialize other values.
+ return nullptr;
+}
+
+void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) {
+ // Catch parameter objects have to live in the parent frame. When we see a use
+ // of a catch parameter, add a sentinel to the multimap to indicate that it's
+ // used from another handler. This will prevent us from trying to sink the
+ // alloca into the handler and ensure that the catch parameter is present in
+ // the call to llvm.frameescape.
+ FrameVarInfo[V].push_back(getCatchObjectSentinel());
+}
+
+// This function maps the catch and cleanup handlers that are reachable from the
+// specified landing pad. The landing pad sequence will have this basic shape:
+//
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// ...
+//
+// Any of the cleanup slots may be absent. The cleanup slots may be occupied by
+// any arbitrary control flow, but all paths through the cleanup code must
+// eventually reach the next selector comparison and no path can skip to a
+// different selector comparison, though some paths may terminate abnormally.
+// Therefore, we will use a depth first search from the start of any given
+// cleanup block and stop searching when we find the next selector comparison.
+// +// If the landingpad instruction does not have a catch clause, we will assume +// that any instructions other than selector comparisons and catch handlers can +// be ignored. In practice, these will only be the boilerplate instructions. +// +// The catch handlers may also have any control structure, but we are only +// interested in the start of the catch handlers, so we don't need to actually +// follow the flow of the catch handlers. The start of the catch handlers can +// be located from the compare instructions, but they can be skipped in the +// flow by following the contrary branch. +void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, + LandingPadActions &Actions) { + unsigned int NumClauses = LPad->getNumClauses(); + unsigned int HandlersFound = 0; + BasicBlock *BB = LPad->getParent(); + + DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n"); + + if (NumClauses == 0) { + findCleanupHandlers(Actions, BB, nullptr); + return; + } + + VisitedBlockSet VisitedBlocks; + + while (HandlersFound != NumClauses) { + BasicBlock *NextBB = nullptr; + + // Skip over filter clauses. + if (LPad->isFilter(HandlersFound)) { + ++HandlersFound; + continue; + } + + // See if the clause we're looking for is a catch-all. + // If so, the catch begins immediately. + Constant *ExpectedSelector = + LPad->getClause(HandlersFound)->stripPointerCasts(); + if (isa<ConstantPointerNull>(ExpectedSelector)) { + // The catch all must occur last. + assert(HandlersFound == NumClauses - 1); + + // There can be additional selector dispatches in the call chain that we + // need to ignore. + BasicBlock *CatchBlock = nullptr; + Constant *Selector; + while (BB && isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) { + DEBUG(dbgs() << " Found extra catch dispatch in block " + << CatchBlock->getName() << "\n"); + BB = NextBB; + } + + // Add the catch handler to the action list. + CatchHandler *Action = nullptr; + if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) { + // If the CatchHandlerMap already has an entry for this BB, re-use it. + Action = CatchHandlerMap[BB]; + assert(Action->getSelector() == ExpectedSelector); + } else { + // We don't expect a selector dispatch, but there may be a call to + // llvm.eh.begincatch, which separates catch handling code from + // cleanup code in the same control flow. This call looks for the + // begincatch intrinsic. + Action = findCatchHandler(BB, NextBB, VisitedBlocks); + if (Action) { + // For C++ EH, check if there is any interesting cleanup code before + // we begin the catch. This is important because cleanups cannot + // rethrow exceptions but code called from catches can. For SEH, it + // isn't important if some finally code before a catch-all is executed + // out of line or after recovering from the exception. + if (Personality == EHPersonality::MSVC_CXX) + findCleanupHandlers(Actions, BB, BB); + } else { + // If an action was not found, it means that the control flows + // directly into the catch-all handler and there is no cleanup code. + // That's an expected situation and we must create a catch action. + // Since this is a catch-all handler, the selector won't actually + // appear in the code anywhere. ExpectedSelector here is the constant + // null ptr that we got from the landing pad instruction. 
+ Action = new CatchHandler(BB, ExpectedSelector, nullptr);
+ CatchHandlerMap[BB] = Action;
+ }
+ }
+ Actions.insertCatchHandler(Action);
+ DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n");
+ ++HandlersFound;
+
+ // Once we reach a catch-all, don't expect to hit a resume instruction.
+ BB = nullptr;
+ break;
+ }
+
+ CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks);
+ assert(CatchAction);
+
+ // See if there is any interesting code executed before the dispatch.
+ findCleanupHandlers(Actions, BB, CatchAction->getStartBlock());
+
+ // When the source program contains multiple nested try blocks the catch
+ // handlers can get strung together in such a way that we can encounter
+ // a dispatch for a selector that we've already had a handler for.
+ if (CatchAction->getSelector()->stripPointerCasts() == ExpectedSelector) {
+ ++HandlersFound;
+
+ // Add the catch handler to the action list.
+ DEBUG(dbgs() << " Found catch dispatch in block "
+ << CatchAction->getStartBlock()->getName() << "\n");
+ Actions.insertCatchHandler(CatchAction);
+ } else {
+ // Under some circumstances optimized IR will flow unconditionally into a
+ // handler block without checking the selector. This can only happen if
+ // the landing pad has a catch-all handler and the handler for the
+ // preceding catch clause is identical to the catch-all handler
+ // (typically an empty catch). In this case, the handler must be shared
+ // by all remaining clauses.
+ if (isa<ConstantPointerNull>(
+ CatchAction->getSelector()->stripPointerCasts())) {
+ DEBUG(dbgs() << " Applying early catch-all handler in block "
+ << CatchAction->getStartBlock()->getName()
+ << " to all remaining clauses.\n");
+ Actions.insertCatchHandler(CatchAction);
+ return;
+ }
+
+ DEBUG(dbgs() << " Found extra catch dispatch in block "
+ << CatchAction->getStartBlock()->getName() << "\n");
+ }
+
+ // Move on to the block after the catch handler.
+ BB = NextBB;
+ }
+
+ // If we didn't wind up in a catch-all, see if there is any interesting code
+ // executed before the resume.
+ findCleanupHandlers(Actions, BB, BB);
+
+ // It's possible that some optimization moved code into a landingpad that
+ // wasn't previously being used for cleanup. If that happens, we need to
+ // execute that extra code from a cleanup handler.
+ if (Actions.includesCleanup() && !LPad->isCleanup())
+ LPad->setCleanup(true);
+}
+
+// This function searches starting with the input block for the next
+// block that terminates with a branch whose condition is based on a selector
+// comparison. This may be the input block. See the mapLandingPadBlocks
+// comments for a discussion of control flow assumptions.
+//
+CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB,
+ BasicBlock *&NextBB,
+ VisitedBlockSet &VisitedBlocks) {
+ // See if we've already found a catch handler for this block; if so, use it.
+ // Call count() first to avoid creating a null entry for blocks
+ // we haven't seen before.
+ if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
+ CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]);
+ NextBB = Action->getNextBB();
+ return Action;
+ }
+
+ // VisitedBlocks applies only to the current search. We still
+ // need to consider blocks that we've visited while mapping other
+ // landing pads.
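
findCatchHandler pairs a persistent cache (CatchHandlerMap, shared across queries) with a per-search visited set (VisitedBlocks), exactly the split the comment above spells out. A std-only sketch of that two-level search, with isDispatch standing in for the selector-dispatch test:

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using Graph = std::map<std::string, std::vector<std::string>>;

    // Memoized DFS: Cache persists across searches (CatchHandlerMap's role),
    // Visited guards only the current walk (VisitedBlockSet's role).
    const std::string *findDispatch(const Graph &G, const std::string &BB,
                                    std::set<std::string> &Visited,
                                    std::map<std::string, std::string> &Cache,
                                    bool (*isDispatch)(const std::string &)) {
      if (auto It = Cache.find(BB); It != Cache.end())
        return &It->second;              // reuse an earlier search's answer
      if (!Visited.insert(BB).second)
        return nullptr;                  // cycle within this search
      if (isDispatch(BB))
        return &(Cache[BB] = BB);        // cache the positive hit
      if (auto It = G.find(BB); It != G.end())
        for (const std::string &Succ : It->second)
          if (const std::string *R =
                  findDispatch(G, Succ, Visited, Cache, isDispatch))
            return R;
      return nullptr;
    }

With that shape in mind, the walk itself continues below.
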
+  VisitedBlocks.insert(BB);
+
+  BasicBlock *CatchBlock = nullptr;
+  Constant *Selector = nullptr;
+
+  // If this is the first time we've visited this block from any landing pad,
+  // look to see if it is a selector dispatch block.
+  if (!CatchHandlerMap.count(BB)) {
+    if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
+      CatchHandler *Action = new CatchHandler(BB, Selector, NextBB);
+      CatchHandlerMap[BB] = Action;
+      return Action;
+    }
+    // If we encounter a block containing an llvm.eh.begincatch before we
+    // find a selector dispatch block, the handler is assumed to be
+    // reached unconditionally. This happens for catch-all blocks, but
+    // it can also happen for other catch handlers that have been combined
+    // with the catch-all handler during optimization.
+    if (isCatchBlock(BB)) {
+      PointerType *Int8PtrTy = Type::getInt8PtrTy(BB->getContext());
+      Constant *NullSelector = ConstantPointerNull::get(Int8PtrTy);
+      CatchHandler *Action = new CatchHandler(BB, NullSelector, nullptr);
+      CatchHandlerMap[BB] = Action;
+      return Action;
+    }
+  }
+
+  // Visit each successor, looking for the dispatch.
+  // FIXME: We expect to find the dispatch quickly, so this will probably
+  // work better as a breadth first search.
+  for (BasicBlock *Succ : successors(BB)) {
+    if (VisitedBlocks.count(Succ))
+      continue;
+
+    CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks);
+    if (Action)
+      return Action;
+  }
+  return nullptr;
+}
+
+// These are helper functions to combine repeated code from findCleanupHandlers.
+static void createCleanupHandler(LandingPadActions &Actions,
+                                 CleanupHandlerMapTy &CleanupHandlerMap,
+                                 BasicBlock *BB) {
+  CleanupHandler *Action = new CleanupHandler(BB);
+  CleanupHandlerMap[BB] = Action;
+  Actions.insertCleanupHandler(Action);
+  DEBUG(dbgs() << "  Found cleanup code in block "
+               << Action->getStartBlock()->getName() << "\n");
+}
+
+static CallSite matchOutlinedFinallyCall(BasicBlock *BB,
+                                         Instruction *MaybeCall) {
+  // Look for finally blocks that Clang has already outlined for us.
+  //   %fp = call i8* @llvm.frameaddress(i32 0)
+  //   call void @"fin$parent"(iN 1, i8* %fp)
+  if (isFrameAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
+    MaybeCall = MaybeCall->getNextNode();
+  CallSite FinallyCall(MaybeCall);
+  if (!FinallyCall || FinallyCall.arg_size() != 2)
+    return CallSite();
+  if (!match(FinallyCall.getArgument(0), m_SpecificInt(1)))
+    return CallSite();
+  if (!isFrameAddressCall(FinallyCall.getArgument(1)))
+    return CallSite();
+  return FinallyCall;
+}
+
+static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) {
+  // Skip blocks that contain only a single unconditional branch.
+  while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) {
+    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
+    if (Br && Br->isUnconditional())
+      BB = Br->getSuccessor(0);
+    else
+      return BB;
+  }
+  return BB;
+}
+
+// This function searches starting with the input block for the next block that
+// contains code that is not part of a catch handler and would not be
+// eliminated during handler outlining.
+//
+void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
+                                       BasicBlock *StartBB, BasicBlock *EndBB) {
+  // Here we will skip over the following:
+  //
+  //   Landing pad prolog
+  //
+  //   Unconditional branches
+  //
+  //   Selector dispatch
+  //
+  //   Resume pattern
+  //
+  // Anything else marks the start of an interesting block.
+
+  BasicBlock *BB = StartBB;
+  // Anything other than an unconditional branch will kick us out of this loop
+  // one way or another.
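+  // As a sketch of what counts as "interesting" (hypothetical IR; the names
+  // @SomeDtor and %o are illustrative only), a block such as:
+  //
+  //   ehcleanup:
+  //     call void @SomeDtor(%struct.Obj* %o)
+  //     br label %eh.resume
+  //
+  // contains a call that is neither landing pad boilerplate, a selector
+  // dispatch, nor part of a resume pattern, so the loop below records it as
+  // the start of cleanup code.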
+  while (BB) {
+    BB = followSingleUnconditionalBranches(BB);
+    // If we've already scanned this block, don't scan it again. If it is
+    // a cleanup block, there will be an action in the CleanupHandlerMap.
+    // If we've scanned it and it is not a cleanup block, there will be a
+    // nullptr in the CleanupHandlerMap. If we have not scanned it, there will
+    // be no entry in the CleanupHandlerMap. We must call count() first to
+    // avoid creating a null entry for blocks we haven't scanned.
+    if (CleanupHandlerMap.count(BB)) {
+      if (auto *Action = CleanupHandlerMap[BB]) {
+        Actions.insertCleanupHandler(Action);
+        DEBUG(dbgs() << "  Found cleanup code in block "
+                     << Action->getStartBlock()->getName() << "\n");
+        // FIXME: This cleanup might chain into another, and we need to
+        // discover that.
+        return;
+      } else {
+        // Here we handle the case where the cleanup handler map contains a
+        // value for this block but the value is a nullptr. This means that
+        // we have previously analyzed the block and determined that it did
+        // not contain any cleanup code. Based on the earlier analysis, we
+        // know that the block must end in either an unconditional branch, a
+        // resume, or a conditional branch that is predicated on a comparison
+        // with a selector. Either the resume or the selector dispatch
+        // would terminate the search for cleanup code, so the unconditional
+        // branch is the only case for which we might need to continue
+        // searching.
+        BasicBlock *SuccBB = followSingleUnconditionalBranches(BB);
+        if (SuccBB == BB || SuccBB == EndBB)
+          return;
+        BB = SuccBB;
+        continue;
+      }
+    }
+
+    // Create an entry in the cleanup handler map for this block. Initially
+    // we create an entry that says this isn't a cleanup block. If we find
+    // cleanup code, the caller will replace this entry.
+    CleanupHandlerMap[BB] = nullptr;
+
+    TerminatorInst *Terminator = BB->getTerminator();
+
+    // Landing pad blocks have extra instructions we need to accept.
+    LandingPadMap *LPadMap = nullptr;
+    if (BB->isLandingPad()) {
+      LandingPadInst *LPad = BB->getLandingPadInst();
+      LPadMap = &LPadMaps[LPad];
+      if (!LPadMap->isInitialized())
+        LPadMap->mapLandingPad(LPad);
+    }
+
+    // Look for the bare resume pattern:
+    //   %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0
+    //   %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1
+    //   resume { i8*, i32 } %lpad.val2
+    if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
+      InsertValueInst *Insert1 = nullptr;
+      InsertValueInst *Insert2 = nullptr;
+      Value *ResumeVal = Resume->getOperand(0);
+      // If the resume value isn't a phi or landingpad value, it should be a
+      // series of insertions. Identify them so we can avoid them when scanning
+      // for cleanups.
+      if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) {
+        Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
+        if (!Insert2)
+          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+        Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
+        if (!Insert1)
+          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+      }
+      for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+           II != IE; ++II) {
+        Instruction *Inst = II;
+        if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+          continue;
+        if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
+          continue;
+        if (!Inst->hasOneUse() ||
+            (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
+          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+        }
+      }
+      return;
+    }
+
+    BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+    if (Branch && Branch->isConditional()) {
+      // Look for the selector dispatch.
+      //   %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
+      //   %matches = icmp eq i32 %sel, %2
+      //   br i1 %matches, label %catch14, label %eh.resume
+      CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
+      if (!Compare || !Compare->isEquality())
+        return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+      for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+           II != IE; ++II) {
+        Instruction *Inst = II;
+        if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+          continue;
+        if (Inst == Compare || Inst == Branch)
+          continue;
+        if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+          continue;
+        return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+      }
+      // The selector dispatch block should always terminate our search.
+      assert(BB == EndBB);
+      return;
+    }
+
+    if (isAsynchronousEHPersonality(Personality)) {
+      // If this is a landing pad block, skip past the landing-pad-specific
+      // instructions to find the first call that might be an outlined
+      // finally call.
+      Instruction *MaybeCall = BB->getFirstNonPHIOrDbg();
+      if (LPadMap) {
+        while (MaybeCall != BB->getTerminator() &&
+               LPadMap->isLandingPadSpecificInst(MaybeCall))
+          MaybeCall = MaybeCall->getNextNode();
+      }
+
+      // Look for outlined finally calls.
+      if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
+        Function *Fin = FinallyCall.getCalledFunction();
+        assert(Fin && "outlined finally call should be direct");
+        auto *Action = new CleanupHandler(BB);
+        Action->setHandlerBlockOrFunc(Fin);
+        Actions.insertCleanupHandler(Action);
+        CleanupHandlerMap[BB] = Action;
+        DEBUG(dbgs() << "  Found frontend-outlined finally call to "
+                     << Fin->getName() << " in block "
+                     << Action->getStartBlock()->getName() << "\n");
+
+        // Split the block if there were more interesting instructions and
+        // look for finally calls in the normal successor block.
+        BasicBlock *SuccBB = BB;
+        if (FinallyCall.getInstruction() != BB->getTerminator() &&
+            FinallyCall.getInstruction()->getNextNode() !=
+                BB->getTerminator()) {
+          SuccBB =
+              SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(), DT);
+        } else {
+          if (FinallyCall.isInvoke()) {
+            SuccBB =
+                cast<InvokeInst>(FinallyCall.getInstruction())->getNormalDest();
+          } else {
+            SuccBB = BB->getUniqueSuccessor();
+            assert(SuccBB &&
+                   "splitOutlinedFinallyCalls didn't insert a branch");
+          }
+        }
+        BB = SuccBB;
+        if (BB == EndBB)
+          return;
+        continue;
+      }
+    }
+
+    // Anything else is either a catch block or interesting cleanup code.
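+    // For example (hypothetical IR; %exn, %obj and the block name are
+    // illustrative), a block that begins a catch handler:
+    //
+    //   catch:
+    //     %exn = load i8*, i8** %exn.slot
+    //     call void @llvm.eh.begincatch(i8* %exn, i8* %obj)
+    //
+    // is recognized by the begincatch check below, and no cleanup handler is
+    // created for it.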
+    for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+         II != IE; ++II) {
+      Instruction *Inst = II;
+      if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+        continue;
+      // Unconditional branches fall through to this loop.
+      if (Inst == Branch)
+        continue;
+      // If this is a catch block, there is no cleanup code to be found.
+      if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
+        return;
+      // If this is a nested landing pad, it may contain an endcatch call.
+      if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+        return;
+      // Anything else makes this interesting cleanup code.
+      return createCleanupHandler(Actions, CleanupHandlerMap, BB);
+    }
+
+    // Only unconditional branches in empty blocks should get this far.
+    assert(Branch && Branch->isUnconditional());
+    if (BB == EndBB)
+      return;
+    BB = Branch->getSuccessor(0);
+  }
+}
+
+// This is a public function, declared in WinEHFuncInfo.h, and is also
+// referenced by WinEHNumbering in FunctionLoweringInfo.cpp.
+void llvm::parseEHActions(
+    const IntrinsicInst *II,
+    SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions) {
+  for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) {
+    uint64_t ActionKind =
+        cast<ConstantInt>(II->getArgOperand(I))->getZExtValue();
+    if (ActionKind == /*catch=*/1) {
+      auto *Selector = cast<Constant>(II->getArgOperand(I + 1));
+      ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2));
+      int64_t EHObjIndexVal = EHObjIndex->getSExtValue();
+      Constant *Handler = cast<Constant>(II->getArgOperand(I + 3));
+      I += 4;
+      auto CH = make_unique<CatchHandler>(/*BB=*/nullptr, Selector,
+                                          /*NextBB=*/nullptr);
+      CH->setHandlerBlockOrFunc(Handler);
+      CH->setExceptionVarIndex(EHObjIndexVal);
+      Actions.push_back(std::move(CH));
+    } else if (ActionKind == 0) {
+      Constant *Handler = cast<Constant>(II->getArgOperand(I + 1));
+      I += 2;
+      auto CH = make_unique<CleanupHandler>(/*BB=*/nullptr);
+      CH->setHandlerBlockOrFunc(Handler);
+      Actions.push_back(std::move(CH));
+    } else {
+      llvm_unreachable("Expected either a catch or cleanup handler!");
+    }
+  }
+  std::reverse(Actions.begin(), Actions.end());
+}
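+
+// As a sketch of the encoding parseEHActions decodes (the argument layout is
+// inferred from the parsing loop above; the typeinfo and handler names and
+// their signatures are illustrative, not taken from a real test case):
+//
+//   call i8* (...) @llvm.eh.actions(
+//       i32 1, i8* bitcast (i8** @typeinfo to i8*), i32 0,
+//       i8* (i8*, i8*)* @a.catch.handler,
+//       i32 0, void (i8*, i8*)* @a.cleanup.handler)
+//
+// describes a catch action (kind 1: selector, exception object frame index,
+// outlined handler) followed by a cleanup action (kind 0: outlined handler).
+// The std::reverse at the end returns the actions in the opposite order from
+// their appearance in the intrinsic's argument list.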