diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
196 files changed, 19958 insertions, 7727 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index a736884..bb90861 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -161,8 +161,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any // callee-saved register that is not saved in the prolog. - const MachineFrameInfo *MFI = MF.getFrameInfo(); - BitVector Pristine = MFI->getPristineRegs(MF); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + BitVector Pristine = MFI.getPristineRegs(MF); for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp index 40451c0..d840a2f 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp @@ -48,7 +48,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, }); #ifndef NDEBUG for (unsigned I = 0, E = Hints.size(); I != E; ++I) - assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() && + assert(is_contained(Order, Hints[I]) && "Target hint is outside allocation order."); #endif } diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h index 2aee3a6..8223a52 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.h +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h @@ -18,6 +18,7 @@ #define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCRegisterInfo.h" namespace llvm { @@ -79,9 +80,7 @@ public: bool isHint() const { return Pos <= 0; } /// Return true if PhysReg is a preferred register. 
- bool isHint(unsigned PhysReg) const { - return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end(); - } + bool isHint(unsigned PhysReg) const { return is_contained(Hints, PhysReg); } }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index d690734..79ecc43 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -272,28 +272,10 @@ static const Value *getNoopInput(const Value *V, TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); NoopInput = Op; - } else if (isa<CallInst>(I)) { - // Look through call (skipping callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } - } else if (isa<InvokeInst>(I)) { - // Look through invoke (skipping BB, BB, Callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } + } else if (auto CS = ImmutableCallSite(I)) { + const Value *ReturnedOp = CS.getReturnedArgOperand(); + if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI)) + NoopInput = ReturnedOp; } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) { // Value may come from either the aggregate or the scalar ArrayRef<unsigned> InsertLoc = IVI->getIndices(); @@ -525,19 +507,15 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering()); } -bool llvm::returnTypeIsEligibleForTailCall(const Function *F, - const Instruction *I, - const ReturnInst *Ret, - const TargetLoweringBase &TLI) { - // If the block ends with a void return or unreachable, it doesn't matter - // what the call's return type is. - if (!Ret || Ret->getNumOperands() == 0) return true; +bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI, + bool *AllowDifferingSizes) { + // ADS may be null, so don't write to it directly. + bool DummyADS; + bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS; + ADS = true; - // If the return value is undef, it doesn't matter what the call's - // return type is. - if (isa<UndefValue>(Ret->getOperand(0))) return true; - - // Make sure the attributes attached to each return are compatible. AttrBuilder CallerAttrs(F->getAttributes(), AttributeSet::ReturnIndex); AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(), @@ -545,22 +523,21 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // Noalias is completely benign as far as calling convention goes, it // shouldn't affect whether the call is a tail call. 
- CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias); - CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias); + CallerAttrs.removeAttribute(Attribute::NoAlias); + CalleeAttrs.removeAttribute(Attribute::NoAlias); - bool AllowDifferingSizes = true; if (CallerAttrs.contains(Attribute::ZExt)) { if (!CalleeAttrs.contains(Attribute::ZExt)) return false; - AllowDifferingSizes = false; + ADS = false; CallerAttrs.removeAttribute(Attribute::ZExt); CalleeAttrs.removeAttribute(Attribute::ZExt); } else if (CallerAttrs.contains(Attribute::SExt)) { if (!CalleeAttrs.contains(Attribute::SExt)) return false; - AllowDifferingSizes = false; + ADS = false; CallerAttrs.removeAttribute(Attribute::SExt); CalleeAttrs.removeAttribute(Attribute::SExt); } @@ -568,7 +545,24 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // If they're still different, there's some facet we don't understand // (currently only "inreg", but in future who knows). It may be OK but the // only safe option is to reject the tail call. - if (CallerAttrs != CalleeAttrs) + return CallerAttrs == CalleeAttrs; +} + +bool llvm::returnTypeIsEligibleForTailCall(const Function *F, + const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI) { + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // If the return value is undef, it doesn't matter what the call's + // return type is. + if (isa<UndefValue>(Ret->getOperand(0))) return true; + + // Make sure the attributes attached to each return are compatible. + bool AllowDifferingSizes; + if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes)) return false; const Value *RetVal = Ret->getOperand(0), *CallVal = I; @@ -672,7 +666,7 @@ llvm::getFuncletMembership(const MachineFunction &MF) { DenseMap<const MachineBasicBlock *, int> FuncletMembership; // We don't have anything to do if there aren't any EH pads. - if (!MF.getMMI().hasEHFunclets()) + if (!MF.hasEHFunclets()) return FuncletMembership; int EntryBBNumber = MF.front().getNumber(); @@ -694,9 +688,10 @@ llvm::getFuncletMembership(const MachineFunction &MF) { } MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); + // CatchPads are not funclets for SEH so do not consider CatchRet to // transfer control to another funclet. - if (MBBI->getOpcode() != TII->getCatchReturnOpcode()) + if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode()) continue; // FIXME: SEH CatchPads are not necessarily in the parent function: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 5294c98..61149d9 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -43,13 +43,6 @@ ARMTargetStreamer &ARMException::getTargetStreamer() { return static_cast<ARMTargetStreamer &>(TS); } -/// endModule - Emit all exception information that should come after the -/// content. 
-void ARMException::endModule() { - if (shouldEmitCFI) - Asm->OutStreamer->EmitCFISections(false, true); -} - void ARMException::beginFunction(const MachineFunction *MF) { if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) getTargetStreamer().emitFnStart(); @@ -57,7 +50,14 @@ void ARMException::beginFunction(const MachineFunction *MF) { AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); assert(MoveType != AsmPrinter::CFI_M_EH && "non-EH CFI not yet supported in prologue with EHABI lowering"); + if (MoveType == AsmPrinter::CFI_M_Debug) { + if (!hasEmittedCFISections) { + if (Asm->needsOnlyDebugCFIMoves()) + Asm->OutStreamer->EmitCFISections(false, true); + hasEmittedCFISections = true; + } + shouldEmitCFI = true; Asm->OutStreamer->EmitCFIStartProc(false); } @@ -75,7 +75,7 @@ void ARMException::endFunction(const MachineFunction *MF) { F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && F->needsUnwindTableEntry(); bool shouldEmitPersonality = forceEmitPersonality || - !MMI->getLandingPads().empty(); + !MF->getLandingPads().empty(); if (!Asm->MF->getFunction()->needsUnwindTableEntry() && !shouldEmitPersonality) ATS.emitCantUnwind(); @@ -99,8 +99,9 @@ void ARMException::endFunction(const MachineFunction *MF) { } void ARMException::emitTypeInfos(unsigned TTypeEncoding) { - const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const MachineFunction *MF = Asm->MF; + const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MF->getFilterIds(); bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 8c68383..ec552e0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -14,8 +14,6 @@ using namespace llvm; -class MCExpr; - unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { HasBeenUsed = true; auto IterBool = diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 272bace..24fdbfc 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -37,6 +37,8 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" @@ -55,10 +57,15 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" -static const char *const DWARFGroupName = "DWARF Emission"; -static const char *const DbgTimerName = "Debug Info Emission"; -static const char *const EHTimerName = "DWARF Exception Writer"; -static const char *const CodeViewLineTablesGroupName = "CodeView Line Tables"; +static const char *const DWARFGroupName = "dwarf"; +static const char *const DWARFGroupDescription = "DWARF Emission"; +static const char *const DbgTimerName = "emit"; +static const char *const DbgTimerDescription = "Debug Info Emission"; +static const char *const EHTimerName = "write_exception"; +static const char *const EHTimerDescription = "DWARF Exception Writer"; +static const char *const CodeViewLineTablesGroupName = "linetables"; +static const char *const CodeViewLineTablesGroupDescription = + "CodeView Line Tables"; 
STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -101,7 +108,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL, AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)), - LastMI(nullptr), LastFn(0), Counter(~0U) { + isCFIMoveForDebugging(false), LastMI(nullptr), LastFn(0), Counter(~0U) { DD = nullptr; MMI = nullptr; LI = nullptr; @@ -143,7 +150,7 @@ const DataLayout &AsmPrinter::getDataLayout() const { } // Do not use the cached DataLayout because some client use it without a Module -// (llmv-dsymutil, llvm-dwarfdump). +// (llvm-dsymutil, llvm-dwarfdump). unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); } const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { @@ -155,17 +162,11 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { S.EmitInstruction(Inst, getSubtargetInfo()); } -StringRef AsmPrinter::getTargetTriple() const { - return TM.getTargetTriple().str(); -} - /// getCurrentSection() - Return the current section we are emitting to. const MCSection *AsmPrinter::getCurrentSection() const { - return OutStreamer->getCurrentSection().first; + return OutStreamer->getCurrentSectionOnly(); } - - void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); @@ -184,8 +185,6 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer->InitSections(false); - Mang = new Mangler(); - // Emit the version-min deplyment target directive if needed. // // FIXME: If we end up with a collection of these sorts of Darwin-specific @@ -194,7 +193,7 @@ bool AsmPrinter::doInitialization(Module &M) { // alternative is duplicated code in each of the target asm printers that // use the directive, where it would need the same conditionalization // anyway. - Triple TT(getTargetTriple()); + const Triple &TT = TM.getTargetTriple(); // If there is a version specified, Major will be non-zero. if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) { unsigned Major, Minor, Update; @@ -250,18 +249,43 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) { bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); - if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { + if (EmitCodeView && (TM.getTargetTriple().isKnownWindowsMSVCEnvironment() || + TM.getTargetTriple().isWindowsItaniumEnvironment())) { Handlers.push_back(HandlerInfo(new CodeViewDebug(this), - DbgTimerName, - CodeViewLineTablesGroupName)); + DbgTimerName, DbgTimerDescription, + CodeViewLineTablesGroupName, + CodeViewLineTablesGroupDescription)); } if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { DD = new DwarfDebug(this, &M); DD->beginModule(); - Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); + Handlers.push_back(HandlerInfo(DD, DbgTimerName, DbgTimerDescription, + DWARFGroupName, DWARFGroupDescription)); } } + switch (MAI->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + isCFIMoveForDebugging = true; + if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + break; + for (auto &F: M.getFunctionList()) { + // If the module contains any function with unwind data, + // .eh_frame has to be emitted. + // Ignore functions that won't get emitted. 
+ if (!F.isDeclarationForLinker() && F.needsUnwindTableEntry()) { + isCFIMoveForDebugging = false; + break; + } + } + break; + default: + isCFIMoveForDebugging = false; + break; + } + EHStreamer *ES = nullptr; switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: @@ -286,7 +310,8 @@ bool AsmPrinter::doInitialization(Module &M) { break; } if (ES) - Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName)); + Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription, + DWARFGroupName, DWARFGroupDescription)); return false; } @@ -340,11 +365,11 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV) const { - TM.getNameWithPrefix(Name, GV, *Mang); + TM.getNameWithPrefix(Name, GV, getObjFileLowering().getMangler()); } MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { - return TM.getSymbol(GV, *Mang); + return TM.getSymbol(GV); } /// EmitGlobalVariable - Emit the specified global variable to the .s file. @@ -407,7 +432,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { unsigned AlignLog = getGVAlignmentLog2(GV, DL); for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, + HI.TimerGroupName, HI.TimerGroupDescription, + TimePassesIsEnabled); HI.Handler->setSymbolSize(GVSym, Size); } @@ -424,8 +451,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } // Determine to which section this global should be emitted. - MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); + MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, TM); // If we have a bss global going to a section that supports the // zerofill directive, do so here. @@ -483,7 +509,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol MCSymbol *MangSym = - OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); + OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); if (GVKind.isThreadBSS()) { TheSection = getObjFileLowering().getTLSBSSSection(); @@ -535,12 +561,21 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym), + OutStreamer->emitELFSize(EmittedInitSym, MCConstantExpr::create(Size, OutContext)); OutStreamer->AddBlankLine(); } +/// Emit the directive and value for debug thread local expression +/// +/// \p Value - The value to emit. +/// \p Size - The size of the integer (in bytes) to emit. +void AsmPrinter::EmitDebugValue(const MCExpr *Value, + unsigned Size) const { + OutStreamer->EmitValue(Value, Size); +} + /// EmitFunctionHeader - This method emits the header for the current /// function. void AsmPrinter::EmitFunctionHeader() { @@ -550,8 +585,7 @@ void AsmPrinter::EmitFunctionHeader() { // Print the 'header' of function. const Function *F = MF->getFunction(); - OutStreamer->SwitchSection( - getObjFileLowering().SectionForGlobal(F, *Mang, TM)); + OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); EmitLinkage(F, CurrentFnSym); @@ -598,7 +632,8 @@ void AsmPrinter::EmitFunctionHeader() { // Emit pre-function debug and/or EH information. 
for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->beginFunction(MF); } @@ -632,26 +667,26 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { // Check for spills and reloads int FI; - const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); // We assume a single instruction only has a spill or reload, not // both. const MachineMemOperand *MMO; if (TII->isLoadFromStackSlotPostFE(MI, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (MFI.isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Reload\n"; } } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) + if (MFI.isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Reload\n"; } else if (TII->isStoreToStackSlotPostFE(MI, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (MFI.isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Spill\n"; } } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) + if (MFI.isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } @@ -711,9 +746,10 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << V->getName(); const DIExpression *Expr = MI->getDebugExpression(); - if (Expr->isBitPiece()) - OS << " [bit_piece offset=" << Expr->getBitPieceOffset() - << " size=" << Expr->getBitPieceSize() << "]"; + auto Fragment = Expr->getFragmentInfo(); + if (Fragment) + OS << " [fragment offset=" << Fragment->OffsetInBits + << " size=" << Fragment->SizeInBits << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. @@ -721,21 +757,21 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; for (unsigned i = 0; i < Expr->getNumElements(); ++i) { - if (Deref) { + uint64_t Op = Expr->getElement(i); + if (Op == dwarf::DW_OP_LLVM_fragment) { + // There can't be any operands after this in a valid expression + break; + } else if (Deref) { // We currently don't support extra Offsets or derefs after the first // one. Bail out early instead of emitting an incorrect comment OS << " [complex expression]"; AP.OutStreamer->emitRawComment(OS.str()); return true; - } - uint64_t Op = Expr->getElement(i); - if (Op == dwarf::DW_OP_deref) { + } else if (Op == dwarf::DW_OP_deref) { Deref = true; continue; - } else if (Op == dwarf::DW_OP_bit_piece) { - // There can't be any operands after this in a valid expression - break; } + uint64_t ExtraOffset = Expr->getElement(i++); if (Op == dwarf::DW_OP_plus) Offset += ExtraOffset; @@ -756,7 +792,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // There is no good way to print long double. Convert a copy to // double. Ah well, it's only a comment. 
bool ignored; - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored); OS << "(long double) " << APF.convertToDouble(); } @@ -819,8 +855,7 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { if (needsCFIMoves() == CFI_M_None) return; - const MachineModuleInfo &MMI = MF->getMMI(); - const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions(); + const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions(); unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); const MCCFIInstruction &CFI = Instrs[CFIIndex]; emitCFIInstruction(CFI); @@ -862,7 +897,8 @@ void AsmPrinter::EmitFunctionBody() { if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, + HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->beginInstruction(&MI); } @@ -906,7 +942,8 @@ void AsmPrinter::EmitFunctionBody() { if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, + HI.TimerGroupName, HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endInstruction(); } @@ -944,8 +981,8 @@ void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); - if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { + if (!MF->getLandingPads().empty() || MMI->hasDebugInfo() || + MF->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->EmitLabel(CurrentFnEnd); @@ -959,12 +996,12 @@ void AsmPrinter::EmitFunctionBody() { const MCExpr *SizeExp = MCBinaryExpr::createSub( MCSymbolRefExpr::create(CurrentFnEnd, OutContext), MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext); - if (auto Sym = dyn_cast<MCSymbolELF>(CurrentFnSym)) - OutStreamer->emitELFSize(Sym, SizeExp); + OutStreamer->emitELFSize(CurrentFnSym, SizeExp); } for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->markFunctionEnd(); } @@ -973,10 +1010,10 @@ void AsmPrinter::EmitFunctionBody() { // Emit post-function debug and/or EH information. 
for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endFunction(MF); } - MMI->EndFunction(); OutStreamer->AddBlankLine(); } @@ -1100,8 +1137,7 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, (!BaseObject || BaseObject->hasPrivateLinkage())) { const DataLayout &DL = M.getDataLayout(); uint64_t Size = DL.getTypeAllocSize(GA->getValueType()); - OutStreamer->emitELFSize(cast<MCSymbolELF>(Name), - MCConstantExpr::create(Size, OutContext)); + OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext)); } } } @@ -1143,7 +1179,7 @@ bool AsmPrinter::doFinalization(Module &M) { SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); if (!ModuleFlags.empty()) - TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM); + TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM); if (TM.getTargetTriple().isOSBinFormatELF()) { MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); @@ -1164,8 +1200,8 @@ bool AsmPrinter::doFinalization(Module &M) { // Finalize debug and EH information. for (const HandlerInfo &HI : Handlers) { - NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, - TimePassesIsEnabled); + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); HI.Handler->endModule(); delete HI.Handler; } @@ -1246,7 +1282,6 @@ bool AsmPrinter::doFinalization(Module &M) { // after everything else has gone out. EmitEndOfAsmFile(M); - delete Mang; Mang = nullptr; MMI = nullptr; OutStreamer->Finish(); @@ -1269,8 +1304,8 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); - if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - MMI->hasEHFunclets() || NeedsLocalForSize) { + if (!MF.getLandingPads().empty() || MMI->hasDebugInfo() || + MF.hasEHFunclets() || NeedsLocalForSize) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -1392,7 +1427,7 @@ void AsmPrinter::EmitJumpTableInfo() { *F); if (JTInDiffSection) { // Drop it in the readonly section. 
- MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, *Mang, TM); + MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, TM); OutStreamer->SwitchSection(ReadOnlySection); } @@ -1536,12 +1571,6 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), /* isCtor */ true); - if (TM.getRelocationModel() == Reloc::Static && - MAI->hasStaticCtorDtorReferenceInStaticMode()) { - StringRef Sym(".constructors_used"); - OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), - MCSA_Reference); - } return true; } @@ -1549,12 +1578,6 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), /* isCtor */ false); - if (TM.getRelocationModel() == Reloc::Static && - MAI->hasStaticCtorDtorReferenceInStaticMode()) { - StringRef Sym(".destructors_used"); - OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), - MCSA_Reference); - } return true; } @@ -1699,7 +1722,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size, bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { - OutStreamer->EmitCOFFSecRel32(Label); + OutStreamer->EmitCOFFSecRel32(Label, Offset); + if (Size > 4) + OutStreamer->EmitZeros(Size - 4); return; } @@ -1764,7 +1789,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) + if (Constant *C = ConstantFoldConstant(CE, getDataLayout())) if (C != CE) return lowerConstant(C); @@ -1796,7 +1821,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // expression properly. This is important for differences between // blockaddress labels. Since the two labels are in the same function, it // is reasonable to treat their delta as a 32-bit value. - // FALL THROUGH. + LLVM_FALLTHROUGH; case Instruction::BitCast: return lowerConstant(CE->getOperand(0)); @@ -1843,8 +1868,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { APInt RHSOffset; if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset, getDataLayout())) { - const MCExpr *RelocExpr = getObjFileLowering().lowerRelativeReference( - LHSGV, RHSGV, *Mang, TM); + const MCExpr *RelocExpr = + getObjFileLowering().lowerRelativeReference(LHSGV, RHSGV, TM); if (!RelocExpr) RelocExpr = MCBinaryExpr::createSub( MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx), @@ -2299,7 +2324,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. 
- Constant *New = ConstantFoldConstantExpression(CE, DL); + Constant *New = ConstantFoldConstant(CE, DL); if (New && New != CE) return emitGlobalConstantImpl(DL, New, AP); } @@ -2385,8 +2410,7 @@ MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, StringRef Suffix) const { - return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, *Mang, - TM); + return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, TM); } /// Return the MCSymbol for the specified ExternalSymbol. @@ -2599,12 +2623,12 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { if (GCPI != GCMap.end()) return GCPI->second.get(); - const char *Name = S.getName().c_str(); + auto Name = S.getName(); for (GCMetadataPrinterRegistry::iterator I = GCMetadataPrinterRegistry::begin(), E = GCMetadataPrinterRegistry::end(); I != E; ++I) - if (strcmp(Name, I->getName()) == 0) { + if (Name == I->getName()) { std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate(); GMP->S = &S; auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP))); @@ -2618,3 +2642,76 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { AsmPrinterHandler::~AsmPrinterHandler() {} void AsmPrinterHandler::markFunctionEnd() {} + +// In the binary's "xray_instr_map" section, an array of these function entries +// describes each instrumentation point. When XRay patches your code, the index +// into this table will be given to your handler as a patch point identifier. +void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, + const MCSymbol *CurrentFnSym) const { + Out->EmitSymbolValue(Sled, Bytes); + Out->EmitSymbolValue(CurrentFnSym, Bytes); + auto Kind8 = static_cast<uint8_t>(Kind); + Out->EmitBytes(StringRef(reinterpret_cast<const char *>(&Kind8), 1)); + Out->EmitBytes( + StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1)); + Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries +} + +void AsmPrinter::emitXRayTable() { + if (Sleds.empty()) + return; + + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + auto Fn = MF->getFunction(); + MCSection *Section = nullptr; + if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + if (Fn->hasComdat()) { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, + Fn->getComdat()->getName()); + } else { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); + } + } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { + Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, + SectionKind::getReadOnlyWithRel()); + } else { + llvm_unreachable("Unsupported target"); + } + + // Before we switch over, we force a reference to a label inside the + // xray_instr_map section. Since this function is always called just + // before the function's end, we assume that this is happening after + // the last return instruction. 
+ + auto WordSizeBytes = TM.getPointerSize(); + MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); + OutStreamer->EmitCodeAlignment(16); + OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false); + OutStreamer->SwitchSection(Section); + OutStreamer->EmitLabel(Tmp); + for (const auto &Sled : Sleds) + Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); + + OutStreamer->SwitchSection(PrevSection); + Sleds.clear(); +} + +void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, + SledKind Kind) { + auto Fn = MI.getParent()->getParent()->getFunction(); + auto Attr = Fn->getFnAttribute("function-instrument"); + bool AlwaysInstrument = + Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always"; + Sleds.emplace_back( + XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn }); +} + +uint16_t AsmPrinter::getDwarfVersion() const { + return OutStreamer->getContext().getDwarfVersion(); +} + +void AsmPrinter::setDwarfVersion(uint16_t Version) { + OutStreamer->getContext().setDwarfVersion(Version); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 60f40d0..0185c38 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -138,8 +138,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, - *OutStreamer); + TLOF.getTTypeGlobalReference(GV, Encoding, TM, MMI, *OutStreamer); OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); } else OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); @@ -150,7 +149,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, if (!ForceOffset) { // On COFF targets, we have to emit the special .secrel32 directive. if (MAI->needsDwarfSectionOffsetDirective()) { - OutStreamer->EmitCOFFSecRel32(Label); + OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0); return; } @@ -175,36 +174,6 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { EmitInt32(S.getOffset()); } -/// EmitDwarfRegOp - Emit dwarf register operation. -void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, - const MachineLocation &MLoc) const { - DebugLocDwarfExpression Expr(getDwarfDebug()->getDwarfVersion(), Streamer); - const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo(); - int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false); - if (Reg < 0) { - // We assume that pointers are always in an addressable register. - if (MLoc.isIndirect()) - // FIXME: We have no reasonable way of handling errors in here. The - // caller might be in the middle of a dwarf expression. We should - // probably assert that Reg >= 0 once debug info generation is more - // mature. - return Expr.EmitOp(dwarf::DW_OP_nop, - "nop (could not find a dwarf register number)"); - - // Attempt to find a valid super- or sub-register. 
- if (!Expr.AddMachineRegPiece(*MF->getSubtarget().getRegisterInfo(), - MLoc.getReg())) - Expr.EmitOp(dwarf::DW_OP_nop, - "nop (could not find a dwarf register number)"); - return; - } - - if (MLoc.isIndirect()) - Expr.AddRegIndirect(Reg, MLoc.getOffset()); - else - Expr.AddReg(Reg); -} - //===----------------------------------------------------------------------===// // Dwarf Lowering Routines //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 2ce6c18..57864e4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -100,6 +100,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, } SourceMgr SrcMgr; + SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); + SrcMgrDiagInfo DiagInfo; // If the current LLVMContext has an inline asm handler, set it in SourceMgr. @@ -193,6 +195,23 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } if (Done) break; + // If we have ${:foo}, then this is not a real operand reference, it is a + // "magic" string reference, just like in .td files. Arrange to call + // PrintSpecial. + if (LastEmitted[0] == '{' && LastEmitted[1] == ':') { + LastEmitted += 2; + const char *StrStart = LastEmitted; + const char *StrEnd = strchr(StrStart, '}'); + if (!StrEnd) + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); + + std::string Val(StrStart, StrEnd); + AP->PrintSpecial(MI, OS, Val.c_str()); + LastEmitted = StrEnd+1; + break; + } + const char *IDStart = LastEmitted; const char *IDEnd = IDStart; while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index ebf80de..8344051 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -13,17 +13,20 @@ #include "CodeViewDebug.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/DebugInfo/CodeView/ByteStream.h" +#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeDumper.h" +#include "llvm/DebugInfo/CodeView/TypeDatabase.h" +#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" #include "llvm/IR/Constants.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSymbol.h" @@ -35,9 +38,11 @@ using namespace llvm; using namespace llvm::codeview; +using namespace llvm::msf; CodeViewDebug::CodeViewDebug(AsmPrinter *AP) - : DebugHandlerBase(AP), OS(*Asm->OutStreamer), CurFn(nullptr) { + : DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(), + TypeTable(Allocator), CurFn(nullptr) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip any debug info related stuff. 
if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || @@ -108,8 +113,9 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { if (Insertion.second) { // We have to compute the full filepath and emit a .cv_file directive. StringRef FullPath = getFullFilepath(F); - NextId = OS.EmitCVFileDirective(NextId, FullPath); - assert(NextId == FileIdMap.size() && ".cv_file directive failed"); + bool Success = OS.EmitCVFileDirective(NextId, FullPath); + (void)Success; + assert(Success && ".cv_file directive failed"); } return Insertion.first->second; } @@ -120,7 +126,16 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt, auto SiteInsertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()}); InlineSite *Site = &SiteInsertion.first->second; if (SiteInsertion.second) { + unsigned ParentFuncId = CurFn->FuncId; + if (const DILocation *OuterIA = InlinedAt->getInlinedAt()) + ParentFuncId = + getInlineSite(OuterIA, InlinedAt->getScope()->getSubprogram()) + .SiteFuncId; + Site->SiteFuncId = NextFuncId++; + OS.EmitCVInlineSiteIdDirective( + Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()), + InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc()); Site->Inlinee = Inlinee; InlinedSubprograms.insert(Inlinee); getFuncIdForSubprogram(Inlinee); @@ -208,8 +223,8 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) { // Build the fully qualified name of the scope. std::string ScopeName = getFullyQualifiedName(Scope); - TypeIndex TI = - TypeTable.writeStringId(StringIdRecord(TypeIndex(), ScopeName)); + StringIdRecord SID(TypeIndex(), ScopeName); + auto TI = TypeTable.writeKnownType(SID); return recordTypeIndexForDINode(Scope, TI); } @@ -234,12 +249,12 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { TypeIndex ClassType = getTypeIndex(Class); MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class), DisplayName); - TI = TypeTable.writeMemberFuncId(MFuncId); + TI = TypeTable.writeKnownType(MFuncId); } else { // Otherwise, this must be a free function. TypeIndex ParentScope = getScopeIndex(Scope); FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName); - TI = TypeTable.writeFuncId(FuncId); + TI = TypeTable.writeKnownType(FuncId); } return recordTypeIndexForDINode(SP, TI); @@ -353,8 +368,8 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, } OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(), - /*PrologueEnd=*/false, - /*IsStmt=*/false, DL->getFilename()); + /*PrologueEnd=*/false, /*IsStmt=*/false, + DL->getFilename(), SMLoc()); } void CodeViewDebug::emitCodeViewMagicVersion() { @@ -377,6 +392,11 @@ void CodeViewDebug::endModule() { // Use the generic .debug$S section, and make a subsection for all the inlined // subprograms. switchToDebugSectionForSymbol(nullptr); + + MCSymbol *CompilerInfo = beginCVSubsection(ModuleSubstreamKind::Symbols); + emitCompilerInformation(); + endCVSubsection(CompilerInfo); + emitInlineeLinesSubsection(); // Emit per-function debug information. @@ -418,10 +438,13 @@ void CodeViewDebug::endModule() { } static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) { - // Microsoft's linker seems to have trouble with symbol names longer than - // 0xffd8 bytes. - S = S.substr(0, 0xffd8); - SmallString<32> NullTerminatedString(S); + // The maximum CV record length is 0xFF00. Most of the strings we emit appear + // after a fixed length portion of the record. 
The fixed length portion should + // always be less than 0xF00 (3840) bytes, so truncate the string so that the + // overall record size is less than the maximum allowed. + unsigned MaxFixedRecordLength = 0xF00; + SmallString<32> NullTerminatedString( + S.take_front(MaxRecordLength - MaxFixedRecordLength - 1)); NullTerminatedString.push_back('\0'); OS.EmitBytes(NullTerminatedString); } @@ -446,48 +469,175 @@ void CodeViewDebug::emitTypeInformation() { CommentPrefix += ' '; } - CVTypeDumper CVTD(nullptr, /*PrintRecordBytes=*/false); - TypeTable.ForEachRecord( - [&](TypeIndex Index, StringRef Record) { - if (OS.isVerboseAsm()) { - // Emit a block comment describing the type record for readability. - SmallString<512> CommentBlock; - raw_svector_ostream CommentOS(CommentBlock); - ScopedPrinter SP(CommentOS); - SP.setPrefix(CommentPrefix); - CVTD.setPrinter(&SP); - Error E = CVTD.dump({Record.bytes_begin(), Record.bytes_end()}); - if (E) { - logAllUnhandledErrors(std::move(E), errs(), "error: "); - llvm_unreachable("produced malformed type record"); - } - // emitRawComment will insert its own tab and comment string before - // the first line, so strip off our first one. It also prints its own - // newline. - OS.emitRawComment( - CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); - } else { + TypeDatabase TypeDB; + CVTypeDumper CVTD(TypeDB); + TypeTable.ForEachRecord([&](TypeIndex Index, ArrayRef<uint8_t> Record) { + if (OS.isVerboseAsm()) { + // Emit a block comment describing the type record for readability. + SmallString<512> CommentBlock; + raw_svector_ostream CommentOS(CommentBlock); + ScopedPrinter SP(CommentOS); + SP.setPrefix(CommentPrefix); + TypeDumpVisitor TDV(TypeDB, &SP, false); + Error E = CVTD.dump(Record, TDV); + if (E) { + logAllUnhandledErrors(std::move(E), errs(), "error: "); + llvm_unreachable("produced malformed type record"); + } + // emitRawComment will insert its own tab and comment string before + // the first line, so strip off our first one. It also prints its own + // newline. + OS.emitRawComment( + CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); + } else { #ifndef NDEBUG - // Assert that the type data is valid even if we aren't dumping - // comments. The MSVC linker doesn't do much type record validation, - // so the first link of an invalid type record can succeed while - // subsequent links will fail with LNK1285. - ByteStream<> Stream({Record.bytes_begin(), Record.bytes_end()}); - CVTypeArray Types; - StreamReader Reader(Stream); - Error E = Reader.readArray(Types, Reader.getLength()); - if (!E) { - TypeVisitorCallbacks C; - E = CVTypeVisitor(C).visitTypeStream(Types); - } - if (E) { - logAllUnhandledErrors(std::move(E), errs(), "error: "); - llvm_unreachable("produced malformed type record"); - } + // Assert that the type data is valid even if we aren't dumping + // comments. The MSVC linker doesn't do much type record validation, + // so the first link of an invalid type record can succeed while + // subsequent links will fail with LNK1285. 
+ ByteStream Stream(Record); + CVTypeArray Types; + StreamReader Reader(Stream); + Error E = Reader.readArray(Types, Reader.getLength()); + if (!E) { + TypeVisitorCallbacks C; + E = CVTypeVisitor(C).visitTypeStream(Types); + } + if (E) { + logAllUnhandledErrors(std::move(E), errs(), "error: "); + llvm_unreachable("produced malformed type record"); + } #endif - } - OS.EmitBinaryData(Record); - }); + } + StringRef S(reinterpret_cast<const char *>(Record.data()), Record.size()); + OS.EmitBinaryData(S); + }); +} + +namespace { + +static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { + switch (DWLang) { + case dwarf::DW_LANG_C: + case dwarf::DW_LANG_C89: + case dwarf::DW_LANG_C99: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_ObjC: + return SourceLanguage::C; + case dwarf::DW_LANG_C_plus_plus: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_C_plus_plus_14: + return SourceLanguage::Cpp; + case dwarf::DW_LANG_Fortran77: + case dwarf::DW_LANG_Fortran90: + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + return SourceLanguage::Fortran; + case dwarf::DW_LANG_Pascal83: + return SourceLanguage::Pascal; + case dwarf::DW_LANG_Cobol74: + case dwarf::DW_LANG_Cobol85: + return SourceLanguage::Cobol; + case dwarf::DW_LANG_Java: + return SourceLanguage::Java; + default: + // There's no CodeView representation for this language, and CV doesn't + // have an "unknown" option for the language field, so we'll use MASM, + // as it's very low level. + return SourceLanguage::Masm; + } +} + +struct Version { + int Part[4]; +}; + +// Takes a StringRef like "clang 4.0.0.0 (other nonsense 123)" and parses out +// the version number. +static Version parseVersion(StringRef Name) { + Version V = {{0}}; + int N = 0; + for (const char C : Name) { + if (isdigit(C)) { + V.Part[N] *= 10; + V.Part[N] += C - '0'; + } else if (C == '.') { + ++N; + if (N >= 4) + return V; + } else if (N > 0) + return V; + } + return V; +} + +static CPUType mapArchToCVCPUType(Triple::ArchType Type) { + switch (Type) { + case Triple::ArchType::x86: + return CPUType::Pentium3; + case Triple::ArchType::x86_64: + return CPUType::X64; + case Triple::ArchType::thumb: + return CPUType::Thumb; + default: + report_fatal_error("target architecture doesn't map to a CodeView " + "CPUType"); + } +} + +} // anonymous namespace + +void CodeViewDebug::emitCompilerInformation() { + MCContext &Context = MMI->getContext(); + MCSymbol *CompilerBegin = Context.createTempSymbol(), + *CompilerEnd = Context.createTempSymbol(); + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(CompilerEnd, CompilerBegin, 2); + OS.EmitLabel(CompilerBegin); + OS.AddComment("Record kind: S_COMPILE3"); + OS.EmitIntValue(SymbolKind::S_COMPILE3, 2); + uint32_t Flags = 0; + + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + const MDNode *Node = *CUs->operands().begin(); + const auto *CU = cast<DICompileUnit>(Node); + + // The low byte of the flags indicates the source language. + Flags = MapDWLangToCVLang(CU->getSourceLanguage()); + // TODO: Figure out which other flags need to be set. 
+ + OS.AddComment("Flags and language"); + OS.EmitIntValue(Flags, 4); + + OS.AddComment("CPUType"); + CPUType CPU = + mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch()); + OS.EmitIntValue(static_cast<uint64_t>(CPU), 2); + + StringRef CompilerVersion = CU->getProducer(); + Version FrontVer = parseVersion(CompilerVersion); + OS.AddComment("Frontend version"); + for (int N = 0; N < 4; ++N) + OS.EmitIntValue(FrontVer.Part[N], 2); + + // Some Microsoft tools, like Binscope, expect a backend version number of at + // least 8.something, so we'll coerce the LLVM version into a form that + // guarantees it'll be big enough without really lying about the version. + int Major = 1000 * LLVM_VERSION_MAJOR + + 10 * LLVM_VERSION_MINOR + + LLVM_VERSION_PATCH; + // Clamp it for builds that use unusually large version numbers. + Major = std::min<int>(Major, std::numeric_limits<uint16_t>::max()); + Version BackVer = {{ Major, 0, 0, 0 }}; + OS.AddComment("Backend version"); + for (int N = 0; N < 4; ++N) + OS.EmitIntValue(BackVer.Part[N], 2); + + OS.AddComment("Null-terminated compiler version string"); + emitNullTerminatedSymbolName(OS, CompilerVersion); + + OS.EmitLabel(CompilerEnd); } void CodeViewDebug::emitInlineeLinesSubsection() { @@ -525,17 +675,6 @@ void CodeViewDebug::emitInlineeLinesSubsection() { endCVSubsection(InlineEnd); } -void CodeViewDebug::collectInlineSiteChildren( - SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI, - const InlineSite &Site) { - for (const DILocation *ChildSiteLoc : Site.ChildSites) { - auto I = FI.InlineSites.find(ChildSiteLoc); - const InlineSite &ChildSite = I->second; - Children.push_back(ChildSite.SiteFuncId); - collectInlineSiteChildren(Children, FI, ChildSite); - } -} - void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt, const InlineSite &Site) { @@ -561,11 +700,9 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, unsigned FileId = maybeRecordFile(Site.Inlinee->getFile()); unsigned StartLineNum = Site.Inlinee->getLine(); - SmallVector<unsigned, 3> SecondaryFuncIds; - collectInlineSiteChildren(SecondaryFuncIds, FI, Site); OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum, - FI.Begin, FI.End, SecondaryFuncIds); + FI.Begin, FI.End); OS.EmitLabel(InlineEnd); @@ -641,13 +778,13 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2); OS.EmitLabel(ProcRecordBegin); - if (GV->hasLocalLinkage()) { - OS.AddComment("Record kind: S_LPROC32_ID"); - OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2); - } else { - OS.AddComment("Record kind: S_GPROC32_ID"); - OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2); - } + if (GV->hasLocalLinkage()) { + OS.AddComment("Record kind: S_LPROC32_ID"); + OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2); + } else { + OS.AddComment("Record kind: S_GPROC32_ID"); + OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2); + } // These fields are filled in by tools like CVPACK which run after the fact. 
OS.AddComment("PtrParent"); @@ -667,7 +804,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.AddComment("Function type index"); OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4); OS.AddComment("Function section relative address"); - OS.EmitCOFFSecRel32(Fn); + OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Function section index"); OS.EmitCOFFSectionIndex(Fn); OS.AddComment("Flags"); @@ -711,29 +848,33 @@ CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) { DR.InMemory = -1; DR.DataOffset = Offset; assert(DR.DataOffset == Offset && "truncation"); + DR.IsSubfield = 0; DR.StructOffset = 0; DR.CVRegister = CVRegister; return DR; } CodeViewDebug::LocalVarDefRange -CodeViewDebug::createDefRangeReg(uint16_t CVRegister) { +CodeViewDebug::createDefRangeGeneral(uint16_t CVRegister, bool InMemory, + int Offset, bool IsSubfield, + uint16_t StructOffset) { LocalVarDefRange DR; - DR.InMemory = 0; - DR.DataOffset = 0; - DR.StructOffset = 0; + DR.InMemory = InMemory; + DR.DataOffset = Offset; + DR.IsSubfield = IsSubfield; + DR.StructOffset = StructOffset; DR.CVRegister = CVRegister; return DR; } -void CodeViewDebug::collectVariableInfoFromMMITable( +void CodeViewDebug::collectVariableInfoFromMFTable( DenseSet<InlinedVariable> &Processed) { - const TargetSubtargetInfo &TSI = Asm->MF->getSubtarget(); + const MachineFunction &MF = *Asm->MF; + const TargetSubtargetInfo &TSI = MF.getSubtarget(); const TargetFrameLowering *TFI = TSI.getFrameLowering(); const TargetRegisterInfo *TRI = TSI.getRegisterInfo(); - for (const MachineModuleInfo::VariableDbgInfo &VI : - MMI->getVariableDbgInfo()) { + for (const MachineFunction::VariableDbgInfo &VI : MF.getVariableDbgInfo()) { if (!VI.Var) continue; assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && @@ -770,7 +911,7 @@ void CodeViewDebug::collectVariableInfoFromMMITable( void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { DenseSet<InlinedVariable> Processed; // Grab the variable info that was squirreled away in the MMI side-table. - collectVariableInfoFromMMITable(Processed); + collectVariableInfoFromMFTable(Processed); const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo(); @@ -802,10 +943,17 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { const MachineInstr *DVInst = Range.first; assert(DVInst->isDebugValue() && "Invalid History entry"); const DIExpression *DIExpr = DVInst->getDebugExpression(); - - // Bail if there is a complex DWARF expression for now. - if (DIExpr && DIExpr->getNumElements() > 0) - continue; + bool IsSubfield = false; + unsigned StructOffset = 0; + + // Handle fragments. + auto Fragment = DIExpr->getFragmentInfo(); + if (DIExpr && Fragment) { + IsSubfield = true; + StructOffset = Fragment->OffsetInBits / 8; + } else if (DIExpr && DIExpr->getNumElements() > 0) { + continue; // Ignore unrecognized exprs. + } // Bail if operand 0 is not a valid register. This means the variable is a // simple constant, or is described by a complex expression. @@ -817,19 +965,20 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { continue; // Handle the two cases we can handle: indirect in memory and in register. - bool IsIndirect = DVInst->getOperand(1).isImm(); - unsigned CVReg = TRI->getCodeViewRegNum(DVInst->getOperand(0).getReg()); + unsigned CVReg = TRI->getCodeViewRegNum(Reg); + bool InMemory = DVInst->getOperand(1).isImm(); + int Offset = InMemory ? 
DVInst->getOperand(1).getImm() : 0; { - LocalVarDefRange DefRange; - if (IsIndirect) { - int64_t Offset = DVInst->getOperand(1).getImm(); - DefRange = createDefRangeMem(CVReg, Offset); - } else { - DefRange = createDefRangeReg(CVReg); - } + LocalVarDefRange DR; + DR.CVRegister = CVReg; + DR.InMemory = InMemory; + DR.DataOffset = Offset; + DR.IsSubfield = IsSubfield; + DR.StructOffset = StructOffset; + if (Var.DefRanges.empty() || - Var.DefRanges.back().isDifferentLocation(DefRange)) { - Var.DefRanges.emplace_back(std::move(DefRange)); + Var.DefRanges.back().isDifferentLocation(DR)) { + Var.DefRanges.emplace_back(std::move(DR)); } } @@ -837,8 +986,14 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { const MCSymbol *Begin = getLabelBeforeInsn(Range.first); const MCSymbol *End = getLabelAfterInsn(Range.second); if (!End) { - if (std::next(I) != E) - End = getLabelBeforeInsn(std::next(I)->first); + // This range is valid until the next overlapping bitpiece. In the + // common case, ranges will not be bitpieces, so they will overlap. + auto J = std::next(I); + while (J != E && + !fragmentsOverlap(DIExpr, J->first->getDebugExpression())) + ++J; + if (J != E) + End = getLabelBeforeInsn(J->first); else End = Asm->getFunctionEnd(); } @@ -873,6 +1028,8 @@ void CodeViewDebug::beginFunction(const MachineFunction *MF) { CurFn->FuncId = NextFuncId++; CurFn->Begin = Asm->getFunctionBegin(); + OS.EmitCVFuncIdDirective(CurFn->FuncId); + // Find the end of the function prolog. First known non-DBG_VALUE and // non-frame setup location marks the beginning of the function body. // FIXME: is there a simpler a way to do this? Can we just search @@ -933,6 +1090,9 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { case dwarf::DW_TAG_base_type: return lowerTypeBasic(cast<DIBasicType>(Ty)); case dwarf::DW_TAG_pointer_type: + if (cast<DIDerivedType>(Ty)->getName() == "__vtbl_ptr_type") + return lowerTypeVFTableShape(cast<DIDerivedType>(Ty)); + LLVM_FALLTHROUGH; case dwarf::DW_TAG_reference_type: case dwarf::DW_TAG_rvalue_reference_type: return lowerTypePointer(cast<DIDerivedType>(Ty)); @@ -940,6 +1100,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { return lowerTypeMemberPointer(cast<DIDerivedType>(Ty)); case dwarf::DW_TAG_const_type: case dwarf::DW_TAG_volatile_type: + // TODO: add support for DW_TAG_atomic_type here return lowerTypeModifier(cast<DIDerivedType>(Ty)); case dwarf::DW_TAG_subroutine_type: if (ClassTy) { @@ -989,20 +1150,25 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8; - bool UndefinedSubrange = false; - // FIXME: - // There is a bug in the front-end where an array of a structure, which was - // declared as incomplete structure first, ends up not getting a size assigned - // to it. (PR28303) + // We want to assert that the element type multiplied by the array lengths + // match the size of the overall array. However, if we don't have complete + // type information for the base type, we can't make this assertion. This + // happens if limited debug info is enabled in this case: + // struct VTableOptzn { VTableOptzn(); virtual ~VTableOptzn(); }; + // VTableOptzn array[3]; + // The DICompositeType of VTableOptzn will have size zero, and the array will + // have size 3 * sizeof(void*), and we should avoid asserting. 
+ // + // There is a related bug in the front-end where an array of a structure, + // which was declared as incomplete structure first, ends up not getting a + // size assigned to it. (PR28303) // Example: // struct A(*p)[3]; // struct A { int f; } a[3]; - // - // This needs to be fixed in the front-end, but in the meantime we don't want - // to trigger an assertion because of this. - if (Ty->getSizeInBits() == 0) { - UndefinedSubrange = true; + bool PartiallyIncomplete = false; + if (Ty->getSizeInBits() == 0 || ElementSize == 0) { + PartiallyIncomplete = true; } // Add subranges to array type. @@ -1021,18 +1187,24 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { // FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU. if (Count == -1) { Count = 1; - UndefinedSubrange = true; + PartiallyIncomplete = true; } - StringRef Name = (i == 0) ? Ty->getName() : ""; // Update the element size and element type index for subsequent subranges. ElementSize *= Count; - ElementTypeIndex = TypeTable.writeArray( - ArrayRecord(ElementTypeIndex, IndexType, ElementSize, Name)); + + // If this is the outermost array, use the size from the array. It will be + // more accurate if PartiallyIncomplete is true. + uint64_t ArraySize = + (i == 0 && ElementSize == 0) ? Ty->getSizeInBits() / 8 : ElementSize; + + StringRef Name = (i == 0) ? Ty->getName() : ""; + ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name); + ElementTypeIndex = TypeTable.writeKnownType(AR); } - (void)UndefinedSubrange; - assert(UndefinedSubrange || ElementSize == (Ty->getSizeInBits() / 8)); + (void)PartiallyIncomplete; + assert(PartiallyIncomplete || ElementSize == (Ty->getSizeInBits() / 8)); return ElementTypeIndex; } @@ -1080,20 +1252,20 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { break; case dwarf::DW_ATE_signed: switch (ByteSize) { - case 1: STK = SimpleTypeKind::SByte; break; - case 2: STK = SimpleTypeKind::Int16Short; break; - case 4: STK = SimpleTypeKind::Int32; break; - case 8: STK = SimpleTypeKind::Int64Quad; break; - case 16: STK = SimpleTypeKind::Int128Oct; break; + case 1: STK = SimpleTypeKind::SignedCharacter; break; + case 2: STK = SimpleTypeKind::Int16Short; break; + case 4: STK = SimpleTypeKind::Int32; break; + case 8: STK = SimpleTypeKind::Int64Quad; break; + case 16: STK = SimpleTypeKind::Int128Oct; break; } break; case dwarf::DW_ATE_unsigned: switch (ByteSize) { - case 1: STK = SimpleTypeKind::Byte; break; - case 2: STK = SimpleTypeKind::UInt16Short; break; - case 4: STK = SimpleTypeKind::UInt32; break; - case 8: STK = SimpleTypeKind::UInt64Quad; break; - case 16: STK = SimpleTypeKind::UInt128Oct; break; + case 1: STK = SimpleTypeKind::UnsignedCharacter; break; + case 2: STK = SimpleTypeKind::UInt16Short; break; + case 4: STK = SimpleTypeKind::UInt32; break; + case 8: STK = SimpleTypeKind::UInt64Quad; break; + case 16: STK = SimpleTypeKind::UInt128Oct; break; } break; case dwarf::DW_ATE_UTF: @@ -1133,13 +1305,6 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) { TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType()); - // While processing the type being pointed to it is possible we already - // created this pointer type. If so, we check here and return the existing - // pointer type. 
- auto I = TypeIndices.find({Ty, nullptr}); - if (I != TypeIndices.end()) - return I->second; - // Pointers to simple types can use SimpleTypeMode, rather than having a // dedicated pointer type record. if (PointeeTI.isSimple() && @@ -1171,7 +1336,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) { // do. PointerOptions PO = PointerOptions::None; PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8); - return TypeTable.writePointer(PR); + return TypeTable.writeKnownType(PR); } static PointerToMemberRepresentation @@ -1222,7 +1387,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) { MemberPointerInfo MPI( ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags())); PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI); - return TypeTable.writePointer(PR); + return TypeTable.writeKnownType(PR); } /// Given a DWARF calling convention, get the CodeView equivalent. If we don't @@ -1244,7 +1409,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { bool IsModifier = true; const DIType *BaseTy = Ty; while (IsModifier && BaseTy) { - // FIXME: Need to add DWARF tag for __unaligned. + // FIXME: Need to add DWARF tags for __unaligned and _Atomic switch (BaseTy->getTag()) { case dwarf::DW_TAG_const_type: Mods |= ModifierOptions::Const; @@ -1260,16 +1425,8 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve(); } TypeIndex ModifiedTI = getTypeIndex(BaseTy); - - // While processing the type being pointed to, it is possible we already - // created this modifier type. If so, we check here and return the existing - // modifier type. - auto I = TypeIndices.find({Ty, nullptr}); - if (I != TypeIndices.end()) - return I->second; - ModifierRecord MR(ModifiedTI, Mods); - return TypeTable.writeModifier(MR); + return TypeTable.writeKnownType(MR); } TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { @@ -1286,13 +1443,13 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { } ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); - TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec); + TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec); CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None, ArgTypeIndices.size(), ArgListIndex); - return TypeTable.writeProcedure(Procedure); + return TypeTable.writeKnownType(Procedure); } TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, @@ -1319,20 +1476,29 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, } ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); - TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec); + TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec); CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); // TODO: Need to use the correct values for: // FunctionOptions // ThisPointerAdjustment. 
- TypeIndex TI = TypeTable.writeMemberFunction(MemberFunctionRecord( - ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FunctionOptions::None, - ArgTypeIndices.size(), ArgListIndex, ThisAdjustment)); + MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, + FunctionOptions::None, ArgTypeIndices.size(), + ArgListIndex, ThisAdjustment); + TypeIndex TI = TypeTable.writeKnownType(MFR); return TI; } +TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) { + unsigned VSlotCount = Ty->getSizeInBits() / (8 * Asm->MAI->getPointerSize()); + SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near); + + VFTableShapeRecord VFTSR(Slots); + return TypeTable.writeKnownType(VFTSR); +} + static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) { switch (Flags & DINode::FlagAccessibility) { case DINode::FlagPrivate: return MemberAccess::Private; @@ -1420,25 +1586,28 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { if (Ty->isForwardDecl()) { CO |= ClassOptions::ForwardReference; } else { - FieldListRecordBuilder Fields; + FieldListRecordBuilder FLRB(TypeTable); + + FLRB.begin(); for (const DINode *Element : Ty->getElements()) { // We assume that the frontend provides all members in source declaration // order, which is what MSVC does. if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) { - Fields.writeEnumerator(EnumeratorRecord( - MemberAccess::Public, APSInt::getUnsigned(Enumerator->getValue()), - Enumerator->getName())); + EnumeratorRecord ER(MemberAccess::Public, + APSInt::getUnsigned(Enumerator->getValue()), + Enumerator->getName()); + FLRB.writeMemberType(ER); EnumeratorCount++; } } - FTI = TypeTable.writeFieldList(Fields); + FTI = FLRB.end(); } std::string FullName = getFullyQualifiedName(Ty); - return TypeTable.writeEnum(EnumRecord(EnumeratorCount, CO, FTI, FullName, - Ty->getIdentifier(), - getTypeIndex(Ty->getBaseType()))); + EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(), + getTypeIndex(Ty->getBaseType())); + return TypeTable.writeKnownType(ER); } //===----------------------------------------------------------------------===// @@ -1465,6 +1634,8 @@ struct llvm::ClassInfo { // Direct overloaded methods gathered by name. MethodsMap Methods; + TypeIndex VShapeTI; + std::vector<const DICompositeType *> NestedClasses; }; @@ -1513,11 +1684,13 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) { collectMemberInfo(Info, DDTy); } else if (DDTy->getTag() == dwarf::DW_TAG_inheritance) { Info.Inheritance.push_back(DDTy); + } else if (DDTy->getTag() == dwarf::DW_TAG_pointer_type && + DDTy->getName() == "__vtbl_ptr_type") { + Info.VShapeTI = getTypeIndex(DDTy); } else if (DDTy->getTag() == dwarf::DW_TAG_friend) { // Ignore friend members. It appears that MSVC emitted info about // friends in the past, but modern versions do not. } - // FIXME: Get Clang to emit function virtual table here and handle it. 
} else if (auto *Composite = dyn_cast<DICompositeType>(Element)) { Info.NestedClasses.push_back(Composite); } @@ -1533,9 +1706,9 @@ TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) { ClassOptions CO = ClassOptions::ForwardReference | getCommonClassOptions(Ty); std::string FullName = getFullyQualifiedName(Ty); - TypeIndex FwdDeclTI = TypeTable.writeClass(ClassRecord( - Kind, 0, CO, HfaKind::None, WindowsRTClassKind::None, TypeIndex(), - TypeIndex(), TypeIndex(), 0, FullName, Ty->getIdentifier())); + ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0, + FullName, Ty->getIdentifier()); + TypeIndex FwdDeclTI = TypeTable.writeKnownType(CR); if (!Ty->isForwardDecl()) DeferredCompleteTypes.push_back(Ty); return FwdDeclTI; @@ -1559,14 +1732,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { uint64_t SizeInBytes = Ty->getSizeInBits() / 8; - TypeIndex ClassTI = TypeTable.writeClass(ClassRecord( - Kind, FieldCount, CO, HfaKind::None, WindowsRTClassKind::None, FieldTI, - TypeIndex(), VShapeTI, SizeInBytes, FullName, Ty->getIdentifier())); + ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI, + SizeInBytes, FullName, Ty->getIdentifier()); + TypeIndex ClassTI = TypeTable.writeKnownType(CR); - TypeTable.writeUdtSourceLine(UdtSourceLineRecord( - ClassTI, TypeTable.writeStringId(StringIdRecord( - TypeIndex(0x0), getFullFilepath(Ty->getFile()))), - Ty->getLine())); + StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); + TypeIndex SIDI = TypeTable.writeKnownType(SIDR); + UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); + TypeTable.writeKnownType(USLR); addToUDTs(Ty, ClassTI); @@ -1577,9 +1750,8 @@ TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) { ClassOptions CO = ClassOptions::ForwardReference | getCommonClassOptions(Ty); std::string FullName = getFullyQualifiedName(Ty); - TypeIndex FwdDeclTI = - TypeTable.writeUnion(UnionRecord(0, CO, HfaKind::None, TypeIndex(), 0, - FullName, Ty->getIdentifier())); + UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier()); + TypeIndex FwdDeclTI = TypeTable.writeKnownType(UR); if (!Ty->isForwardDecl()) DeferredCompleteTypes.push_back(Ty); return FwdDeclTI; @@ -1599,14 +1771,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) { uint64_t SizeInBytes = Ty->getSizeInBits() / 8; std::string FullName = getFullyQualifiedName(Ty); - TypeIndex UnionTI = TypeTable.writeUnion( - UnionRecord(FieldCount, CO, HfaKind::None, FieldTI, SizeInBytes, FullName, - Ty->getIdentifier())); + UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName, + Ty->getIdentifier()); + TypeIndex UnionTI = TypeTable.writeKnownType(UR); - TypeTable.writeUdtSourceLine(UdtSourceLineRecord( - UnionTI, TypeTable.writeStringId(StringIdRecord( - TypeIndex(0x0), getFullFilepath(Ty->getFile()))), - Ty->getLine())); + StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); + TypeIndex SIRI = TypeTable.writeKnownType(SIR); + UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine()); + TypeTable.writeKnownType(USLR); addToUDTs(Ty, UnionTI); @@ -1621,7 +1793,8 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { // list record. unsigned MemberCount = 0; ClassInfo Info = collectClassInfo(Ty); - FieldListRecordBuilder Fields; + FieldListRecordBuilder FLBR(TypeTable); + FLBR.begin(); // Create base classes. 
for (const DIDerivedType *I : Info.Inheritance) { @@ -1631,16 +1804,22 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { unsigned VBPtrOffset = 0; // FIXME: Despite the accessor name, the offset is really in bytes. unsigned VBTableIndex = I->getOffsetInBits() / 4; - Fields.writeVirtualBaseClass(VirtualBaseClassRecord( - translateAccessFlags(Ty->getTag(), I->getFlags()), + auto RecordKind = (I->getFlags() & DINode::FlagIndirectVirtualBase) == DINode::FlagIndirectVirtualBase + ? TypeRecordKind::IndirectVirtualBaseClass + : TypeRecordKind::VirtualBaseClass; + VirtualBaseClassRecord VBCR( + RecordKind, translateAccessFlags(Ty->getTag(), I->getFlags()), getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset, - VBTableIndex)); + VBTableIndex); + + FLBR.writeMemberType(VBCR); } else { assert(I->getOffsetInBits() % 8 == 0 && "bases must be on byte boundaries"); - Fields.writeBaseClass(BaseClassRecord( - translateAccessFlags(Ty->getTag(), I->getFlags()), - getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8)); + BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()), + getTypeIndex(I->getBaseType()), + I->getOffsetInBits() / 8); + FLBR.writeMemberType(BCR); } } @@ -1653,8 +1832,17 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { translateAccessFlags(Ty->getTag(), Member->getFlags()); if (Member->isStaticMember()) { - Fields.writeStaticDataMember( - StaticDataMemberRecord(Access, MemberBaseType, MemberName)); + StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName); + FLBR.writeMemberType(SDMR); + MemberCount++; + continue; + } + + // Virtual function pointer member. + if ((Member->getFlags() & DINode::FlagArtificial) && + Member->getName().startswith("_vptr$")) { + VFPtrRecord VFPR(getTypeIndex(Member->getBaseType())); + FLBR.writeMemberType(VFPR); MemberCount++; continue; } @@ -1669,12 +1857,14 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { MemberOffsetInBits = CI->getZExtValue() + MemberInfo.BaseOffset; } StartBitOffset -= MemberOffsetInBits; - MemberBaseType = TypeTable.writeBitField(BitFieldRecord( - MemberBaseType, Member->getSizeInBits(), StartBitOffset)); + BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(), + StartBitOffset); + MemberBaseType = TypeTable.writeKnownType(BFR); } uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8; - Fields.writeDataMember(DataMemberRecord(Access, MemberBaseType, - MemberOffsetInBytes, MemberName)); + DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes, + MemberName); + FLBR.writeMemberType(DMR); MemberCount++; } @@ -1691,33 +1881,32 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { if (Introduced) VFTableOffset = SP->getVirtualIndex() * getPointerSizeInBytes(); - Methods.push_back( - OneMethodRecord(MethodType, translateMethodKindFlags(SP, Introduced), - translateMethodOptionFlags(SP), - translateAccessFlags(Ty->getTag(), SP->getFlags()), - VFTableOffset, Name)); + Methods.push_back(OneMethodRecord( + MethodType, translateAccessFlags(Ty->getTag(), SP->getFlags()), + translateMethodKindFlags(SP, Introduced), + translateMethodOptionFlags(SP), VFTableOffset, Name)); MemberCount++; } assert(Methods.size() > 0 && "Empty methods map entry"); if (Methods.size() == 1) - Fields.writeOneMethod(Methods[0]); + FLBR.writeMemberType(Methods[0]); else { - TypeIndex MethodList = - TypeTable.writeMethodOverloadList(MethodOverloadListRecord(Methods)); - Fields.writeOverloadedMethod( - OverloadedMethodRecord(Methods.size(), MethodList, Name)); 
+ MethodOverloadListRecord MOLR(Methods); + TypeIndex MethodList = TypeTable.writeKnownType(MOLR); + OverloadedMethodRecord OMR(Methods.size(), MethodList, Name); + FLBR.writeMemberType(OMR); } } // Create nested classes. for (const DICompositeType *Nested : Info.NestedClasses) { NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName()); - Fields.writeNestedType(R); + FLBR.writeMemberType(R); MemberCount++; } - TypeIndex FieldTI = TypeTable.writeFieldList(Fields); - return std::make_tuple(FieldTI, TypeIndex(), MemberCount, + TypeIndex FieldTI = FLBR.end(); + return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount, !Info.NestedClasses.empty()); } @@ -1725,7 +1914,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() { if (!VBPType.getIndex()) { // Make a 'const int *' type. ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const); - TypeIndex ModifiedTI = TypeTable.writeModifier(MR); + TypeIndex ModifiedTI = TypeTable.writeKnownType(MR); PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64 : PointerKind::Near32; @@ -1733,7 +1922,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() { PointerOptions PO = PointerOptions::None; PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes()); - VBPType = TypeTable.writePointer(PR); + VBPType = TypeTable.writeKnownType(PR); } return VBPType; @@ -1880,30 +2069,47 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) { SmallString<20> BytePrefix; for (const LocalVarDefRange &DefRange : Var.DefRanges) { BytePrefix.clear(); - // FIXME: Handle bitpieces. - if (DefRange.StructOffset != 0) - continue; - if (DefRange.InMemory) { - DefRangeRegisterRelSym Sym(DefRange.CVRegister, 0, DefRange.DataOffset, 0, - 0, 0, ArrayRef<LocalVariableAddrGap>()); + uint16_t RegRelFlags = 0; + if (DefRange.IsSubfield) { + RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag | + (DefRange.StructOffset + << DefRangeRegisterRelSym::OffsetInParentShift); + } + DefRangeRegisterRelSym Sym(S_DEFRANGE_REGISTER_REL); + Sym.Hdr.Register = DefRange.CVRegister; + Sym.Hdr.Flags = RegRelFlags; + Sym.Hdr.BasePointerOffset = DefRange.DataOffset; ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL); BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind)); BytePrefix += - StringRef(reinterpret_cast<const char *>(&Sym.Header), - sizeof(Sym.Header) - sizeof(LocalVariableAddrRange)); + StringRef(reinterpret_cast<const char *>(&Sym.Hdr), sizeof(Sym.Hdr)); } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); - // Unclear what matters here. - DefRangeRegisterSym Sym(DefRange.CVRegister, 0, 0, 0, 0, - ArrayRef<LocalVariableAddrGap>()); - ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER); - BytePrefix += - StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind)); - BytePrefix += - StringRef(reinterpret_cast<const char *>(&Sym.Header), - sizeof(Sym.Header) - sizeof(LocalVariableAddrRange)); + if (DefRange.IsSubfield) { + // Unclear what matters here. + DefRangeSubfieldRegisterSym Sym(S_DEFRANGE_SUBFIELD_REGISTER); + Sym.Hdr.Register = DefRange.CVRegister; + Sym.Hdr.MayHaveNoName = 0; + Sym.Hdr.OffsetInParent = DefRange.StructOffset; + + ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_SUBFIELD_REGISTER); + BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind), + sizeof(SymKind)); + BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr), + sizeof(Sym.Hdr)); + } else { + // Unclear what matters here. 
+ DefRangeRegisterSym Sym(S_DEFRANGE_REGISTER); + Sym.Hdr.Register = DefRange.CVRegister; + Sym.Hdr.MayHaveNoName = 0; + ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER); + BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind), + sizeof(SymKind)); + BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr), + sizeof(Sym.Hdr)); + } } OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix); } @@ -1983,6 +2189,15 @@ void CodeViewDebug::emitDebugInfoForUDTs( } void CodeViewDebug::emitDebugInfoForGlobals() { + DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *> + GlobalMap; + for (const GlobalVariable &GV : MMI->getModule()->globals()) { + SmallVector<DIGlobalVariableExpression *, 1> GVEs; + GV.getDebugInfo(GVEs); + for (const auto *GVE : GVEs) + GlobalMap[GVE] = &GV; + } + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); for (const MDNode *Node : CUs->operands()) { const auto *CU = cast<DICompileUnit>(Node); @@ -1992,31 +2207,32 @@ void CodeViewDebug::emitDebugInfoForGlobals() { // it if we have at least one global to emit. switchToDebugSectionForSymbol(nullptr); MCSymbol *EndLabel = nullptr; - for (const DIGlobalVariable *G : CU->getGlobalVariables()) { - if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) { + for (const auto *GVE : CU->getGlobalVariables()) { + if (const auto *GV = GlobalMap.lookup(GVE)) if (!GV->hasComdat() && !GV->isDeclarationForLinker()) { if (!EndLabel) { OS.AddComment("Symbol subsection for globals"); EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); } - emitDebugInfoForGlobal(G, Asm->getSymbol(GV)); + // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. + emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV)); } - } } if (EndLabel) endCVSubsection(EndLabel); // Second, emit each global that is in a comdat into its own .debug$S // section along with its own symbol substream. - for (const DIGlobalVariable *G : CU->getGlobalVariables()) { - if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) { + for (const auto *GVE : CU->getGlobalVariables()) { + if (const auto *GV = GlobalMap.lookup(GVE)) { if (GV->hasComdat()) { MCSymbol *GVSym = Asm->getSymbol(GV); OS.AddComment("Symbol subsection for " + Twine(GlobalValue::getRealLinkageName(GV->getName()))); switchToDebugSectionForSymbol(GVSym); EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); - emitDebugInfoForGlobal(G, GVSym); + // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. + emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym); endCVSubsection(EndLabel); } } @@ -2037,6 +2253,7 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() { } void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, + const GlobalVariable *GV, MCSymbol *GVSym) { // DataSym record, see SymbolRecord.h for more info. 
// FIXME: Thread local data, etc @@ -2045,7 +2262,6 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, OS.AddComment("Record length"); OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2); OS.EmitLabel(DataBegin); - const auto *GV = cast<GlobalVariable>(DIGV->getVariable()); if (DIGV->isLocalToUnit()) { if (GV->isThreadLocal()) { OS.AddComment("Record kind: S_LTHREAD32"); @@ -2066,7 +2282,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, OS.AddComment("Type"); OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4); OS.AddComment("DataOffset"); - OS.EmitCOFFSecRel32(GVSym); + OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0); OS.AddComment("Segment"); OS.EmitCOFFSectionIndex(GVSym); OS.AddComment("Name"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index e4bbd61..3dd4315 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -20,8 +20,8 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCStreamer.h" @@ -36,7 +36,8 @@ struct ClassInfo; /// \brief Collects and handles line tables information in a CodeView format. class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { MCStreamer &OS; - codeview::MemoryTypeTableBuilder TypeTable; + llvm::BumpPtrAllocator Allocator; + codeview::TypeTableBuilder TypeTable; /// Represents the most general definition range. struct LocalVarDefRange { @@ -47,9 +48,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Offset of variable data in memory. int DataOffset : 31; - /// Offset of the data into the user level struct. If zero, no splitting - /// occurred. - uint16_t StructOffset; + /// Non-zero if this is a piece of an aggregate. + uint16_t IsSubfield : 1; + + /// Offset into aggregate. + uint16_t StructOffset : 15; /// Register containing the data or the register base of the memory /// location containing the data. @@ -59,14 +62,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// ranges. bool isDifferentLocation(LocalVarDefRange &O) { return InMemory != O.InMemory || DataOffset != O.DataOffset || - StructOffset != O.StructOffset || CVRegister != O.CVRegister; + IsSubfield != O.IsSubfield || StructOffset != O.StructOffset || + CVRegister != O.CVRegister; } SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges; }; static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset); - static LocalVarDefRange createDefRangeReg(uint16_t CVRegister); + static LocalVarDefRange createDefRangeGeneral(uint16_t CVRegister, + bool InMemory, int Offset, + bool IsSubfield, + uint16_t StructOffset); /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific. 
struct LocalVariable { @@ -190,6 +197,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitTypeInformation(); + void emitCompilerInformation(); + void emitInlineeLinesSubsection(); void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI); @@ -201,7 +210,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitDebugInfoForUDTs( ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs); - void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, MCSymbol *GVSym); + void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, + const GlobalVariable *GV, MCSymbol *GVSym); /// Opens a subsection of the given kind in a .debug$S codeview section. /// Returns an end label for use with endCVSubsection when the subsection is @@ -217,7 +227,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void collectVariableInfo(const DISubprogram *SP); - void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &Processed); + void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed); /// Records information about a local variable in the appropriate scope. In /// particular, locals from inlined code live inside the inlining site. @@ -251,6 +261,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty); + codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty, const DIType *ClassTy, int ThisAdjustment); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 2aaa85a..8799189 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -63,10 +63,10 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { /// void DIEAbbrev::Emit(const AsmPrinter *AP) const { // Emit its Dwarf tag type. - AP->EmitULEB128(Tag, dwarf::TagString(Tag)); + AP->EmitULEB128(Tag, dwarf::TagString(Tag).data()); // Emit whether it has children DIEs. - AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children)); + AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data()); // For each attribute description. for (unsigned i = 0, N = Data.size(); i < N; ++i) { @@ -74,11 +74,18 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { // Emit attribute type. AP->EmitULEB128(AttrData.getAttribute(), - dwarf::AttributeString(AttrData.getAttribute())); + dwarf::AttributeString(AttrData.getAttribute()).data()); // Emit form type. AP->EmitULEB128(AttrData.getForm(), - dwarf::FormEncodingString(AttrData.getForm())); + dwarf::FormEncodingString(AttrData.getForm()).data()); + + // Emit value for DW_FORM_implicit_const. + if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) { + assert(AP->getDwarfVersion() >= 5 && + "DW_FORM_implicit_const is supported starting from DWARFv5"); + AP->EmitSLEB128(AttrData.getValue()); + } } // Mark end of abbreviation. 
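Editor's note on the DW_FORM_implicit_const handling added above: in DWARF v5 the constant is stored once in the abbreviation declaration (an SLEB128 following the attribute/form pair), and DIEs that use the abbreviation contribute no bytes for that attribute. A schematic of the resulting .debug_abbrev entry, written as C++ comments; the layout is illustrative only (not actual dumper output) and the attribute choice is hypothetical:

    // abbrev code 2, DW_TAG_variable, DW_CHILDREN_no
    //   DW_AT_decl_file   DW_FORM_implicit_const   value: 1   <- value lives in the abbrev
    //   DW_AT_name        DW_FORM_strp
    //   0, 0                                                  <- terminating attribute/form pair
    // A DIE using abbrev 2 emits only its abbrev code and the strp offset;
    // the implicit_const attribute costs zero bytes per DIE, which is why the
    // size routines below return 0 for this form.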
@@ -108,24 +115,73 @@ void DIEAbbrev::print(raw_ostream &O) { LLVM_DUMP_METHOD void DIEAbbrev::dump() { print(dbgs()); } +//===----------------------------------------------------------------------===// +// DIEAbbrevSet Implementation +//===----------------------------------------------------------------------===// + +DIEAbbrevSet::~DIEAbbrevSet() { + for (DIEAbbrev *Abbrev : Abbreviations) + Abbrev->~DIEAbbrev(); +} + +DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) { + + FoldingSetNodeID ID; + DIEAbbrev Abbrev = Die.generateAbbrev(); + Abbrev.Profile(ID); + + void *InsertPos; + if (DIEAbbrev *Existing = + AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) { + Die.setAbbrevNumber(Existing->getNumber()); + return *Existing; + } + + // Move the abbreviation to the heap and assign a number. + DIEAbbrev *New = new (Alloc) DIEAbbrev(std::move(Abbrev)); + Abbreviations.push_back(New); + New->setNumber(Abbreviations.size()); + Die.setAbbrevNumber(Abbreviations.size()); + + // Store it for lookup. + AbbreviationsSet.InsertNode(New, InsertPos); + return *New; +} + +void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const { + if (!Abbreviations.empty()) { + // Start the debug abbrev section. + AP->OutStreamer->SwitchSection(Section); + AP->emitDwarfAbbrevs(Abbreviations); + } +} + +//===----------------------------------------------------------------------===// +// DIE Implementation +//===----------------------------------------------------------------------===// + +DIE *DIE::getParent() const { + return Owner.dyn_cast<DIE*>(); +} + DIEAbbrev DIE::generateAbbrev() const { DIEAbbrev Abbrev(Tag, hasChildren()); for (const DIEValue &V : values()) - Abbrev.AddAttribute(V.getAttribute(), V.getForm()); + if (V.getForm() == dwarf::DW_FORM_implicit_const) + Abbrev.AddImplicitConstAttribute(V.getAttribute(), + V.getDIEInteger().getValue()); + else + Abbrev.AddAttribute(V.getAttribute(), V.getForm()); return Abbrev; } -/// Climb up the parent chain to get the unit DIE to which this DIE -/// belongs. -const DIE *DIE::getUnit() const { - const DIE *Cu = getUnitOrNull(); - assert(Cu && "We should not have orphaned DIEs."); - return Cu; +unsigned DIE::getDebugSectionOffset() const { + const DIEUnit *Unit = getUnit(); + assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset"); + return Unit->getDebugSectionOffset() + getOffset(); } -/// Climb up the parent chain to get the unit DIE this DIE belongs -/// to. Return NULL if DIE is not added to an owner yet. -const DIE *DIE::getUnitOrNull() const { +const DIE *DIE::getUnitDie() const { const DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit || @@ -136,6 +192,13 @@ const DIE *DIE::getUnitOrNull() const { return nullptr; } +const DIEUnit *DIE::getUnit() const { + const DIE *UnitDie = getUnitDie(); + if (UnitDie) + return UnitDie->Owner.dyn_cast<DIEUnit*>(); + return nullptr; +} + DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const { // Iterate through all the attributes until we find the one we're // looking for, if we can't find it return NULL. @@ -191,6 +254,55 @@ void DIE::dump() { print(dbgs()); } +unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP, + DIEAbbrevSet &AbbrevSet, + unsigned CUOffset) { + // Unique the abbreviation and fill in the abbreviation number so this DIE + // can be emitted. + const DIEAbbrev &Abbrev = AbbrevSet.uniqueAbbreviation(*this); + + // Set compile/type unit relative offset of this DIE. 
+ setOffset(CUOffset); + + // Add the byte size of the abbreviation code. + CUOffset += getULEB128Size(getAbbrevNumber()); + + // Add the byte size of all the DIE attribute values. + for (const auto &V : values()) + CUOffset += V.SizeOf(AP); + + // Let the children compute their offsets and abbreviation numbers. + if (hasChildren()) { + (void)Abbrev; + assert(Abbrev.hasChildren() && "Children flag not set"); + + for (auto &Child : children()) + CUOffset = Child.computeOffsetsAndAbbrevs(AP, AbbrevSet, CUOffset); + + // Each child chain is terminated with a zero byte, adjust the offset. + CUOffset += sizeof(int8_t); + } + + // Compute the byte size of this DIE and all of its children correctly. This + // is needed so that top level DIE can help the compile unit set its length + // correctly. + setSize(CUOffset - getOffset()); + return CUOffset; +} + +//===----------------------------------------------------------------------===// +// DIEUnit Implementation +//===----------------------------------------------------------------------===// +DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag) + : Die(UnitTag), Section(nullptr), Offset(0), Length(0), Version(V), + AddrSize(A) +{ + Die.Owner = this; + assert((UnitTag == dwarf::DW_TAG_compile_unit || + UnitTag == dwarf::DW_TAG_type_unit || + UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG"); +} + void DIEValue::EmitValue(const AsmPrinter *AP) const { switch (Ty) { case isNone: @@ -240,67 +352,121 @@ void DIEValue::dump() const { /// EmitValue - Emit integer of appropriate size. /// void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { - unsigned Size = ~0U; switch (Form) { + case dwarf::DW_FORM_implicit_const: + LLVM_FALLTHROUGH; case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. // FIXME: Is there a better way to do this? 
Asm->OutStreamer->AddBlankLine(); return; - case dwarf::DW_FORM_flag: // Fall thru - case dwarf::DW_FORM_ref1: // Fall thru - case dwarf::DW_FORM_data1: Size = 1; break; - case dwarf::DW_FORM_ref2: // Fall thru - case dwarf::DW_FORM_data2: Size = 2; break; - case dwarf::DW_FORM_sec_offset: // Fall thru - case dwarf::DW_FORM_strp: // Fall thru - case dwarf::DW_FORM_ref4: // Fall thru - case dwarf::DW_FORM_data4: Size = 4; break; - case dwarf::DW_FORM_ref8: // Fall thru - case dwarf::DW_FORM_ref_sig8: // Fall thru - case dwarf::DW_FORM_data8: Size = 8; break; - case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return; - case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return; - case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; - case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; + case dwarf::DW_FORM_flag: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref1: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_data1: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref2: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_data2: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_strp: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref4: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_data4: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref8: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_sig8: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_data8: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_ref_alt: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_strp_alt: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_line_strp: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_sec_offset: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_strp_sup: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_sup: + LLVM_FALLTHROUGH; case dwarf::DW_FORM_addr: - Size = Asm->getPointerSize(); - break; + LLVM_FALLTHROUGH; case dwarf::DW_FORM_ref_addr: - Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr); - break; + Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form)); + return; + case dwarf::DW_FORM_GNU_str_index: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_addr_index: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_udata: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_udata: + Asm->EmitULEB128(Integer); + return; + case dwarf::DW_FORM_sdata: + Asm->EmitSLEB128(Integer); + return; default: llvm_unreachable("DIE Value form not supported yet"); } - Asm->OutStreamer->EmitIntValue(Integer, Size); } /// SizeOf - Determine size of integer value in bytes. 
/// unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { + case dwarf::DW_FORM_implicit_const: LLVM_FALLTHROUGH; case dwarf::DW_FORM_flag_present: return 0; - case dwarf::DW_FORM_flag: // Fall thru - case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_flag: LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref1: LLVM_FALLTHROUGH; case dwarf::DW_FORM_data1: return sizeof(int8_t); - case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_ref2: LLVM_FALLTHROUGH; case dwarf::DW_FORM_data2: return sizeof(int16_t); - case dwarf::DW_FORM_sec_offset: // Fall thru - case dwarf::DW_FORM_strp: // Fall thru - case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_ref4: LLVM_FALLTHROUGH; case dwarf::DW_FORM_data4: return sizeof(int32_t); - case dwarf::DW_FORM_ref8: // Fall thru - case dwarf::DW_FORM_ref_sig8: // Fall thru + case dwarf::DW_FORM_ref8: LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_sig8: LLVM_FALLTHROUGH; case dwarf::DW_FORM_data8: return sizeof(int64_t); - case dwarf::DW_FORM_GNU_str_index: return getULEB128Size(Integer); - case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer); - case dwarf::DW_FORM_udata: return getULEB128Size(Integer); - case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); - case dwarf::DW_FORM_addr: - return AP->getPointerSize(); case dwarf::DW_FORM_ref_addr: - if (AP->OutStreamer->getContext().getDwarfVersion() == 2) + if (AP->getDwarfVersion() == 2) return AP->getPointerSize(); - return sizeof(int32_t); + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_strp: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_ref_alt: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_strp_alt: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_line_strp: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_sec_offset: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_strp_sup: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_sup: + switch (AP->OutStreamer->getContext().getDwarfFormat()) { + case dwarf::DWARF32: + return 4; + case dwarf::DWARF64: + return 8; + } + llvm_unreachable("Invalid DWARF format"); + case dwarf::DW_FORM_GNU_str_index: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_GNU_addr_index: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_ref_udata: + LLVM_FALLTHROUGH; + case dwarf::DW_FORM_udata: + return getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: + return getSLEB128Size(Integer); + case dwarf::DW_FORM_addr: + return AP->getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -318,7 +484,7 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form)); + AP->EmitDebugValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. @@ -343,7 +509,8 @@ void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelReference(Label, SizeOf(AP, Form), Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset || - Form == dwarf::DW_FORM_ref_addr); + Form == dwarf::DW_FORM_ref_addr || + Form == dwarf::DW_FORM_data4); } /// SizeOf - Determine size of label value in bytes. 
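Editor's note on the form-size logic above: two concrete data points may help (ordinary DWARF facts, not specific to this change). Section-offset forms such as DW_FORM_strp and DW_FORM_sec_offset are 4 bytes under DWARF32 and 8 bytes under DWARF64, while the LEB128 forms scale with the encoded value. A minimal sketch using the LEB128 size helpers from llvm/Support/LEB128.h that the code above already calls:

    // Unsigned LEB128: one byte per 7 bits of payload.
    getULEB128Size(100);   // == 1  (fits in one 7-bit group)
    getULEB128Size(300);   // == 2  (needs two groups)
    // Signed LEB128: the sign bit must fit in the last group.
    getSLEB128Size(-64);   // == 1
    getSLEB128Size(-65);   // == 2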
@@ -435,6 +602,29 @@ void DIEString::print(raw_ostream &O) const { } //===----------------------------------------------------------------------===// +// DIEInlineString Implementation +//===----------------------------------------------------------------------===// +void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_string) { + for (char ch : S) + AP->EmitInt8(ch); + AP->EmitInt8(0); + return; + } + llvm_unreachable("Expected valid string form"); +} + +unsigned DIEInlineString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + // Emit string bytes + NULL byte. + return S.size() + 1; +} + +LLVM_DUMP_METHOD +void DIEInlineString::print(raw_ostream &O) const { + O << "InlineString: " << S; +} + +//===----------------------------------------------------------------------===// // DIEEntry Implementation //===----------------------------------------------------------------------===// @@ -442,35 +632,69 @@ void DIEString::print(raw_ostream &O) const { /// void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - if (Form == dwarf::DW_FORM_ref_addr) { - const DwarfDebug *DD = AP->getDwarfDebug(); - unsigned Addr = Entry->getOffset(); - assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations."); - // For DW_FORM_ref_addr, output the offset from beginning of debug info - // section. Entry->getOffset() returns the offset from start of the - // compile unit. - DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit()); - assert(CU && "CUDie should belong to a CU."); - Addr += CU->getDebugInfoOffset(); - if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) - AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, - DIEEntry::getRefAddrSize(AP)); - else - AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP)); - } else - AP->EmitInt32(Entry->getOffset()); -} - -unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) { - // DWARF4: References that use the attribute form DW_FORM_ref_addr are - // specified to be four bytes in the DWARF 32-bit format and eight bytes - // in the DWARF 64-bit format, while DWARF Version 2 specifies that such - // references have the same size as an address on the target system. - const DwarfDebug *DD = AP->getDwarfDebug(); - assert(DD && "Expected Dwarf Debug info to be available"); - if (DD->getDwarfVersion() == 2) - return AP->getPointerSize(); - return sizeof(int32_t); + switch (Form) { + case dwarf::DW_FORM_ref1: + case dwarf::DW_FORM_ref2: + case dwarf::DW_FORM_ref4: + case dwarf::DW_FORM_ref8: + AP->OutStreamer->EmitIntValue(Entry->getOffset(), SizeOf(AP, Form)); + return; + + case dwarf::DW_FORM_ref_udata: + AP->EmitULEB128(Entry->getOffset()); + return; + + case dwarf::DW_FORM_ref_addr: { + // Get the absolute offset for this DIE within the debug info/types section. 
+ unsigned Addr = Entry->getDebugSectionOffset(); + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { + const DwarfDebug *DD = AP->getDwarfDebug(); + if (DD) + assert(!DD->useSplitDwarf() && + "TODO: dwo files can't have relocations."); + const DIEUnit *Unit = Entry->getUnit(); + assert(Unit && "CUDie should belong to a CU."); + MCSection *Section = Unit->getSection(); + if (Section) { + const MCSymbol *SectionSym = Section->getBeginSymbol(); + AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); + return; + } + } + AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form)); + return; + } + default: + llvm_unreachable("Improper form for DIE reference"); + } +} + +unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_ref1: + return 1; + case dwarf::DW_FORM_ref2: + return 2; + case dwarf::DW_FORM_ref4: + return 4; + case dwarf::DW_FORM_ref8: + return 8; + case dwarf::DW_FORM_ref_udata: + return getULEB128Size(Entry->getOffset()); + case dwarf::DW_FORM_ref_addr: + if (AP->getDwarfVersion() == 2) + return AP->getPointerSize(); + switch (AP->OutStreamer->getContext().getDwarfFormat()) { + case dwarf::DWARF32: + return 4; + case dwarf::DWARF64: + return 8; + } + llvm_unreachable("Invalid DWARF format"); + + default: + llvm_unreachable("Improper form for DIE reference"); + } } LLVM_DUMP_METHOD diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 74c47d1..d8ecc7c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -330,6 +330,12 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { addULEB128(dwarf::DW_FORM_string); addString(Value.getDIEString().getString()); break; + case DIEValue::isInlineString: + addULEB128('A'); + addULEB128(Attribute); + addULEB128(dwarf::DW_FORM_string); + addString(Value.getDIEInlineString().getString()); + break; case DIEValue::isBlock: case DIEValue::isLoc: case DIEValue::isLocList: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index adc536f..22fd7bb 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -31,7 +31,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue()); assert(MI.getNumOperands() == 4); // If location of variable is described using a register (directly or - // indirecltly), this register is always a first operand. + // indirectly), this register is always a first operand. return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; } @@ -83,7 +83,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, const auto &I = RegVars.find(RegNo); assert(RegNo != 0U && I != RegVars.end()); auto &VarSet = I->second; - const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var); + const auto &VarPos = find(VarSet, Var); assert(VarPos != VarSet.end()); VarSet.erase(VarPos); // Don't keep empty sets in a map to keep it as small as possible. 
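Editor's note: several hunks here and below replace explicit std::find calls with the range helpers from llvm/ADT/STLExtras.h. A minimal illustration of the two helpers; the container and values are made up for the example, and it assumes the code is inside namespace llvm (or uses llvm:: qualification):

    SmallVector<unsigned, 4> VarSet = {7, 11, 13};
    if (is_contained(VarSet, 11))        // equivalent to std::find(...) != VarSet.end()
      VarSet.erase(find(VarSet, 11));    // find() is the iterator-returning form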
@@ -96,7 +96,7 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, InlinedVariable Var) { assert(RegNo != 0U); auto &VarSet = RegVars[RegNo]; - assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); + assert(!is_contained(VarSet, Var)); VarSet.push_back(Var); } @@ -134,8 +134,8 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { // as the return instruction. DebugLoc LastLoc = LastMI->getDebugLoc(); auto Res = LastMI; - for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)), - E = MBB.rend(); + for (MachineBasicBlock::const_reverse_iterator I = LastMI.getReverse(), + E = MBB.rend(); I != E; ++I) { if (I->getDebugLoc() != LastLoc) return &*Res; @@ -164,7 +164,9 @@ static void collectChangingRegs(const MachineFunction *MF, // Look for register defs and register masks. Register masks are // typically on calls and they clobber everything not in the mask. for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg()) { + // Skip virtual registers since they are handled by the parent. + if (MO.isReg() && MO.isDef() && MO.getReg() && + !TRI->isVirtualRegister(MO.getReg())) { for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) Regs.set(*AI); @@ -192,12 +194,18 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // some variables. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && MO.getReg()) { + // If this is a virtual register, only clobber it since it doesn't + // have aliases. + if (TRI->isVirtualRegister(MO.getReg())) + clobberRegisterUses(RegVars, MO.getReg(), Result, MI); // If this is a register def operand, it may end a debug value // range. - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); - ++AI) - if (ChangingRegs.test(*AI)) - clobberRegisterUses(RegVars, *AI, Result, MI); + else { + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); + ++AI) + if (ChangingRegs.test(*AI)) + clobberRegisterUses(RegVars, *AI, Result, MI); + } } else if (MO.isRegMask()) { // If this is a register mask operand, clobber all debug values in // non-CSRs. @@ -238,7 +246,8 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, if (!MBB.empty() && &MBB != &MF->back()) { for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) { auto CurElem = I++; // CurElem can be erased below. - if (ChangingRegs.test(CurElem->first)) + if (TRI->isVirtualRegister(CurElem->first) || + ChangingRegs.test(CurElem->first)) clobberRegisterUses(RegVars, CurElem, Result, MBB.back()); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 16ffe2e..9419098 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -62,14 +63,14 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) { return LabelsAfterInsn.lookup(MI); } -// Determine the relative position of the pieces described by P1 and P2. -// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, -// 1 if P1 is entirely after P2. 
-int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) { - unsigned l1 = P1->getBitPieceOffset(); - unsigned l2 = P2->getBitPieceOffset(); - unsigned r1 = l1 + P1->getBitPieceSize(); - unsigned r2 = l2 + P2->getBitPieceSize(); +int DebugHandlerBase::fragmentCmp(const DIExpression *P1, + const DIExpression *P2) { + auto Fragment1 = *P1->getFragmentInfo(); + auto Fragment2 = *P2->getFragmentInfo(); + unsigned l1 = Fragment1.OffsetInBits; + unsigned l2 = Fragment2.OffsetInBits; + unsigned r1 = l1 + Fragment1.SizeInBits; + unsigned r2 = l2 + Fragment2.SizeInBits; if (r1 <= l2) return -1; else if (r2 <= l1) @@ -78,11 +79,11 @@ int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) { return 0; } -/// Determine whether two variable pieces overlap. -bool DebugHandlerBase::piecesOverlap(const DIExpression *P1, const DIExpression *P2) { - if (!P1->isBitPiece() || !P2->isBitPiece()) +bool DebugHandlerBase::fragmentsOverlap(const DIExpression *P1, + const DIExpression *P2) { + if (!P1->isFragment() || !P2->isFragment()) return true; - return pieceCmp(P1, P2) == 0; + return fragmentCmp(P1, P2) == 0; } /// If this type is derived from a base type then return base type size. @@ -97,7 +98,7 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && - Tag != dwarf::DW_TAG_restrict_type) + Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type) return DDTy->getSizeInBits(); DIType *BaseType = DDTy->getBaseType().resolve(); @@ -141,14 +142,15 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { if (DIVar->isParameter() && getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); - if (Ranges.front().first->getDebugExpression()->isBitPiece()) { - // Mark all non-overlapping initial pieces. + if (Ranges.front().first->getDebugExpression()->isFragment()) { + // Mark all non-overlapping initial fragments. for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { - const DIExpression *Piece = I->first->getDebugExpression(); + const DIExpression *Fragment = I->first->getDebugExpression(); if (std::all_of(Ranges.begin(), I, [&](DbgValueHistoryMap::InstrRange Pred) { - return !piecesOverlap(Piece, Pred.first->getDebugExpression()); - })) + return !fragmentsOverlap( + Fragment, Pred.first->getDebugExpression()); + })) LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); else break; @@ -200,8 +202,10 @@ void DebugHandlerBase::endInstruction() { assert(CurMI != nullptr); // Don't create a new label after DBG_VALUE instructions. // They don't generate code. - if (!CurMI->isDebugValue()) + if (!CurMI->isDebugValue()) { PrevLabel = nullptr; + PrevInstBB = CurMI->getParent(); + } DenseMap<const MachineInstr *, MCSymbol *>::iterator I = LabelsAfterInsn.find(CurMI); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h index b8bbcec..c00fa18 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h @@ -38,10 +38,12 @@ protected: MachineModuleInfo *MMI; /// Previous instruction's location information. This is used to - /// determine label location to indicate scope boundries in dwarf - /// debug info. + /// determine label location to indicate scope boundaries in debug info. 
+ /// We track the previous instruction's source location (if not line 0), + /// whether it was a label, and its parent BB. DebugLoc PrevInstLoc; MCSymbol *PrevLabel = nullptr; + const MachineBasicBlock *PrevInstBB = nullptr; /// This location indicates end of function prologue and beginning of /// function body. @@ -92,13 +94,13 @@ public: /// Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - /// Determine the relative position of the pieces described by P1 and P2. - /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, - /// 1 if P1 is entirely after P2. - static int pieceCmp(const DIExpression *P1, const DIExpression *P2); + /// Determine the relative position of the fragments described by P1 and P2. + /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, 1 if P1 is + /// entirely after P2. + static int fragmentCmp(const DIExpression *P1, const DIExpression *P2); - /// Determine whether two variable pieces overlap. - static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2); + /// Determine whether two variable fragments overlap. + static bool fragmentsOverlap(const DIExpression *P1, const DIExpression *P2); /// If this type is derived from a base type then return base type size. static uint64_t getBaseTypeSize(const DITypeRef TyRef); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 20acd45..36fb150 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -72,7 +72,7 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } - bool isBitPiece() const { return getExpression()->isBitPiece(); } + bool isFragment() const { return getExpression()->isFragment(); } const DIExpression *getExpression() const { return Expression; } friend bool operator==(const Value &, const Value &); friend bool operator<(const Value &, const Value &); @@ -128,8 +128,8 @@ public: void addValues(ArrayRef<DebugLocEntry::Value> Vals) { Values.append(Vals.begin(), Vals.end()); sortUniqueValues(); - assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){ - return V.isBitPiece(); + assert(all_of(Values, [](DebugLocEntry::Value V) { + return V.isFragment(); }) && "value must be a piece"); } @@ -172,11 +172,11 @@ inline bool operator==(const DebugLocEntry::Value &A, llvm_unreachable("unhandled EntryKind"); } -/// \brief Compare two pieces based on their offset. +/// Compare two fragments based on their offset. 
inline bool operator<(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { - return A.getExpression()->getBitPieceOffset() < - B.getExpression()->getBitPieceOffset(); + return A.getExpression()->getFragmentInfo()->OffsetInBits < + B.getExpression()->getFragmentInfo()->OffsetInBits; } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 4ad3e18..9c324ea 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -221,9 +221,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { Asm->EmitInt32((*HI)->Data.Values.size()); for (HashDataContents *HD : (*HI)->Data.Values) { // Emit the DIE offset - DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit()); - assert(CU && "Accelerated DIE should belong to a CU."); - Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset()); + Asm->EmitInt32(HD->Die->getDebugSectionOffset()); // If we have multiple Atoms emit that info too. // FIXME: A bit of a hack, we either emit only one atom or all info. if (HeaderData.Atoms.size() > 1) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 4d81441..05ac1cb 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -126,8 +126,7 @@ public: uint16_t type; // enum AtomType uint16_t form; // DWARF DW_FORM_ defines - LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form) - : type(type), form(form) {} + constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {} #ifndef NDEBUG void print(raw_ostream &O) { O << "Type: " << dwarf::AtomTypeString(type) << "\n" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 2eae1b2..e08306b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -39,16 +39,16 @@ using namespace llvm; DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) - : EHStreamer(A), shouldEmitCFI(false) {} + : EHStreamer(A), shouldEmitCFI(false), hasEmittedCFISections(false) {} void DwarfCFIExceptionBase::markFunctionEnd() { endFragment(); - if (MMI->getLandingPads().empty()) - return; - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + if (!Asm->MF->getLandingPads().empty()) { + MachineFunction *NonConstMF = const_cast<MachineFunction*>(Asm->MF); + NonConstMF->tidyLandingPads(); + } } void DwarfCFIExceptionBase::endFragment() { @@ -59,7 +59,7 @@ void DwarfCFIExceptionBase::endFragment() { DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), forceEmitPersonality(false), shouldEmitLSDA(false), - shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {} + shouldEmitMoves(false) {} DwarfCFIException::~DwarfCFIException() {} @@ -70,9 +70,6 @@ void DwarfCFIException::endModule() { if (!Asm->MAI->usesCFIForEH()) return; - if (moveTypeModule == AsmPrinter::CFI_M_Debug) - Asm->OutStreamer->EmitCFISections(false, true); - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); @@ -98,14 +95,10 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const Function *F = MF->getFunction(); // If any landing pads survive, we need an EH table. 
- bool hasLandingPads = !MMI->getLandingPads().empty(); + bool hasLandingPads = !MF->getLandingPads().empty(); // See if we need frame move info. AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); - if (MoveType == AsmPrinter::CFI_M_EH || - (MoveType == AsmPrinter::CFI_M_Debug && - moveTypeModule == AsmPrinter::CFI_M_None)) - moveTypeModule = MoveType; shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None; @@ -143,6 +136,12 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, if (!shouldEmitCFI) return; + if (!hasEmittedCFISections) { + if (Asm->needsOnlyDebugCFIMoves()) + Asm->OutStreamer->EmitCFISections(false, true); + hasEmittedCFISections = true; + } + Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false); // Indicate personality routine, if any. @@ -160,8 +159,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const MCSymbol *Sym = - TLOF.getCFIPersonalitySymbol(P, *Asm->Mang, Asm->TM, MMI); + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(P, Asm->TM, MMI); Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); // Provide LSDA information. @@ -171,7 +169,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, /// endFunction - Gather and emit post-function exception information. /// -void DwarfCFIException::endFunction(const MachineFunction *) { +void DwarfCFIException::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality) return; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 7822814c..d904372 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -73,36 +73,8 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID()); } -// Return const expression if value is a GEP to access merged global -// constant. e.g. -// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) -static const ConstantExpr *getMergedGlobalExpr(const Value *V) { - const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); - if (!CE || CE->getNumOperands() != 3 || - CE->getOpcode() != Instruction::GetElementPtr) - return nullptr; - - // First operand points to a global struct. - Value *Ptr = CE->getOperand(0); - GlobalValue *GV = dyn_cast<GlobalValue>(Ptr); - if (!GV || !isa<StructType>(GV->getValueType())) - return nullptr; - - // Second operand is zero. - const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); - if (!CI || !CI->isZero()) - return nullptr; - - // Third operand is offset. - if (!isa<ConstantInt>(CE->getOperand(2))) - return nullptr; - - return CE; -} - -/// getOrCreateGlobalVariableDIE - get or create global variable DIE. DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( - const DIGlobalVariable *GV) { + const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) { // Check for pre-existence. if (DIE *Die = getDIE(GV)) return Die; @@ -126,6 +98,10 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( // We need the declaration DIE that is in the static member's class. 
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); + // If the global variable's type is different from the one in the class + // member type, assume that it's more specific and also emit it. + if (GTy != DD->resolve(SDMDecl->getBaseType())) + addType(*VariableDIE, GTy); } else { DeclContext = GV->getScope(); // Add name and type. @@ -145,73 +121,82 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( else addGlobalName(GV->getName(), *VariableDIE, DeclContext); + if (uint32_t AlignInBytes = GV->getAlignInBytes()) + addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + // Add location. bool addToAccelTable = false; - if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) { - // We cannot describe the location of dllimport'd variables: the computation - // of their address requires loads from the IAT. - if (!Global->hasDLLImportStorageClass()) { + DIELoc *Loc = nullptr; + std::unique_ptr<DIEDwarfExpression> DwarfExpr; + bool AllConstant = std::all_of( + GlobalExprs.begin(), GlobalExprs.end(), + [&](const GlobalExpr GE) { + return GE.Expr && GE.Expr->isConstant(); + }); + + for (const auto &GE : GlobalExprs) { + const GlobalVariable *Global = GE.Var; + const DIExpression *Expr = GE.Expr; + // For compatibility with DWARF 3 and earlier, + // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes + // DW_AT_const_value(X). + if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) { + addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1)); + // We cannot describe the location of dllimport'd variables: the + // computation of their address requires loads from the IAT. + } else if ((Global && !Global->hasDLLImportStorageClass()) || AllConstant) { + if (!Loc) { + Loc = new (DIEValueAllocator) DIELoc; + DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc); + } addToAccelTable = true; - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - const MCSymbol *Sym = Asm->getSymbol(Global); - if (Global->isThreadLocal()) { - if (Asm->TM.Options.EmulatedTLS) { - // TODO: add debug info for emulated thread local mode. - } else { - // FIXME: Make this work with -gsplit-dwarf. - unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); - // Based on GCC's support for TLS: - if (!DD->useSplitDwarf()) { - // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4 - ? dwarf::DW_OP_const4u - : dwarf::DW_OP_const8u); - // 2) containing the (relocated) offset of the TLS variable - // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + if (Global) { + const MCSymbol *Sym = Asm->getSymbol(Global); + if (Global->isThreadLocal()) { + if (Asm->TM.Options.EmulatedTLS) { + // TODO: add debug info for emulated thread local mode. } else { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(*Loc, dwarf::DW_FORM_udata, - DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + // FIXME: Make this work with -gsplit-dwarf. 
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u + : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } - // 3) followed by an OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, - DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address - : dwarf::DW_OP_form_tls_address); + } else { + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); } - } else { - DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(*Loc, Sym); } - - addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - if (DD->useAllLinkageNames()) - addLinkageName(*VariableDIE, GV->getLinkageName()); - } - } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV->getVariable())) { - addConstantValue(*VariableDIE, CI, GTy); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) { - auto *Ptr = cast<GlobalValue>(CE->getOperand(0)); - if (!Ptr->hasDLLImportStorageClass()) { - addToAccelTable = true; - // GV is a merged global. - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - MCSymbol *Sym = Asm->getSymbol(Ptr); - DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(*Loc, Sym); - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); - addUInt(*Loc, dwarf::DW_FORM_udata, - Asm->getDataLayout().getIndexedOffsetInType(Ptr->getValueType(), - Idx)); - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + if (Expr) { + DwarfExpr->addFragmentOffset(Expr); + DwarfExpr->AddExpression(Expr); + } } } + if (Loc) + addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize()); + + if (DD->useAllLinkageNames()) + addLinkageName(*VariableDIE, GV->getLinkageName()); if (addToAccelTable) { DD->addAccelName(GV->getName(), *VariableDIE); @@ -265,7 +250,7 @@ void DwarfCompileUnit::initStmtList() { // is not okay to use line_table_start here. 
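The thread-local case above assembles a small DWARF expression by hand. A minimal sketch of the byte sequence it produces for a 64-bit target without split DWARF, assuming the TLS offset has already been resolved to a plain integer (the real code instead emits a relocation obtained from getDebugThreadLocalSymbol):

#include <cstdint>
#include <vector>

// Illustration only: DW_AT_location bytes for a thread-local global with
// 64-bit pointers and no split DWARF. 0x0e is DW_OP_const8u and 0x9b is
// DW_OP_form_tls_address (the GNU fallback DW_OP_GNU_push_tls_address is 0xe0).
std::vector<uint8_t> sketchTLSLocation(uint64_t TLSBlockOffset) {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x0e);                                    // 1) DW_OP_const8u
  for (int Byte = 0; Byte < 8; ++Byte)                     // 2) offset of the variable
    Expr.push_back((TLSBlockOffset >> (8 * Byte)) & 0xff); //    within the TLS block
  Expr.push_back(0x9b);                                    // 3) DW_OP_form_tls_address
  return Expr;
}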
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); StmtListValue = - addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, + addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym, TLOF.getDwarfLineSection()->getBeginSymbol()); } @@ -450,7 +435,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); - if (IA->getDiscriminator()) + if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4) addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, IA->getDiscriminator()); @@ -521,9 +506,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); // If there is an expression, emit raw unsigned bytes. + DwarfExpr.addFragmentOffset(Expr); DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm()); - DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end()); - addBlock(*VariableDie, dwarf::DW_AT_location, Loc); + DwarfExpr.AddExpression(Expr); + addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); } else addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); } else if (DVInsn->getOperand(0).isFPImm()) @@ -536,23 +522,21 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, } // .. else use frame index. - if (DV.getFrameIndex().empty()) + if (!DV.hasFrameIndexExprs()) return VariableDie; - auto Expr = DV.getExpression().begin(); DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - for (auto FI : DV.getFrameIndex()) { + for (auto &Fragment : DV.getFrameIndexExprs()) { unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - assert(Expr != DV.getExpression().end() && "Wrong number of expressions"); + int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); + DwarfExpr.addFragmentOffset(Fragment.Expr); DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), FrameReg, Offset); - DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end()); - ++Expr; + DwarfExpr.AddExpression(Fragment.Expr); } - addBlock(*VariableDie, dwarf::DW_AT_location, Loc); + addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); return VariableDie; } @@ -585,25 +569,22 @@ DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope, return ObjectPointer; } -void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { - assert(Scope && Scope->getScopeNode()); - assert(!Scope->getInlinedAt()); - assert(!Scope->isAbstractScope()); - auto *Sub = cast<DISubprogram>(Scope->getScopeNode()); - - DD->getProcessedSPNodes().insert(Sub); - +void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) { DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); + if (Scope) { + assert(!Scope->getInlinedAt()); + assert(!Scope->isAbstractScope()); + // Collect lexical scope children first. + // ObjectPointer might be a local (non-argument) local variable if it's a + // block's synthetic this pointer. 
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE)) + addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); + } + // If this is a variadic function, add an unspecified parameter. DITypeRefArray FnArgs = Sub->getType()->getTypeArray(); - // Collect lexical scope children first. - // ObjectPointer might be a local (non-argument) local variable if it's a - // block's synthetic this pointer. - if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE)) - addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); - // If we have a single element of null, it is a function that returns void. // If we have more than one elements and the last one is null, it is a // variadic function. @@ -674,7 +655,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( else if (auto *T = dyn_cast<DIType>(Entity)) EntityDie = getOrCreateTypeDIE(T); else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity)) - EntityDie = getOrCreateGlobalVariableDIE(GV); + EntityDie = getOrCreateGlobalVariableDIE(GV, {}); else EntityDie = getDIE(Entity); assert(EntityDie); @@ -695,11 +676,7 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { // If this subprogram has an abstract definition, reference that addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); } else { - if (!D && !includeMinimalInlineScopes()) - // Lazily construct the subprogram if we didn't see either concrete or - // inlined versions during codegen. (except in -gmlt ^ where we want - // to omit these entirely) - D = getOrCreateSubprogramDIE(SP); + assert(D || includeMinimalInlineScopes()); if (D) // And attach the attributes applySubprogramAttributesToDefinition(SP, *D); @@ -750,18 +727,22 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression Expr(*Asm, *this, *Loc); bool validReg; if (Location.isReg()) - validReg = addRegisterOpPiece(*Loc, Location.getReg()); + validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(), + Location.getReg()); else - validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + validReg = + Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), + Location.getReg(), Location.getOffset()); if (!validReg) return; // Now attach the location information to the DIE. - addBlock(Die, Attribute, Loc); + addBlock(Die, Attribute, Expr.finalize()); } /// Start with the address based on the location provided, and generate the @@ -774,19 +755,22 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); const DIExpression *Expr = DV.getSingleExpression(); - bool ValidReg; + DIExpressionCursor ExprCursor(Expr); const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); - if (Location.getOffset()) { - ValidReg = DwarfExpr.AddMachineRegIndirect(TRI, Location.getReg(), - Location.getOffset()); - if (ValidReg) - DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end()); - } else - ValidReg = DwarfExpr.AddMachineRegExpression(TRI, Expr, Location.getReg()); + auto Reg = Location.getReg(); + DwarfExpr.addFragmentOffset(Expr); + bool ValidReg = + Location.getOffset() + ? 
DwarfExpr.AddMachineRegIndirect(TRI, Reg, Location.getOffset()) + : DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Reg); + + if (!ValidReg) + return; + + DwarfExpr.AddExpression(std::move(ExprCursor)); // Now attach the location information to the DIE. - if (ValidReg) - addBlock(Die, Attribute, Loc); + addBlock(Die, Attribute, Loc); } /// Add a Dwarf loclistptr attribute data and value. @@ -802,7 +786,13 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, StringRef Name = Var.getName(); if (!Name.empty()) addString(VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(VariableDie, Var.getVariable()); + const auto *DIVar = Var.getVariable(); + if (DIVar) + if (uint32_t AlignInBytes = DIVar->getAlignInBytes()) + addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + + addSourceLine(VariableDie, DIVar); addType(VariableDie, Var.getType()); if (Var.isArtificial()) addFlag(VariableDie, dwarf::DW_AT_artificial); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 90f74a3..a8025f1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -32,9 +32,6 @@ class DwarfCompileUnit : public DwarfUnit { /// A numeric ID unique among all CUs in the module unsigned UniqueID; - /// Offset of the UnitDie from beginning of debug info section. - unsigned DebugInfoOffset = 0; - /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding /// the need to search for it in applyStmtList. DIE::value_iterator StmtListValue; @@ -84,8 +81,6 @@ public: DwarfDebug *DW, DwarfFile *DWU); unsigned getUniqueID() const { return UniqueID; } - unsigned getDebugInfoOffset() const { return DebugInfoOffset; } - void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } DwarfCompileUnit *getSkeleton() const { return Skeleton; @@ -96,8 +91,16 @@ public: /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. void applyStmtList(DIE &D); - /// getOrCreateGlobalVariableDIE - get or create global variable DIE. - DIE *getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV); + /// A pair of GlobalVariable and DIExpression. + struct GlobalExpr { + const GlobalVariable *Var; + const DIExpression *Expr; + }; + + /// Get or create global variable DIE. + DIE * + getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV, + ArrayRef<GlobalExpr> GlobalExprs); /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. @@ -176,7 +179,7 @@ public: unsigned *ChildScopeCount = nullptr); /// \brief Construct a DIE for this subprogram scope. - void constructSubprogramScopeDIE(LexicalScope *Scope); + void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope); DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE); @@ -190,20 +193,15 @@ public: /// Set the skeleton unit associated with this unit. 
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } - const MCSymbol *getSectionSym() const { - assert(Section); - return Section->getBeginSymbol(); - } - unsigned getLength() { return sizeof(uint32_t) + // Length field - getHeaderSize() + UnitDie.getSize(); + getHeaderSize() + getUnitDie().getSize(); } void emitHeader(bool UseOffsets) override; MCSymbol *getLabelBegin() const { - assert(Section); + assert(getSection()); return LabelBegin; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7fba768..91a3d09 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -62,11 +62,6 @@ static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); -static cl::opt<bool> UnknownLocations( - "use-unknown-locations", cl::Hidden, - cl::desc("Make an absence of debug location information explicit."), - cl::init(false)); - static cl::opt<bool> GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, cl::desc("Generate GNU-style pubnames and pubtypes"), @@ -81,12 +76,19 @@ namespace { enum DefaultOnOff { Default, Enable, Disable }; } +static cl::opt<DefaultOnOff> UnknownLocations( + "use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::values(clEnumVal(Default, "At top of block or after label"), + clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")), + cl::init(Default)); + static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, cl::desc("Output prototype dwarf accelerator tables."), cl::values(clEnumVal(Default, "Default for platform"), clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), clEnumValEnd), + clEnumVal(Disable, "Disabled")), cl::init(Default)); static cl::opt<DefaultOnOff> @@ -94,7 +96,7 @@ SplitDwarf("split-dwarf", cl::Hidden, cl::desc("Output DWARF5 split debug info."), cl::values(clEnumVal(Default, "Default for platform"), clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), clEnumValEnd), + clEnumVal(Disable, "Disabled")), cl::init(Default)); static cl::opt<DefaultOnOff> @@ -102,7 +104,7 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, cl::desc("Generate DWARF pubnames and pubtypes sections"), cl::values(clEnumVal(Default, "Default for platform"), clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), clEnumValEnd), + clEnumVal(Disable, "Disabled")), cl::init(Default)); enum LinkageNameOption { @@ -117,12 +119,13 @@ static cl::opt<LinkageNameOption> "Default for platform"), clEnumValN(AllLinkageNames, "All", "All"), clEnumValN(AbstractLinkageNames, "Abstract", - "Abstract subprograms"), - clEnumValEnd), + "Abstract subprograms")), cl::init(DefaultLinkageNames)); -static const char *const DWARFGroupName = "DWARF Emission"; -static const char *const DbgTimerName = "DWARF Debug Writer"; +static const char *const DWARFGroupName = "dwarf"; +static const char *const DWARFGroupDescription = "DWARF Emission"; +static const char *const DbgTimerName = "writer"; +static const char *const DbgTimerDescription = "DWARF Debug Writer"; void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { BS.EmitInt8( @@ -196,7 +199,16 @@ const DIType *DbgVariable::getType() const { return Ty; } -static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { +ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { + 
std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(), + [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { + return A.Expr->getFragmentInfo()->OffsetInBits < + B.Expr->getFragmentInfo()->OffsetInBits; + }); + return FrameIndexExprs; +} + +static const DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; @@ -205,7 +217,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()), InfoHolder(A, "info_string", DIEValueAllocator), SkeletonHolder(A, "skel_string", DIEValueAllocator), - IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), + IsDarwin(A->TM.getTargetTriple().isOSDarwin()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, @@ -215,7 +227,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) { CurFn = nullptr; - Triple TT(Asm->getTargetTriple()); + const Triple &TT = Asm->TM.getTargetTriple(); // Make sure we know our "debugger tuning." The target option takes // precedence; fall back to triple-based defaults. @@ -255,7 +267,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames; unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; - DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber + unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); // Use dwarf 4 by default if nothing is requested. DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION; @@ -349,10 +361,11 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { return !getLabelAfterInsn(Ranges.front().second); } -template <typename Func> void forBothCUs(DwarfCompileUnit &CU, Func F) { +template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) { F(CU); if (auto *SkelCU = CU.getSkeleton()) - F(*SkelCU); + if (CU.getCUNode()->getSplitDebugInlining()) + F(*SkelCU); } void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { @@ -360,13 +373,13 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { assert(Scope->isAbstractScope()); assert(!Scope->getInlinedAt()); - const MDNode *SP = Scope->getScopeNode(); + auto *SP = cast<DISubprogram>(Scope->getScopeNode()); ProcessedSPNodes.insert(SP); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - auto &CU = *CUMap.lookup(cast<DISubprogram>(SP)->getUnit()); + auto &CU = *CUMap.lookup(SP->getUnit()); forBothCUs(CU, [&](DwarfCompileUnit &CU) { CU.constructAbstractSubprogramScopeDIE(Scope); }); @@ -435,9 +448,9 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { } if (useSplitDwarf()) - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); else - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); if (DIUnit->getDWOId()) { // This CU is either a clang module DWO or a skeleton CU. 
@@ -449,8 +462,8 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { DIUnit->getSplitDebugFilename()); } - CUMap.insert(std::make_pair(DIUnit, &NewCU)); - CUDieMap.insert(std::make_pair(&Die, &NewCU)); + CUMap.insert({DIUnit, &NewCU}); + CUDieMap.insert({&Die, &NewCU}); return NewCU; } @@ -460,11 +473,34 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, D->addChild(TheCU.constructImportedEntityDIE(N)); } +/// Sort and unique GVEs by comparing their fragment offset. +static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> & +sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { + std::sort(GVEs.begin(), GVEs.end(), + [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { + if (A.Expr != B.Expr && A.Expr && B.Expr) { + auto FragmentA = A.Expr->getFragmentInfo(); + auto FragmentB = B.Expr->getFragmentInfo(); + if (FragmentA && FragmentB) + return FragmentA->OffsetInBits < FragmentB->OffsetInBits; + } + return false; + }); + GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), + [](DwarfCompileUnit::GlobalExpr A, + DwarfCompileUnit::GlobalExpr B) { + return A.Expr == B.Expr; + }), + GVEs.end()); + return GVEs; +} + // Emit all Dwarf sections that should come prior to the content. Create // global DIEs and emit initial debug info sections. This is invoked by // the target AsmPrinter. void DwarfDebug::beginModule() { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName, + DWARFGroupDescription, TimePassesIsEnabled); if (DisableDebugInfoPrinting) return; @@ -475,13 +511,30 @@ void DwarfDebug::beginModule() { // Tell MMI whether we have debug info. MMI->setDebugInfoAvailability(NumDebugCUs > 0); SingleCU = NumDebugCUs == 1; + DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>> + GVMap; + for (const GlobalVariable &Global : M->globals()) { + SmallVector<DIGlobalVariableExpression *, 1> GVs; + Global.getDebugInfo(GVs); + for (auto *GVE : GVs) + GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()}); + } for (DICompileUnit *CUNode : M->debug_compile_units()) { DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); for (auto *IE : CUNode->getImportedEntities()) CU.addImportedEntity(IE); - for (auto *GV : CUNode->getGlobalVariables()) - CU.getOrCreateGlobalVariableDIE(GV); + + // Global Variables. + for (auto *GVE : CUNode->getGlobalVariables()) + GVMap[GVE->getVariable()].push_back({nullptr, GVE->getExpression()}); + DenseSet<DIGlobalVariable *> Processed; + for (auto *GVE : CUNode->getGlobalVariables()) { + DIGlobalVariable *GV = GVE->getVariable(); + if (Processed.insert(GV).second) + CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV])); + } + for (auto *Ty : CUNode->getEnumTypes()) { // The enum types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. @@ -509,7 +562,7 @@ void DwarfDebug::finishVariableDefinitions() { // FIXME: Consider the time-space tradeoff of just storing the unit pointer // in the ConcreteVariables list, rather than looking it up again here. // DIE::getUnit isn't simple - it walks parent pointers, etc. 
- DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit()); + DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie()); assert(Unit); DbgVariable *AbsVar = getExistingAbstractVariable( InlinedVariable(Var->getVariable(), Var->getInlinedAt())); @@ -522,13 +575,11 @@ void DwarfDebug::finishVariableDefinitions() { } void DwarfDebug::finishSubprogramDefinitions() { - for (auto &F : MMI->getModule()->functions()) - if (auto *SP = F.getSubprogram()) - if (ProcessedSPNodes.count(SP) && - SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug) - forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(SP); - }); + for (const DISubprogram *SP : ProcessedSPNodes) + if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug) + forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) { + CU.finishSubprogramDefinition(SP); + }); } void DwarfDebug::finalizeModuleInfo() { @@ -715,10 +766,10 @@ void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped( createAbstractVariable(Cleansed, Scope); } -// Collect variable information from side table maintained by MMI. -void DwarfDebug::collectVariableInfoFromMMITable( +// Collect variable information from side table maintained by MF. +void DwarfDebug::collectVariableInfoFromMFTable( DenseSet<InlinedVariable> &Processed) { - for (const auto &VI : MMI->getVariableDbgInfo()) { + for (const auto &VI : Asm->MF->getVariableDbgInfo()) { if (!VI.Var) continue; assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && @@ -765,7 +816,7 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); } -/// \brief If this and Next are describing different pieces of the same +/// \brief If this and Next are describing different fragments of the same /// variable, merge them by appending Next's values to the current /// list of values. /// Return true if the merge was successful. @@ -773,15 +824,15 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { if (Begin == Next.Begin) { auto *FirstExpr = cast<DIExpression>(Values[0].Expression); auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression); - if (!FirstExpr->isBitPiece() || !FirstNextExpr->isBitPiece()) + if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment()) return false; - // We can only merge entries if none of the pieces overlap any others. + // We can only merge entries if none of the fragments overlap any others. // In doing so, we can take advantage of the fact that both lists are // sorted. for (unsigned i = 0, j = 0; i < Values.size(); ++i) { for (; j < Next.Values.size(); ++j) { - int res = DebugHandlerBase::pieceCmp( + int res = DebugHandlerBase::fragmentCmp( cast<DIExpression>(Values[i].Expression), cast<DIExpression>(Next.Values[j].Expression)); if (res == 0) // The two expressions overlap, we can't merge. @@ -804,27 +855,27 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { /// Build the location list for all DBG_VALUEs in the function that /// describe the same variable. If the ranges of several independent -/// pieces of the same variable overlap partially, split them up and +/// fragments of the same variable overlap partially, split them up and /// combine the ranges. The resulting DebugLocEntries are will have /// strict monotonically increasing begin addresses and will never /// overlap. 
// // Input: // -// Ranges History [var, loc, piece ofs size] -// 0 | [x, (reg0, piece 0, 32)] -// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry +// Ranges History [var, loc, fragment ofs size] +// 0 | [x, (reg0, fragment 0, 32)] +// 1 | | [x, (reg1, fragment 32, 32)] <- IsFragmentOfPrevEntry // 2 | | ... // 3 | [clobber reg0] -// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of +// 4 [x, (mem, fragment 0, 64)] <- overlapping with both previous fragments of // x. // // Output: // -// [0-1] [x, (reg0, piece 0, 32)] -// [1-3] [x, (reg0, piece 0, 32), (reg1, piece 32, 32)] -// [3-4] [x, (reg1, piece 32, 32)] -// [4- ] [x, (mem, piece 0, 64)] +// [0-1] [x, (reg0, fragment 0, 32)] +// [1-3] [x, (reg0, fragment 0, 32), (reg1, fragment 32, 32)] +// [3-4] [x, (reg1, fragment 32, 32)] +// [4- ] [x, (mem, fragment 0, 64)] void DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, const DbgValueHistoryMap::InstrRanges &Ranges) { @@ -842,11 +893,10 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, continue; } - // If this piece overlaps with any open ranges, truncate them. + // If this fragment overlaps with any open ranges, truncate them. const DIExpression *DIExpr = Begin->getDebugExpression(); - auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(), - [&](DebugLocEntry::Value R) { - return piecesOverlap(DIExpr, R.getExpression()); + auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) { + return fragmentsOverlap(DIExpr, R.getExpression()); }); OpenRanges.erase(Last, OpenRanges.end()); @@ -868,12 +918,12 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, DebugLocEntry Loc(StartLabel, EndLabel, Value); bool couldMerge = false; - // If this is a piece, it may belong to the current DebugLocEntry. - if (DIExpr->isBitPiece()) { + // If this is a fragment, it may belong to the current DebugLocEntry. + if (DIExpr->isFragment()) { // Add this value to the list of open ranges. OpenRanges.push_back(Value); - // Attempt to add the piece to the last entry. + // Attempt to add the fragment to the last entry. if (!DebugLoc.empty()) if (DebugLoc.back().MergeValues(Loc)) couldMerge = true; @@ -881,7 +931,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, if (!couldMerge) { // Need to add a new DebugLocEntry. Add all values from still - // valid non-overlapping pieces. + // valid non-overlapping fragments. if (OpenRanges.size()) Loc.addValues(OpenRanges); @@ -929,7 +979,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, DenseSet<InlinedVariable> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. - collectVariableInfoFromMMITable(Processed); + collectVariableInfoFromMFTable(Processed); for (const auto &I : DbgValues) { InlinedVariable IV = I.first; @@ -996,30 +1046,82 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); assert(CurMI); - // Check if source location changes, but ignore DBG_VALUE locations. 
- if (!MI->isDebugValue()) { - const DebugLoc &DL = MI->getDebugLoc(); - if (DL != PrevInstLoc) { - if (DL) { - unsigned Flags = 0; - PrevInstLoc = DL; - if (DL == PrologEndLoc) { - Flags |= DWARF2_FLAG_PROLOGUE_END; - PrologEndLoc = DebugLoc(); - Flags |= DWARF2_FLAG_IS_STMT; - } - if (DL.getLine() != - Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine()) - Flags |= DWARF2_FLAG_IS_STMT; - - const MDNode *Scope = DL.getScope(); - recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); - } else if (UnknownLocations) { - PrevInstLoc = DL; - recordSourceLine(0, 0, nullptr, 0); + // Check if source location changes, but ignore DBG_VALUE and CFI locations. + if (MI->isDebugValue() || MI->isCFIInstruction()) + return; + const DebugLoc &DL = MI->getDebugLoc(); + // When we emit a line-0 record, we don't update PrevInstLoc; so look at + // the last line number actually emitted, to see if it was line 0. + unsigned LastAsmLine = + Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine(); + + if (DL == PrevInstLoc) { + // If we have an ongoing unspecified location, nothing to do here. + if (!DL) + return; + // We have an explicit location, same as the previous location. + // But we might be coming back to it after a line 0 record. + if (LastAsmLine == 0 && DL.getLine() != 0) { + // Reinstate the source location but not marked as a statement. + const MDNode *Scope = DL.getScope(); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0); + } + return; + } + + if (!DL) { + // We have an unspecified location, which might want to be line 0. + // If we have already emitted a line-0 record, don't repeat it. + if (LastAsmLine == 0) + return; + // If user said Don't Do That, don't do that. + if (UnknownLocations == Disable) + return; + // See if we have a reason to emit a line-0 record now. + // Reasons to emit a line-0 record include: + // - User asked for it (UnknownLocations). + // - Instruction has a label, so it's referenced from somewhere else, + // possibly debug information; we want it to have a source location. + // - Instruction is at the top of a block; we don't want to inherit the + // location from the physically previous (maybe unrelated) block. + if (UnknownLocations == Enable || PrevLabel || + (PrevInstBB && PrevInstBB != MI->getParent())) { + // Preserve the file and column numbers, if we can, to save space in + // the encoded line table. + // Do not update PrevInstLoc, it remembers the last non-0 line. + const MDNode *Scope = nullptr; + unsigned Column = 0; + if (PrevInstLoc) { + Scope = PrevInstLoc.getScope(); + Column = PrevInstLoc.getCol(); } + recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0); } + return; + } + + // We have an explicit location, different from the previous location. + // Don't repeat a line-0 record, but otherwise emit the new location. + // (The new location might be an explicit line 0, which we do emit.) + if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0) + return; + unsigned Flags = 0; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT; + PrologEndLoc = DebugLoc(); } + // If the line changed, we call that a new statement; unless we went to + // line 0 and came back, in which case it is not a new statement. + unsigned OldLine = PrevInstLoc ? 
PrevInstLoc.getLine() : LastAsmLine; + if (DL.getLine() && DL.getLine() != OldLine) + Flags |= DWARF2_FLAG_IS_STMT; + + const MDNode *Scope = DL.getScope(); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); + + // If we're not at line 0, remember this location. + if (DL.getLine()) + PrevInstLoc = DL; } static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { @@ -1093,18 +1195,14 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { "endFunction should be called with the same function as beginFunction"); const DISubprogram *SP = MF->getFunction()->getSubprogram(); - if (!MMI->hasDebugInfo() || LScopes.empty() || !SP || + if (!MMI->hasDebugInfo() || !SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) { - // If we don't have a lexical scope for this function then there will - // be a hole in the range information. Keep note of this by setting the - // previously used section to nullptr. + // If we don't have a subprogram for this function then there will be a hole + // in the range information. Keep note of this by setting the previously + // used section to nullptr. PrevCU = nullptr; CurFn = nullptr; DebugHandlerBase::endFunction(MF); - // Mark functions with no debug info on any instructions, but a - // valid DISubprogram as processed. - if (SP) - ProcessedSPNodes.insert(SP); return; } @@ -1112,7 +1210,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - SP = cast<DISubprogram>(FnScope->getScopeNode()); + assert(!FnScope || SP == FnScope->getScopeNode()); DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit()); DenseSet<InlinedVariable> ProcessedVars; @@ -1154,10 +1252,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { constructAbstractSubprogramScopeDIE(AScope); } - TheCU.constructSubprogramScopeDIE(FnScope); + ProcessedSPNodes.insert(SP); + TheCU.constructSubprogramScopeDIE(SP, FnScope); if (auto *SkelCU = TheCU.getSkeleton()) - if (!LScopes.getAbstractScopesList().empty()) - SkelCU->constructSubprogramScopeDIE(FnScope); + if (!LScopes.getAbstractScopesList().empty() && + TheCU.getCUNode()->getSplitDebugInlining()) + SkelCU->constructSubprogramScopeDIE(SP, FnScope); // Clear debug info // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the @@ -1181,7 +1281,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, Fn = Scope->getFilename(); Dir = Scope->getDirectory(); if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) - Discriminator = LBF->getDiscriminator(); + if (getDwarfVersion() >= 4) + Discriminator = LBF->getDiscriminator(); unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) @@ -1396,9 +1497,9 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, ByteStreamer &Streamer, const DebugLocEntry::Value &Value, - unsigned PieceOffsetInBits) { - DebugLocDwarfExpression DwarfExpr(AP.getDwarfDebug()->getDwarfVersion(), - Streamer); + DwarfExpression &DwarfExpr) { + DIExpressionCursor ExprCursor(Value.getExpression()); + DwarfExpr.addFragmentOffset(Value.getExpression()); // Regular entry. 
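The new source-line bookkeeping in DwarfDebug::beginInstruction above boils down to a small decision about when a line-0 record is worth emitting. A condensed restatement of that reasoning, for illustration only:

// Mirrors the logic above: never repeat a line-0 record, honor the
// -use-unknown-locations setting, and otherwise emit line 0 when the
// instruction is labeled or starts a new block.
enum class UnknownLoc { Default, Enable, Disable };

bool shouldEmitLineZero(UnknownLoc Option, bool LastEmittedLineWasZero,
                        bool InstrHasLabel, bool AtTopOfNewBlock) {
  if (LastEmittedLineWasZero)
    return false;                     // already covered by a line-0 record
  if (Option == UnknownLoc::Disable)
    return false;                     // user said never
  return Option == UnknownLoc::Enable // user asked for it
         || InstrHasLabel             // referenced from elsewhere, e.g. debug info
         || AtTopOfNewBlock;          // don't inherit the previous block's location
}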
if (Value.isInt()) { if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed || @@ -1408,25 +1509,16 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.AddUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); - const DIExpression *Expr = Value.getExpression(); - if (!Expr || !Expr->getNumElements()) - // Regular entry. - AP.EmitDwarfRegOp(Streamer, Loc); - else { - // Complex address entry. - const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); - if (Loc.getOffset()) { - DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset()); - DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(), - PieceOffsetInBits); - } else - DwarfExpr.AddMachineRegExpression(TRI, Expr, Loc.getReg(), - PieceOffsetInBits); - } + const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); + if (Loc.getOffset()) + DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset()); + else + DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Loc.getReg()); } else if (Value.isConstantFP()) { APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); DwarfExpr.AddUnsignedConstant(RawBytes); } + DwarfExpr.AddExpression(std::move(ExprCursor)); } void DebugLocEntry::finalize(const AsmPrinter &AP, @@ -1434,36 +1526,24 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, const DIBasicType *BT) { DebugLocStream::EntryBuilder Entry(List, Begin, End); BufferByteStreamer Streamer = Entry.getStreamer(); + DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer); const DebugLocEntry::Value &Value = Values[0]; - if (Value.isBitPiece()) { - // Emit all pieces that belong to the same variable and range. - assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { - return P.isBitPiece(); - }) && "all values are expected to be pieces"); + if (Value.isFragment()) { + // Emit all fragments that belong to the same variable and range. + assert(all_of(Values, [](DebugLocEntry::Value P) { + return P.isFragment(); + }) && "all values are expected to be fragments"); assert(std::is_sorted(Values.begin(), Values.end()) && - "pieces are expected to be sorted"); - - unsigned Offset = 0; - for (auto Piece : Values) { - const DIExpression *Expr = Piece.getExpression(); - unsigned PieceOffset = Expr->getBitPieceOffset(); - unsigned PieceSize = Expr->getBitPieceSize(); - assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); - if (Offset < PieceOffset) { - // The DWARF spec seriously mandates pieces with no locations for gaps. - DebugLocDwarfExpression Expr(AP.getDwarfDebug()->getDwarfVersion(), - Streamer); - Expr.AddOpPiece(PieceOffset-Offset, 0); - Offset += PieceOffset-Offset; - } - Offset += PieceSize; + "fragments are expected to be sorted"); + + for (auto Fragment : Values) + emitDebugLocValue(AP, BT, Streamer, Fragment, DwarfExpr); - emitDebugLocValue(AP, BT, Streamer, Piece, PieceOffset); - } } else { - assert(Values.size() == 1 && "only pieces may have >1 value"); - emitDebugLocValue(AP, BT, Streamer, Value, 0); + assert(Values.size() == 1 && "only fragments may have >1 value"); + emitDebugLocValue(AP, BT, Streamer, Value, DwarfExpr); } + DwarfExpr.finalize(); } void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { @@ -1514,14 +1594,14 @@ void DwarfDebug::emitDebugLocDWO() { // rather than two. We could get fancier and try to, say, reuse an // address we know we've emitted elsewhere (the start of the function? 
// The start of the CU or CU subrange that encloses this range?) - Asm->EmitInt8(dwarf::DW_LLE_start_length_entry); + Asm->EmitInt8(dwarf::DW_LLE_startx_length); unsigned idx = AddrPool.getIndex(Entry.BeginSym); Asm->EmitULEB128(idx); Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); emitDebugLocEntryLocation(Entry); } - Asm->EmitInt8(dwarf::DW_LLE_end_of_list_entry); + Asm->EmitInt8(dwarf::DW_LLE_end_of_list); } } @@ -1807,7 +1887,7 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = make_unique<DwarfCompileUnit>( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); NewCU.initStmtList(); @@ -1889,8 +1969,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); - TypeUnitsUnderConstruction.push_back( - std::make_pair(std::move(OwnedUnit), CTy)); + TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy); NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2, CU.getLanguage()); @@ -1900,11 +1979,10 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, Ins.first->second = Signature; if (useSplitDwarf()) - NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); + NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); else { CU.applyStmtList(UnitDie); - NewTU.initSection( - Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature)); } NewTU.setType(NewTU.createTypeDIE(CTy)); @@ -1968,3 +2046,7 @@ void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { return; AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); } + +uint16_t DwarfDebug::getDwarfVersion() const { + return Asm->OutStreamer->getContext().getDwarfVersion(); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 6b06757..253e3f0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -22,6 +22,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/DIE.h" @@ -53,7 +54,7 @@ class MachineModuleInfo; /// /// Variables can be created from allocas, in which case they're generated from /// the MMI table. Such variables can have multiple expressions and frame -/// indices. The \a Expr and \a FrameIndices array must match. +/// indices. /// /// Variables can be created from \c DBG_VALUE instructions. Those whose /// location changes over time use \a DebugLocListIndex, while those with a @@ -63,11 +64,16 @@ class MachineModuleInfo; class DbgVariable { const DILocalVariable *Var; /// Variable Descriptor. const DILocation *IA; /// Inlined at location. - SmallVector<const DIExpression *, 1> Expr; /// Complex address. DIE *TheDIE = nullptr; /// Variable DIE. unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs. const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction. - SmallVector<int, 1> FrameIndex; /// Frame index. 
+ + struct FrameIndexExpr { + int FI; + const DIExpression *Expr; + }; + mutable SmallVector<FrameIndexExpr, 1> + FrameIndexExprs; /// Frame index + expression. public: /// Construct a DbgVariable. @@ -79,21 +85,18 @@ public: /// Initialize from the MMI table. void initializeMMI(const DIExpression *E, int FI) { - assert(Expr.empty() && "Already initialized?"); - assert(FrameIndex.empty() && "Already initialized?"); + assert(FrameIndexExprs.empty() && "Already initialized?"); assert(!MInsn && "Already initialized?"); assert((!E || E->isValid()) && "Expected valid expression"); assert(~FI && "Expected valid index"); - Expr.push_back(E); - FrameIndex.push_back(FI); + FrameIndexExprs.push_back({FI, E}); } /// Initialize from a DBG_VALUE instruction. void initializeDbgValue(const MachineInstr *DbgValue) { - assert(Expr.empty() && "Already initialized?"); - assert(FrameIndex.empty() && "Already initialized?"); + assert(FrameIndexExprs.empty() && "Already initialized?"); assert(!MInsn && "Already initialized?"); assert(Var == DbgValue->getDebugVariable() && "Wrong variable"); @@ -102,16 +105,15 @@ public: MInsn = DbgValue; if (auto *E = DbgValue->getDebugExpression()) if (E->getNumElements()) - Expr.push_back(E); + FrameIndexExprs.push_back({0, E}); } // Accessors. const DILocalVariable *getVariable() const { return Var; } const DILocation *getInlinedAt() const { return IA; } - ArrayRef<const DIExpression *> getExpression() const { return Expr; } const DIExpression *getSingleExpression() const { - assert(MInsn && Expr.size() <= 1); - return Expr.size() ? Expr[0] : nullptr; + assert(MInsn && FrameIndexExprs.size() <= 1); + return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } @@ -119,7 +121,9 @@ public: unsigned getDebugLocListIndex() const { return DebugLocListIndex; } StringRef getName() const { return Var->getName(); } const MachineInstr *getMInsn() const { return MInsn; } - ArrayRef<int> getFrameIndex() const { return FrameIndex; } + /// Get the FI entries, sorted by fragment offset. + ArrayRef<FrameIndexExpr> getFrameIndexExprs() const; + bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); } void addMMIEntry(const DbgVariable &V) { assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); @@ -127,16 +131,15 @@ public: assert(V.Var == Var && "conflicting variable"); assert(V.IA == IA && "conflicting inlined-at location"); - assert(!FrameIndex.empty() && "Expected an MMI entry"); - assert(!V.FrameIndex.empty() && "Expected an MMI entry"); - assert(Expr.size() == FrameIndex.size() && "Mismatched expressions"); - assert(V.Expr.size() == V.FrameIndex.size() && "Mismatched expressions"); + assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); + assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); - Expr.append(V.Expr.begin(), V.Expr.end()); - FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end()); - assert(std::all_of(Expr.begin(), Expr.end(), [](const DIExpression *E) { - return E && E->isBitPiece(); - }) && "conflicting locations for variable"); + FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); + assert(all_of(FrameIndexExprs, + [](FrameIndexExpr &FIE) { + return FIE.Expr && FIE.Expr->isFragment(); + }) && + "conflicting locations for variable"); } // Translate tag to proper Dwarf tag. 
@@ -166,11 +169,11 @@ public: bool hasComplexAddress() const { assert(MInsn && "Expected DBG_VALUE, not MMI variable"); - assert(FrameIndex.empty() && "Expected DBG_VALUE, not MMI variable"); - assert( - (Expr.empty() || (Expr.size() == 1 && Expr.back()->getNumElements())) && - "Invalid Expr for DBG_VALUE"); - return !Expr.empty(); + assert((FrameIndexExprs.empty() || + (FrameIndexExprs.size() == 1 && + FrameIndexExprs[0].Expr->getNumElements())) && + "Invalid Expr for DBG_VALUE"); + return !FrameIndexExprs.empty(); } bool isBlockByrefVariable() const; const DIType *getType() const; @@ -216,7 +219,9 @@ class DwarfDebug : public DebugHandlerBase { /// This is a collection of subprogram MDNodes that are processed to /// create DIEs. - SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; + SetVector<const DISubprogram *, SmallVector<const DISubprogram *, 16>, + SmallPtrSet<const DISubprogram *, 16>> + ProcessedSPNodes; /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn; @@ -254,9 +259,6 @@ class DwarfDebug : public DebugHandlerBase { /// Whether to emit all linkage names, or just abstract subprograms. bool UseAllLinkageNames; - /// Version of dwarf we're emitting. - unsigned DwarfVersion; - /// DWARF5 Experimental Options /// @{ bool HasDwarfAccelTables; @@ -443,9 +445,8 @@ class DwarfDebug : public DebugHandlerBase { void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, const DbgValueHistoryMap::InstrRanges &Ranges); - /// Collect variable information from the side table maintained - /// by MMI. - void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); + /// Collect variable information from the side table maintained by MF. + void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &P); public: //===--------------------------------------------------------------------===// @@ -515,7 +516,7 @@ public: bool useSplitDwarf() const { return HasSplitDwarf; } /// Returns the Dwarf Version. - unsigned getDwarfVersion() const { return DwarfVersion; } + uint16_t getDwarfVersion() const; /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } @@ -537,11 +538,6 @@ public: return Ref.resolve(); } - /// Find the DwarfCompileUnit for the given CU Die. - DwarfCompileUnit *lookupUnit(const DIE *CU) const { - return CUDieMap.lookup(CU); - } - void addSubprogramNames(const DISubprogram *SP, DIE &Die); AddressPool &getAddressPool() { return AddrPool; } @@ -559,12 +555,6 @@ public: /// A helper function to check whether the DIE for a given Scope is /// going to be null. bool isLexicalScopeDIENull(LexicalScope *Scope); - - // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. - - SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() { - return ProcessedSPNodes; - } }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 8287f28..80d5bd2 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -28,6 +28,8 @@ protected: /// Per-function flag to indicate if frame CFI info should be emitted. bool shouldEmitCFI; + /// Per-module flag to indicate if .cfi_section has beeen emitted. 
+ bool hasEmittedCFISections; void markFunctionEnd() override; void endFragment() override; @@ -46,8 +48,6 @@ class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; - AsmPrinter::CFIMoveType moveTypeModule; - public: //===--------------------------------------------------------------------===// // Main entry points. @@ -81,7 +81,7 @@ public: ~ARMException() override; /// Emit all exception information that should come after the content. - void endModule() override; + void endModule() override {} /// Gather pre-function exception information. Assumes being emitted /// immediately after the function entry point. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 7dbc6cb..61b2c7e6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -46,7 +46,9 @@ void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) { } void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) { - assert(SizeInBits > 0 && "piece has size zero"); + if (!SizeInBits) + return; + const unsigned SizeOfByte = 8; if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { EmitOp(dwarf::DW_OP_bit_piece); @@ -57,6 +59,7 @@ void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) { unsigned ByteSize = SizeInBits / SizeOfByte; EmitUnsigned(ByteSize); } + this->OffsetInBits += SizeInBits; } void DwarfExpression::AddShr(unsigned ShiftBy) { @@ -82,10 +85,8 @@ bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI, return true; } -bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, - unsigned MachineReg, - unsigned PieceSizeInBits, - unsigned PieceOffsetInBits) { +bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI, + unsigned MachineReg, unsigned MaxSize) { if (!TRI.isPhysicalRegister(MachineReg)) return false; @@ -94,13 +95,11 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, // If this is a valid register number, emit it. if (Reg >= 0) { AddReg(Reg); - if (PieceSizeInBits) - AddOpPiece(PieceSizeInBits, PieceOffsetInBits); return true; } // Walk up the super-register chain until we find a valid number. - // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. + // For example, EAX on x86_64 is a 32-bit fragment of RAX with offset 0. for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { Reg = TRI.getDwarfRegNum(*SR, false); if (Reg >= 0) { @@ -108,27 +107,15 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, unsigned Size = TRI.getSubRegIdxSize(Idx); unsigned RegOffset = TRI.getSubRegIdxOffset(Idx); AddReg(Reg, "super-register"); - if (PieceOffsetInBits == RegOffset) { - AddOpPiece(Size, RegOffset); - } else { - // If this is part of a variable in a sub-register at a - // non-zero offset, we need to manually shift the value into - // place, since the DW_OP_piece describes the part of the - // variable, not the position of the subregister. - if (RegOffset) - AddShr(RegOffset); - AddOpPiece(Size, PieceOffsetInBits); - } + // Use a DW_OP_bit_piece to describe the sub-register. + setSubRegisterPiece(Size, RegOffset); return true; } } // Otherwise, attempt to find a covering set of sub-register numbers. // For example, Q0 on ARM is a composition of D0+D1. 
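For the covering-set case just mentioned, the result is a composite location built from DW_OP_regx/DW_OP_piece pairs. A self-contained sketch of the bytes for Q0 on ARM, assuming the AAPCS DWARF numbering that maps D0 and D1 to 256 and 257:

#include <cstdint>
#include <vector>

// Append a ULEB128-encoded value (the operand encoding of DW_OP_regx and DW_OP_piece).
static void appendULEB(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

// Illustration only: Q0 described as two 8-byte pieces of D0 and D1.
std::vector<uint8_t> sketchQ0Location() {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x90); appendULEB(Expr, 256); // DW_OP_regx D0 (sub-register)
  Expr.push_back(0x93); appendULEB(Expr, 8);   // DW_OP_piece, 8 bytes
  Expr.push_back(0x90); appendULEB(Expr, 257); // DW_OP_regx D1
  Expr.push_back(0x93); appendULEB(Expr, 8);   // DW_OP_piece, 8 bytes
  return Expr;
}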
- // - // Keep track of the current position so we can emit the more - // efficient DW_OP_piece. - unsigned CurPos = PieceOffsetInBits; + unsigned CurPos = 0; // The size of the register in bits, assuming 8 bits per byte. unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; // Keep track of the bits in the register we already emitted, so we @@ -150,7 +137,12 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, // its range, emit a DWARF piece for it. if (Reg >= 0 && Intersection.any()) { AddReg(Reg, "sub-register"); - AddOpPiece(Size, Offset == CurPos ? 0 : Offset); + if (Offset >= MaxSize) + break; + // Emit a piece for any gap in the coverage. + if (Offset > CurPos) + AddOpPiece(Offset - CurPos); + AddOpPiece(std::min<unsigned>(Size, MaxSize - Offset)); CurPos = Offset + Size; // Mark it as emitted. @@ -158,7 +150,7 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, } } - return CurPos > PieceOffsetInBits; + return CurPos; } void DwarfExpression::AddStackValue() { @@ -194,92 +186,114 @@ void DwarfExpression::AddUnsignedConstant(const APInt &Value) { } } -static unsigned getOffsetOrZero(unsigned OffsetInBits, - unsigned PieceOffsetInBits) { - if (OffsetInBits == PieceOffsetInBits) - return 0; - assert(OffsetInBits >= PieceOffsetInBits && "overlapping pieces"); - return OffsetInBits; -} - bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI, - const DIExpression *Expr, + DIExpressionCursor &ExprCursor, unsigned MachineReg, - unsigned PieceOffsetInBits) { - auto I = Expr->expr_op_begin(); - auto E = Expr->expr_op_end(); - if (I == E) - return AddMachineRegPiece(TRI, MachineReg); + unsigned FragmentOffsetInBits) { + if (!ExprCursor) + return AddMachineReg(TRI, MachineReg); // Pattern-match combinations for which more efficient representations exist // first. bool ValidReg = false; - switch (I->getOp()) { - case dwarf::DW_OP_bit_piece: { - unsigned OffsetInBits = I->getArg(0); - unsigned SizeInBits = I->getArg(1); - // Piece always comes at the end of the expression. - return AddMachineRegPiece(TRI, MachineReg, SizeInBits, - getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); + auto Op = ExprCursor.peek(); + switch (Op->getOp()) { + default: { + auto Fragment = ExprCursor.getFragmentInfo(); + ValidReg = AddMachineReg(TRI, MachineReg, + Fragment ? Fragment->SizeInBits : ~1U); + break; } case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: { // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset]. // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset]. - auto N = I.getNext(); - if (N != E && N->getOp() == dwarf::DW_OP_deref) { - unsigned Offset = I->getArg(0); + auto N = ExprCursor.peekNext(); + if (N && N->getOp() == dwarf::DW_OP_deref) { + unsigned Offset = Op->getArg(0); ValidReg = AddMachineRegIndirect( - TRI, MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset); - std::advance(I, 2); - break; + TRI, MachineReg, Op->getOp() == dwarf::DW_OP_plus ? Offset : -Offset); + ExprCursor.consume(2); } else - ValidReg = AddMachineRegPiece(TRI, MachineReg); - } - case dwarf::DW_OP_deref: { - // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. - ValidReg = AddMachineRegIndirect(TRI, MachineReg); - ++I; - break; + ValidReg = AddMachineReg(TRI, MachineReg); + break; } - default: - llvm_unreachable("unsupported operand"); + case dwarf::DW_OP_deref: + // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+ ValidReg = AddMachineRegIndirect(TRI, MachineReg); + ExprCursor.take(); + break; } - if (!ValidReg) - return false; - - // Emit remaining elements of the expression. - AddExpression(I, E, PieceOffsetInBits); - return true; + return ValidReg; } -void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I, - DIExpression::expr_op_iterator E, - unsigned PieceOffsetInBits) { - for (; I != E; ++I) { - switch (I->getOp()) { - case dwarf::DW_OP_bit_piece: { - unsigned OffsetInBits = I->getArg(0); - unsigned SizeInBits = I->getArg(1); - AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); +void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor, + unsigned FragmentOffsetInBits) { + while (ExprCursor) { + auto Op = ExprCursor.take(); + switch (Op->getOp()) { + case dwarf::DW_OP_LLVM_fragment: { + unsigned SizeInBits = Op->getArg(1); + unsigned FragmentOffset = Op->getArg(0); + // The fragment offset must have already been adjusted by emitting an + // empty DW_OP_piece / DW_OP_bit_piece before we emitted the base + // location. + assert(OffsetInBits >= FragmentOffset && "fragment offset not added?"); + + // If \a AddMachineReg already emitted DW_OP_piece operations to represent + // a super-register by splicing together sub-registers, subtract the size + // of the pieces that was already emitted. + SizeInBits -= OffsetInBits - FragmentOffset; + + // If \a AddMachineReg requested a DW_OP_bit_piece to stencil out a + // sub-register that is smaller than the current fragment's size, use it. + if (SubRegisterSizeInBits) + SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits); + + AddOpPiece(SizeInBits, SubRegisterOffsetInBits); + setSubRegisterPiece(0, 0); break; } case dwarf::DW_OP_plus: EmitOp(dwarf::DW_OP_plus_uconst); - EmitUnsigned(I->getArg(0)); + EmitUnsigned(Op->getArg(0)); break; case dwarf::DW_OP_minus: // There is no OP_minus_uconst. EmitOp(dwarf::DW_OP_constu); - EmitUnsigned(I->getArg(0)); + EmitUnsigned(Op->getArg(0)); EmitOp(dwarf::DW_OP_minus); break; case dwarf::DW_OP_deref: EmitOp(dwarf::DW_OP_deref); break; + case dwarf::DW_OP_constu: + EmitOp(dwarf::DW_OP_constu); + EmitUnsigned(Op->getArg(0)); + break; + case dwarf::DW_OP_stack_value: + AddStackValue(); + break; default: llvm_unreachable("unhandled opcode found in expression"); } } } + +void DwarfExpression::finalize() { + if (SubRegisterSizeInBits) + AddOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits); +} + +void DwarfExpression::addFragmentOffset(const DIExpression *Expr) { + if (!Expr || !Expr->isFragment()) + return; + + uint64_t FragmentOffset = Expr->getFragmentInfo()->OffsetInBits; + assert(FragmentOffset >= OffsetInBits && + "overlapping or duplicate fragments"); + if (FragmentOffset > OffsetInBits) + AddOpPiece(FragmentOffset - OffsetInBits); + OffsetInBits = FragmentOffset; +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 5fff28d..fd90fa0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -25,17 +25,86 @@ class TargetRegisterInfo; class DwarfUnit; class DIELoc; +/// Holds a DIExpression and keeps track of how many operands have been consumed +/// so far. 
+class DIExpressionCursor { + DIExpression::expr_op_iterator Start, End; +public: + DIExpressionCursor(const DIExpression *Expr) { + if (!Expr) { + assert(Start == End); + return; + } + Start = Expr->expr_op_begin(); + End = Expr->expr_op_end(); + } + + DIExpressionCursor(ArrayRef<uint64_t> Expr) + : Start(Expr.begin()), End(Expr.end()) {} + + /// Consume one operation. + Optional<DIExpression::ExprOperand> take() { + if (Start == End) + return None; + return *(Start++); + } + + /// Consume N operations. + void consume(unsigned N) { std::advance(Start, N); } + + /// Return the current operation. + Optional<DIExpression::ExprOperand> peek() const { + if (Start == End) + return None; + return *(Start); + } + + /// Return the next operation. + Optional<DIExpression::ExprOperand> peekNext() const { + if (Start == End) + return None; + + auto Next = Start.getNext(); + if (Next == End) + return None; + + return *Next; + } + /// Determine whether there are any operations left in this expression. + operator bool() const { return Start != End; } + + /// Retrieve the fragment information, if any. + Optional<DIExpression::FragmentInfo> getFragmentInfo() const { + return DIExpression::getFragmentInfo(Start, End); + } +}; + /// Base class containing the logic for constructing DWARF expressions /// independently of whether they are emitted into a DIE or into a .debug_loc /// entry. class DwarfExpression { protected: - // Various convenience accessors that extract things out of AsmPrinter. unsigned DwarfVersion; + /// Current Fragment Offset in Bits. + uint64_t OffsetInBits = 0; + + /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister. + unsigned SubRegisterSizeInBits = 0; + unsigned SubRegisterOffsetInBits = 0; + + /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed + /// to represent a subregister. + void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) { + SubRegisterSizeInBits = SizeInBits; + SubRegisterOffsetInBits = OffsetInBits; + } public: DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {} - virtual ~DwarfExpression() {} + virtual ~DwarfExpression() {}; + + /// This needs to be called last to commit any pending changes. + void finalize(); /// Output a dwarf operand and an optional assembler comment. virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0; @@ -52,24 +121,25 @@ public: /// Emit an (double-)indirect dwarf register operation. void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false); - /// Emit a dwarf register operation for describing - /// - a small value occupying only part of a register or - /// - a register representing only part of a value. + /// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment. + /// \param OffsetInBits This is an optional offset into the location that + /// is at the top of the DWARF stack. void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0); + /// Emit a shift-right dwarf expression. void AddShr(unsigned ShiftBy); + /// Emit a DW_OP_stack_value, if supported. /// - /// The proper way to describe a constant value is - /// DW_OP_constu <const>, DW_OP_stack_value. - /// Unfortunately, DW_OP_stack_value was not available until DWARF-4, - /// so we will continue to generate DW_OP_constu <const> for DWARF-2 - /// and DWARF-3. Technically, this is incorrect since DW_OP_const <const> - /// actually describes a value at a constant addess, not a constant value. 
- /// However, in the past there was no better way to describe a constant - /// value, so the producers and consumers started to rely on heuristics - /// to disambiguate the value vs. location status of the expression. - /// See PR21176 for more details. + /// The proper way to describe a constant value is DW_OP_constu <const>, + /// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available + /// until DWARF 4, so we will continue to generate DW_OP_constu <const> for + /// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_const + /// <const> actually describes a value at a constant address, not a constant + /// value. However, in the past there was no better way to describe a + /// constant value, so the producers and consumers started to rely on + /// heuristics to disambiguate the value vs. location status of the + /// expression. See PR21176 for more details. void AddStackValue(); /// Emit an indirect dwarf register operation for the given machine register. @@ -77,23 +147,23 @@ public: bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg, int Offset = 0); - /// \brief Emit a partial DWARF register operation. - /// \param MachineReg the register - /// \param PieceSizeInBits size and - /// \param PieceOffsetInBits offset of the piece in bits, if this is one - /// piece of an aggregate value. + /// Emit a partial DWARF register operation. + /// + /// \param MachineReg The register number. + /// \param MaxSize If the register must be composed from + /// sub-registers this is an upper bound + /// for how many bits the emitted DW_OP_piece + /// may cover. /// - /// If size and offset is zero an operation for the entire - /// register is emitted: Some targets do not provide a DWARF - /// register number for every register. If this is the case, this - /// function will attempt to emit a DWARF register by emitting a - /// piece of a super-register or by piecing together multiple - /// subregisters that alias the register. + /// If size and offset is zero an operation for the entire register is + /// emitted: Some targets do not provide a DWARF register number for every + /// register. If this is the case, this function will attempt to emit a DWARF + /// register by emitting a fragment of a super-register or by piecing together + /// multiple subregisters that alias the register. /// /// \return false if no DWARF register exists for MachineReg. - bool AddMachineRegPiece(const TargetRegisterInfo &TRI, unsigned MachineReg, - unsigned PieceSizeInBits = 0, - unsigned PieceOffsetInBits = 0); + bool AddMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg, + unsigned MaxSize = ~1U); /// Emit a signed constant. void AddSignedConstant(int64_t Value); @@ -102,20 +172,29 @@ public: /// Emit an unsigned constant. void AddUnsignedConstant(const APInt &Value); - /// \brief Emit an entire expression on top of a machine register location. + /// Emit a machine register location. As an optimization this may also consume + /// the prefix of a DwarfExpression if a more efficient representation for + /// combining the register location and the first operation exists. /// - /// \param PieceOffsetInBits If this is one piece out of a fragmented - /// location, this is the offset of the piece inside the entire variable. - /// \return false if no DWARF register exists for MachineReg. + /// \param FragmentOffsetInBits If this is one fragment out of a fragmented + /// location, this is the offset of the + /// fragment inside the entire variable.
+ /// \return false if no DWARF register exists + /// for MachineReg. bool AddMachineRegExpression(const TargetRegisterInfo &TRI, - const DIExpression *Expr, unsigned MachineReg, - unsigned PieceOffsetInBits = 0); - /// Emit a the operations remaining the DIExpressionIterator I. - /// \param PieceOffsetInBits If this is one piece out of a fragmented - /// location, this is the offset of the piece inside the entire variable. - void AddExpression(DIExpression::expr_op_iterator I, - DIExpression::expr_op_iterator E, - unsigned PieceOffsetInBits = 0); + DIExpressionCursor &Expr, unsigned MachineReg, + unsigned FragmentOffsetInBits = 0); + /// Emit all remaining operations in the DIExpressionCursor. + /// + /// \param FragmentOffsetInBits If this is one fragment out of multiple + /// locations, this is the offset of the + /// fragment inside the entire variable. + void AddExpression(DIExpressionCursor &&Expr, + unsigned FragmentOffsetInBits = 0); + + /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to + /// the fragment described by \c Expr. + void addFragmentOffset(const DIExpression *Expr); }; /// DwarfExpression implementation for .debug_loc entries. @@ -146,6 +225,10 @@ public: void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; + DIELoc *finalize() { + DwarfExpression::finalize(); + return &DIE; + } }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index e9fe98a..595f1d9 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -19,37 +19,7 @@ namespace llvm { DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) - : Asm(AP), StrPool(DA, *Asm, Pref) {} - -DwarfFile::~DwarfFile() { - for (DIEAbbrev *Abbrev : Abbreviations) - Abbrev->~DIEAbbrev(); -} - -// Define a unique number for the abbreviation. -// -DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) { - FoldingSetNodeID ID; - DIEAbbrev Abbrev = Die.generateAbbrev(); - Abbrev.Profile(ID); - - void *InsertPos; - if (DIEAbbrev *Existing = - AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) { - Die.setAbbrevNumber(Existing->getNumber()); - return *Existing; - } - - // Move the abbreviation to the heap and assign a number. - DIEAbbrev *New = new (AbbrevAllocator) DIEAbbrev(std::move(Abbrev)); - Abbreviations.push_back(New); - New->setNumber(Abbreviations.size()); - Die.setAbbrevNumber(Abbreviations.size()); - - // Store it for lookup. - AbbreviationsSet.InsertNode(New, InsertPos); - return *New; -} + : Asm(AP), Abbrevs(AbbrevAllocator), StrPool(DA, *Asm, Pref) {} void DwarfFile::addUnit(std::unique_ptr<DwarfCompileUnit> U) { CUs.push_back(std::move(U)); @@ -80,7 +50,7 @@ void DwarfFile::computeSizeAndOffsets() { // Iterate over each compile unit and set the size and offsets for each // DIE within each compile unit. All offsets are CU relative. for (const auto &TheU : CUs) { - TheU->setDebugInfoOffset(SecOffset); + TheU->setDebugSectionOffset(SecOffset); SecOffset += computeSizeAndOffsetsForUnit(TheU.get()); } } @@ -98,44 +68,10 @@ unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) { // Compute the size and offset of a DIE. The offset is relative to start of the // CU. It returns the offset after laying out the DIE. unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { - // Record the abbreviation. 
- const DIEAbbrev &Abbrev = assignAbbrevNumber(Die); - - // Set DIE offset - Die.setOffset(Offset); - - // Start the size with the size of abbreviation code. - Offset += getULEB128Size(Die.getAbbrevNumber()); - - // Size the DIE attribute values. - for (const auto &V : Die.values()) - // Size attribute value. - Offset += V.SizeOf(Asm); - - // Size the DIE children if any. - if (Die.hasChildren()) { - (void)Abbrev; - assert(Abbrev.hasChildren() && "Children flag not set"); - - for (auto &Child : Die.children()) - Offset = computeSizeAndOffset(Child, Offset); - - // End of children marker. - Offset += sizeof(int8_t); - } - - Die.setSize(Offset - Die.getOffset()); - return Offset; + return Die.computeOffsetsAndAbbrevs(Asm, Abbrevs, Offset); } -void DwarfFile::emitAbbrevs(MCSection *Section) { - // Check to see if it is worth the effort. - if (!Abbreviations.empty()) { - // Start the debug abbrev section. - Asm->OutStreamer->SwitchSection(Section); - Asm->emitDwarfAbbrevs(Abbreviations); - } -} +void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); } // Emit strings into a string section. void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index b73d89b..d4d2ed2 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -16,10 +16,10 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include <memory> -#include <vector> namespace llvm { class AsmPrinter; @@ -41,10 +41,7 @@ class DwarfFile { BumpPtrAllocator AbbrevAllocator; // Used to uniquely define abbreviations. - FoldingSet<DIEAbbrev> AbbreviationsSet; - - // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> Abbreviations; + DIEAbbrevSet Abbrevs; // A pointer to all units in the section. SmallVector<std::unique_ptr<DwarfCompileUnit>, 1> CUs; @@ -65,8 +62,6 @@ class DwarfFile { public: DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); - ~DwarfFile(); - const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() { return CUs; } @@ -81,12 +76,6 @@ public: /// \returns The size of the root DIE. unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU); - /// Define a unique number for the abbreviation. - /// - /// Compute the abbreviation for \c Die, look up its unique number, and - /// return a reference to it in the uniquing table. - DIEAbbrev &assignAbbrevNumber(DIE &Die); - /// \brief Add a unit to the list of CUs. 
void addUnit(std::unique_ptr<DwarfCompileUnit> U); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 4100d72..2a866c0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -12,28 +12,33 @@ //===----------------------------------------------------------------------===// #include "DwarfUnit.h" -#include "DwarfAccelTable.h" +#include "AddressPool.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/None.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> +#include <string> +#include <utility> using namespace llvm; @@ -46,18 +51,21 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) - : DwarfExpression(AP.getDwarfDebug()->getDwarfVersion()), AP(AP), DU(DU), + : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU), DIE(DIE) {} void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); } + void DIEDwarfExpression::EmitSigned(int64_t Value) { DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); } + void DIEDwarfExpression::EmitUnsigned(uint64_t Value) { DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); } + bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) { return MachineReg == TRI.getFrameRegister(*AP.MF); @@ -65,10 +73,8 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) - : CUNode(Node), UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), Asm(A), - DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { - assert(UnitTag == dwarf::DW_TAG_compile_unit || - UnitTag == dwarf::DW_TAG_type_unit); + : DIEUnit(A->getDwarfVersion(), A->getPointerSize(), UnitTag), CUNode(Node), + Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) { } DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, @@ -77,7 +83,7 @@ DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU), SplitLineTable(SplitLineTable) { if (SplitLineTable) - addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0); + addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0); } DwarfUnit::~DwarfUnit() { @@ -194,6 +200,8 @@ void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer) { if (!Form) Form 
= DIEInteger::BestForm(false, Integer); + assert(Form != dwarf::DW_FORM_implicit_const && + "DW_FORM_implicit_const is used only for signed integers"); Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer)); } @@ -286,15 +294,15 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute, void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry) { - const DIE *DieCU = Die.getUnitOrNull(); - const DIE *EntryCU = Entry.getEntry().getUnitOrNull(); - if (!DieCU) + const DIEUnit *CU = Die.getUnit(); + const DIEUnit *EntryCU = Entry.getEntry().getUnit(); + if (!CU) // We assume that Die belongs to this CU, if it is not linked to any CU yet. - DieCU = &getUnitDie(); + CU = getUnitDie().getUnit(); if (!EntryCU) - EntryCU = &getUnitDie(); + EntryCU = getUnitDie().getUnit(); Die.addValue(DIEValueAllocator, Attribute, - EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, Entry); } @@ -365,21 +373,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) { addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); } -bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, - unsigned SizeInBits, unsigned OffsetInBits) { - DIEDwarfExpression Expr(*Asm, *this, TheDie); - Expr.AddMachineRegPiece(*Asm->MF->getSubtarget().getRegisterInfo(), Reg, - SizeInBits, OffsetInBits); - return true; -} - -bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, - int64_t Offset) { - DIEDwarfExpression Expr(*Asm, *this, TheDie); - return Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), - Reg, Offset); -} - /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName either the struct, or a pointer to the struct, as @@ -472,12 +465,17 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // Decode the original location, and use that as the start of the byref // variable's location. DIELoc *Loc = new (DIEValueAllocator) DIELoc; + SmallVector<uint64_t, 6> DIExpr; + DIEDwarfExpression Expr(*Asm, *this, *Loc); bool validReg; if (Location.isReg()) - validReg = addRegisterOpPiece(*Loc, Location.getReg()); + validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(), + Location.getReg()); else - validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + validReg = + Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), + Location.getReg(), Location.getOffset()); if (!validReg) return; @@ -485,27 +483,29 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + DIExpr.push_back(dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(*Loc, dwarf::DW_FORM_udata, forwardingFieldOffset); + DIExpr.push_back(dwarf::DW_OP_plus); + DIExpr.push_back(forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. 
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + DIExpr.push_back(dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(*Loc, dwarf::DW_FORM_udata, varFieldOffset); + DIExpr.push_back(dwarf::DW_OP_plus); + DIExpr.push_back(varFieldOffset); } + Expr.AddExpression(makeArrayRef(DIExpr)); + Expr.finalize(); // Now attach the location information to the DIE. addBlock(Die, Attribute, Loc); @@ -538,7 +538,7 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || - T == dwarf::DW_TAG_restrict_type); + T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type); DITypeRef Deriv = DTy->getBaseType(); assert(Deriv && "Expected valid base type"); return isUnsignedDIType(DD, DD->resolve(Deriv)); @@ -699,6 +699,10 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType())); + // DW_TAG_atomic_type is not supported in DWARF < 5 + if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5) + return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType())); + // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. auto *Context = resolve(Ty->getScope()); @@ -735,7 +739,7 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { void DwarfUnit::updateAcceleratorTables(const DIScope *Context, const DIType *Ty, const DIE &TyDIE) { if (!Ty->getName().empty() && !Ty->isForwardDecl()) { - bool IsImplementation = 0; + bool IsImplementation = false; if (auto *CT = dyn_cast<DICompositeType>(Ty)) { // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. @@ -999,6 +1003,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (RLang) addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); + + // Add align info if available. + if (uint32_t AlignInBytes = CTy->getAlignInBytes()) + addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); } } @@ -1066,6 +1075,8 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { DD->addAccelNamespace(Name, NDie); addGlobalName(Name, NDie, NS->getScope()); addSourceLine(NDie, NS); + if (NS->getExportSymbols()) + addFlag(NDie, dwarf::DW_AT_export_symbols); return &NDie; } @@ -1133,7 +1144,9 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, assert(DeclDie && "This DIE should've already been constructed when the " "definition DIE was created in " "getOrCreateSubprogramDIE"); - DeclLinkageName = SPDecl->getLinkageName(); + // Look at the Decl's linkage name only if we emitted it. 
+ if (DD->useAllLinkageNames()) + DeclLinkageName = SPDecl->getLinkageName(); unsigned DeclID = getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory()); unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory()); @@ -1248,6 +1261,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (SP->isRValueReference()) addFlag(SPDie, dwarf::DW_AT_rvalue_reference); + if (SP->isNoReturn()) + addFlag(SPDie, dwarf::DW_AT_noreturn); + if (SP->isProtected()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); @@ -1260,6 +1276,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (SP->isExplicit()) addFlag(SPDie, dwarf::DW_AT_explicit); + + if (SP->isMainSubprogram()) + addFlag(SPDie, dwarf::DW_AT_main_subprogram); } void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, @@ -1288,7 +1307,7 @@ DIE *DwarfUnit::getIndexTyDie() { if (IndexTyDie) return IndexTyDie; // Construct an integer type to use for indexes. - IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie); + IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie()); addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype"); addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, @@ -1383,6 +1402,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { } else { uint64_t Size = DT->getSizeInBits(); uint64_t FieldSize = DD->getBaseTypeSize(DT); + uint32_t AlignInBytes = DT->getAlignInBytes(); uint64_t OffsetInBytes; bool IsBitfield = FieldSize && Size != FieldSize; @@ -1393,8 +1413,11 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); uint64_t Offset = DT->getOffsetInBits(); - uint64_t Align = DT->getAlignInBits() ? DT->getAlignInBits() : FieldSize; - uint64_t AlignMask = ~(Align - 1); + // We can't use DT->getAlignInBits() here: AlignInBits for member type + // is non-zero if and only if alignment was forced (e.g. _Alignas()), + // which can't be done with bitfields. Thus we use FieldSize here. + uint32_t AlignInBits = FieldSize; + uint32_t AlignMask = ~(AlignInBits - 1); // The bits from the start of the storage unit to the start of the field. uint64_t StartBitOffset = Offset - (Offset & AlignMask); // The byte offset of the field's aligned storage unit inside the struct. @@ -1417,6 +1440,9 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { } else { // This is not a bitfield. 
OffsetInBytes = DT->getOffsetInBits() / 8; + if (AlignInBytes) + addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); } if (DD->getDwarfVersion() <= 2) { @@ -1493,13 +1519,17 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant())) addConstantFPValue(StaticMemberDIE, CFP); + if (uint32_t AlignInBytes = DT->getAlignInBytes()) + addUInt(StaticMemberDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + return &StaticMemberDIE; } void DwarfUnit::emitHeader(bool UseOffsets) { // Emit size of content not including length itself Asm->OutStreamer->AddComment("Length of Unit"); - Asm->EmitInt32(getHeaderSize() + UnitDie.getSize()); + Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize()); Asm->OutStreamer->AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); @@ -1519,11 +1549,6 @@ void DwarfUnit::emitHeader(bool UseOffsets) { Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } -void DwarfUnit::initSection(MCSection *Section) { - assert(!this->Section); - this->Section = Section; -} - void DwarfTypeUnit::emitHeader(bool UseOffsets) { DwarfUnit::emitHeader(UseOffsets); Asm->OutStreamer->AddComment("Type Signature"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index e225f92..8654d6f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -65,7 +65,7 @@ public: //===----------------------------------------------------------------------===// /// This dwarf writer support class manages information associated with a /// source file. -class DwarfUnit { + class DwarfUnit : public DIEUnit { protected: /// MDNode for the compile unit. const DICompileUnit *CUNode; @@ -73,9 +73,6 @@ protected: // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - /// Unit debug information entry. - DIE &UnitDie; - /// Target of Dwarf emission. AsmPrinter *Asm; @@ -83,7 +80,7 @@ protected: DwarfDebug *DD; DwarfFile *DU; - /// An anonymous type for index type. Owned by UnitDie. + /// An anonymous type for index type. Owned by DIEUnit. DIE *IndexTyDie; /// Tracks the mapping of unit level debug information variables to debug @@ -101,9 +98,6 @@ protected: /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const DINode *> ContainingTypeMap; - /// The section this unit will be emitted in. - MCSection *Section; - DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); @@ -112,21 +106,13 @@ protected: public: virtual ~DwarfUnit(); - void initSection(MCSection *Section); - - MCSection *getSection() const { - assert(Section); - return Section; - } - // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } const DICompileUnit *getCUNode() const { return CUNode; } - DIE &getUnitDie() { return UnitDie; } /// Return true if this compile unit has something to write out. - bool hasContent() const { return UnitDie.hasChildren(); } + bool hasContent() const { return getUnitDie().hasChildren(); } /// Get string containing language specific context for a global name. /// @@ -249,17 +235,6 @@ public: /// Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DINodeArray TParams); - /// Add register operand. 
- /// \returns false if the register does not exist, e.g., because it was never - /// materialized. - bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, - unsigned SizeInBits = 0, unsigned OffsetInBits = 0); - - /// Add register offset. - /// \returns false if the register does not exist, e.g., because it was never - /// materialized. - bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); - // FIXME: Should be reformulated in terms of addComplexAddress. /// Start with the address based on the location provided, and generate the /// DWARF information necessary to find the actual Block variable (navigating diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index e24dcb1..0a4a7a0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -74,7 +74,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, // output using a fixed width encoding. FilterOffsets[i] holds the byte // offset corresponding to FilterIds[i]. - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const std::vector<unsigned> &FilterIds = Asm->MF->getFilterIds(); SmallVector<int, 16> FilterOffsets; FilterOffsets.reserve(FilterIds.size()); int Offset = -1; @@ -296,7 +296,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, else { // SjLj EH must maintain the call sites in the order assigned // to them by the SjLjPrepare pass. - unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel); + unsigned SiteNo = Asm->MF->getCallSiteBeginLabel(BeginLabel); if (CallSites.size() < SiteNo) CallSites.resize(SiteNo); CallSites[SiteNo - 1] = Site; @@ -336,9 +336,10 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, /// 3. Type ID table contains references to all the C++ typeinfo for all /// catches in the function. This tables is reverse indexed base 1. void EHStreamer::emitExceptionTable() { - const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + const MachineFunction *MF = Asm->MF; + const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MF->getFilterIds(); + const std::vector<LandingPadInfo> &PadInfos = MF->getLandingPads(); // Sort the landing pads in order of their type ids. This is used to fold // duplicate actions. 
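The common thread in the exception-handling hunks above and below is that per-function EH tables (landing pads, typeinfos, filter ids) are now read from the MachineFunction rather than from MachineModuleInfo. The following caller-side sketch only illustrates that pattern; it is not part of the patch, the helper name readEHTables is hypothetical, and the only accessors assumed are the ones that appear in the hunks (getLandingPads, getTypeInfos, getFilterIds).

#include "llvm/CodeGen/MachineFunction.h"

// Sketch only: after this change the per-function EH tables hang off the
// MachineFunction; previously they were fetched through MachineModuleInfo.
static void readEHTables(const llvm::MachineFunction &MF) {
  const auto &Pads = MF.getLandingPads();    // std::vector<LandingPadInfo>
  const auto &TypeInfos = MF.getTypeInfos(); // std::vector<const GlobalValue *>
  const auto &FilterIds = MF.getFilterIds(); // std::vector<unsigned>
  (void)Pads; (void)TypeInfos; (void)FilterIds; // consumed by the EH emitters
}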
@@ -649,8 +650,9 @@ void EHStreamer::emitExceptionTable() { } void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { - const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const MachineFunction *MF = Asm->MF; + const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MF->getFilterIds(); bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index c09ef6a..8baee4d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -50,7 +50,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { std::string SymName; SymName += "caml"; size_t Letter = SymName.size(); - SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.')); + SymName.append(MId.begin(), find(MId, '.')); SymName += "__"; SymName += Id; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index e5933d8..9d7c96a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -63,8 +63,8 @@ void WinException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. - bool hasLandingPads = !MMI->getLandingPads().empty(); - bool hasEHFunclets = MMI->hasEHFunclets(); + bool hasLandingPads = !MF->getLandingPads().empty(); + bool hasEHFunclets = MF->hasEHFunclets(); const Function *F = MF->getFunction(); @@ -72,17 +72,21 @@ void WinException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = nullptr; - if (F->hasPersonalityFn()) - Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); - bool forceEmitPersonality = - F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && - F->needsUnwindTableEntry(); + EHPersonality Per = EHPersonality::Unknown; + const Function *PerFn = nullptr; + if (F->hasPersonalityFn()) { + PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + Per = classifyEHPersonality(PerFn); + } + + bool forceEmitPersonality = F->hasPersonalityFn() && + !isNoOpWithoutInvoke(Per) && + F->needsUnwindTableEntry(); shouldEmitPersonality = forceEmitPersonality || ((hasLandingPads || hasEHFunclets) && - PerEncoding != dwarf::DW_EH_PE_omit && Per); + PerEncoding != dwarf::DW_EH_PE_omit && PerFn); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); shouldEmitLSDA = shouldEmitPersonality && @@ -90,7 +94,16 @@ void WinException::beginFunction(const MachineFunction *MF) { // If we're not using CFI, we don't want the CFI or the personality, but we // might want EH tables if we had EH pads. - if (!Asm->MAI->usesWindowsCFI()) { + if (!Asm->MAI->usesWindowsCFI() || (!MF->hasWinCFI() && !PerFn)) { + if (Per == EHPersonality::MSVC_X86SEH && !hasEHFunclets) { + // If this is 32-bit SEH and we don't have any funclets (really invokes), + // make sure we emit the parent offset label. Some unreferenced filter + // functions may still refer to it. 
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + StringRef FLinkageName = + GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); + } shouldEmitLSDA = hasEHFunclets; shouldEmitPersonality = false; return; @@ -108,18 +121,20 @@ void WinException::endFunction(const MachineFunction *MF) { const Function *F = MF->getFunction(); EHPersonality Per = EHPersonality::Unknown; if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()); + Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts()); // Get rid of any dead landing pads if we're not using funclets. In funclet // schemes, the landing pad is not actually reachable. It only exists so // that we can emit the right table data. - if (!isFuncletEHPersonality(Per)) - MMI->TidyLandingPads(); + if (!isFuncletEHPersonality(Per)) { + MachineFunction *NonConstMF = const_cast<MachineFunction*>(MF); + NonConstMF->tidyLandingPads(); + } endFunclet(); // endFunclet will emit the necessary .xdata tables for x64 SEH. - if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets()) + if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets()) return; if (shouldEmitPersonality || shouldEmitLSDA) { @@ -147,7 +162,7 @@ void WinException::endFunction(const MachineFunction *MF) { } } -/// Retreive the MCSymbol for a GlobalValue or MachineBasicBlock. +/// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock. static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm, const MachineBasicBlock *MBB) { if (!MBB) @@ -193,8 +208,10 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, } // Mark 'Sym' as starting our funclet. - if (shouldEmitMoves || shouldEmitPersonality) + if (shouldEmitMoves || shouldEmitPersonality) { + CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly(); Asm->OutStreamer->EmitWinCFIStartProc(Sym); + } if (shouldEmitPersonality) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); @@ -204,16 +221,14 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, if (F->hasPersonalityFn()) PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); const MCSymbol *PersHandlerSym = - TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI); - - // Classify the personality routine so that we may reason about it. - EHPersonality Per = EHPersonality::Unknown; - if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()); - - // Do not emit a .seh_handler directive if it is a C++ cleanup funclet. - if (Per != EHPersonality::MSVC_CXX || - !CurrentFuncletEntry->isCleanupFuncletEntry()) + TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI); + + // Do not emit a .seh_handler directives for cleanup funclets. + // FIXME: This means cleanup funclets cannot handle exceptions. Given that + // Clang doesn't produce EH constructs inside cleanup funclets and LLVM's + // inliner doesn't allow inlining them, this isn't a major problem in + // practice. 
+ if (!CurrentFuncletEntry->isCleanupFuncletEntry()) Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); } } @@ -223,15 +238,12 @@ void WinException::endFunclet() { if (!CurrentFuncletEntry) return; + const MachineFunction *MF = Asm->MF; if (shouldEmitMoves || shouldEmitPersonality) { - const Function *F = Asm->MF->getFunction(); + const Function *F = MF->getFunction(); EHPersonality Per = EHPersonality::Unknown; if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()); - - // The .seh_handlerdata directive implicitly switches section, push the - // current section so that we may return to it. - Asm->OutStreamer->PushSection(); + Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts()); // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer->EmitWinEHHandlerData(); @@ -244,18 +256,17 @@ void WinException::endFunclet() { MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( Twine("$cppxdata$", FuncLinkageName)); Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); - } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() && + } else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() && !CurrentFuncletEntry->isEHFuncletEntry()) { // If this is the parent function in Win64 SEH, emit the LSDA immediately // following .seh_handlerdata. - emitCSpecificHandlerTable(Asm->MF); + emitCSpecificHandlerTable(MF); } - // Switch back to the previous section now that we are done writing to - // .xdata. - Asm->OutStreamer->PopSection(); - - // Emit a .seh_endproc directive to mark the end of the function. + // Switch back to the funclet start .text section now that we are done + // writing to .xdata, and emit an .seh_endproc directive to mark the end of + // the function. + Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); Asm->OutStreamer->EmitWinCFIEndProc(); } @@ -905,15 +916,24 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, // registration in order to recover the parent frame pointer. Now that we know // we've code generated the parent, we can emit the label assignment that // those helpers use to get the offset of the registration node. + + // Compute the parent frame offset. The EHRegNodeFrameIndex will be invalid if + // after optimization all the invokes were eliminated. We still need to emit + // the parent frame offset label, but it should be garbage and should never be + // used. + int64_t Offset = 0; + int FI = FuncInfo.EHRegNodeFrameIndex; + if (FI != INT_MAX) { + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + unsigned UnusedReg; + Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg); + } + MCContext &Ctx = Asm->OutContext; MCSymbol *ParentFrameOffset = Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); - unsigned UnusedReg; - const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); - int64_t Offset = TFI->getFrameIndexReference( - *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg); - const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, + MCConstantExpr::create(Offset, Ctx)); } /// Emit the language-specific data that _except_handler3 and 4 expect. This is @@ -966,11 +986,11 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // Retrieve the Guard Stack slot. 
int GSCookieOffset = -2; - const MachineFrameInfo *MFI = MF->getFrameInfo(); - if (MFI->hasStackProtectorIndex()) { + const MachineFrameInfo &MFI = MF->getFrameInfo(); + if (MFI.hasStackProtectorIndex()) { unsigned UnusedReg; const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); - int SSPIdx = MFI->getStackProtectorIndex(); + int SSPIdx = MFI.getStackProtectorIndex(); GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h index acb3010..371061c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -21,6 +21,7 @@ class Function; class GlobalValue; class MachineFunction; class MCExpr; +class MCSection; class Value; struct WinEHFuncInfo; @@ -40,6 +41,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// Pointer to the current funclet entry BB. const MachineBasicBlock *CurrentFuncletEntry = nullptr; + /// The section of the last funclet start. + MCSection *CurrentFuncletTextSection = nullptr; + void emitCSpecificHandlerTable(const MachineFunction *MF); void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 5dacbf9..6fba161 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -110,9 +110,12 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, MBFIWrapper &FreqInfo, - const MachineBranchProbabilityInfo &ProbInfo) - : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo), - MBPI(ProbInfo) { + const MachineBranchProbabilityInfo &ProbInfo, + unsigned MinTailLength) + : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength), + MBBFreqInfo(FreqInfo), MBPI(ProbInfo) { + if (MinCommonTailLength == 0) + MinCommonTailLength = TailMergeSize; switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; case cl::BOU_TRUE: EnableTailMerge = true; break; @@ -141,59 +144,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { MLI->removeBlock(MBB); } -/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def -/// followed by terminators, and if the implicitly defined registers are not -/// used by the terminators, remove those implicit_def's. e.g. -/// BB1: -/// r0 = implicit_def -/// r1 = implicit_def -/// br -/// This block can be optimized away later if the implicit instructions are -/// removed. -bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { - SmallSet<unsigned, 4> ImpDefRegs; - MachineBasicBlock::iterator I = MBB->begin(); - while (I != MBB->end()) { - if (!I->isImplicitDef()) - break; - unsigned Reg = I->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - ImpDefRegs.insert(*SubRegs); - } else { - ImpDefRegs.insert(Reg); - } - ++I; - } - if (ImpDefRegs.empty()) - return false; - - MachineBasicBlock::iterator FirstTerm = I; - while (I != MBB->end()) { - if (!TII->isUnpredicatedTerminator(*I)) - return false; - // See if it uses any of the implicitly defined registers. 
- for (const MachineOperand &MO : I->operands()) { - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned Reg = MO.getReg(); - if (ImpDefRegs.count(Reg)) - return false; - } - ++I; - } - - I = MBB->begin(); - while (I != FirstTerm) { - MachineInstr *ImpDefMI = &*I; - ++I; - MBB->erase(ImpDefMI); - } - - return true; -} - /// OptimizeFunction - Perhaps branch folding, tail merging and other /// CFG optimizations on the given function. Block placement changes the layout /// and may create new tail merging opportunities. @@ -224,7 +174,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, SmallVector<MachineOperand, 4> Cond; if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true)) MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); - MadeChange |= OptimizeImpDefsBlock(&MBB); } // Recalculate funclet membership. @@ -399,37 +348,16 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, return TailLen; } -void BranchFolder::computeLiveIns(MachineBasicBlock &MBB) { - if (!UpdateLiveIns) - return; - - LiveRegs.init(TRI); - LiveRegs.addLiveOutsNoPristines(MBB); - for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) - LiveRegs.stepBackward(MI); - - for (unsigned Reg : LiveRegs) { - // Skip the register if we are about to add one of its super registers. - bool ContainsSuperReg = false; - for (MCSuperRegIterator SReg(Reg, TRI); SReg.isValid(); ++SReg) { - if (LiveRegs.contains(*SReg)) { - ContainsSuperReg = true; - break; - } - } - if (ContainsSuperReg) - continue; - MBB.addLiveIn(Reg); - } -} - /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything /// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { TII->ReplaceTailWithBranchTo(OldInst, NewDest); - computeLiveIns(*NewDest); + if (UpdateLiveIns) { + NewDest->clearLiveIns(); + computeLiveIns(LiveRegs, *TRI, *NewDest); + } ++NumTailMerge; } @@ -467,7 +395,8 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // NewMBB inherits CurMBB's block frequency. MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); - computeLiveIns(*NewMBB); + if (UpdateLiveIns) + computeLiveIns(LiveRegs, *TRI, *NewMBB); // Add the new block to the funclet. const auto &FuncletI = FuncletMembership.find(&CurMBB); @@ -511,14 +440,14 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { - if (!TII->ReverseBranchCondition(Cond)) { - TII->RemoveBranch(*CurMBB); - TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); + if (!TII->reverseBranchCondition(Cond)) { + TII->removeBranch(*CurMBB); + TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); return; } } } - TII->InsertBranch(*CurMBB, SuccBB, nullptr, + TII->insertBranch(*CurMBB, SuccBB, nullptr, SmallVector<MachineOperand, 0>(), dl); } @@ -591,13 +520,26 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, /// and decide if it would be profitable to merge those tails. Return the /// length of the common tail and iterators to the first common instruction /// in each block. +/// MBB1, MBB2 The blocks to check +/// MinCommonTailLength Minimum size of tail block to be merged. 
+/// CommonTailLen Out parameter to record the size of the shared tail between +/// MBB1 and MBB2 +/// I1, I2 Iterator references that will be changed to point to the first +/// instruction in the common tail shared by MBB1,MBB2 +/// SuccBB A common successor of MBB1, MBB2 which are in a canonical form +/// relative to SuccBB +/// PredBB The layout predecessor of SuccBB, if any. +/// FuncletMembership map from block to funclet #. +/// AfterPlacement True if we are merging blocks after layout. Stricter +/// thresholds apply to prevent undoing tail-duplication. static bool ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, - unsigned minCommonTailLength, unsigned &CommonTailLen, + unsigned MinCommonTailLength, unsigned &CommonTailLen, MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB, - DenseMap<const MachineBasicBlock *, int> &FuncletMembership) { + DenseMap<const MachineBasicBlock *, int> &FuncletMembership, + bool AfterPlacement) { // It is never profitable to tail-merge blocks from two different funclets. if (!FuncletMembership.empty()) { auto Funclet1 = FuncletMembership.find(MBB1); @@ -617,7 +559,11 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. - if (MBB1 == PredBB || MBB2 == PredBB) { + // This is true only for a single successor. For multiple successors, we are + // trading a conditional branch for an unconditional one. + // TODO: Re-visit successor size for non-layout tail merging. + if ((MBB1 == PredBB || MBB2 == PredBB) && + (!AfterPlacement || MBB1->succ_size() == 1)) { MachineBasicBlock::iterator I; unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I); if (CommonTailLen > NumTerms) @@ -635,15 +581,18 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // If both blocks have an unconditional branch temporarily stripped out, // count that as an additional common instruction for the following - // heuristics. + // heuristics. This heuristic is only accurate for single-succ blocks, so to + // make sure that during layout merging and duplicating don't crash, we check + // for that when merging during layout. unsigned EffectiveTailLen = CommonTailLen; if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && + (MBB1->succ_size() == 1 || !AfterPlacement) && !MBB1->back().isBarrier() && !MBB2->back().isBarrier()) ++EffectiveTailLen; // Check if the common tail is long enough to be worthwhile. - if (EffectiveTailLen >= minCommonTailLength) + if (EffectiveTailLen >= MinCommonTailLength) return true; // If we are optimizing for code size, 2 instructions in common is enough if @@ -666,7 +615,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, /// those blocks appear in MergePotentials (where they are not necessarily /// consecutive). 
unsigned BranchFolder::ComputeSameTails(unsigned CurHash, - unsigned minCommonTailLength, + unsigned MinCommonTailLength, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { unsigned maxCommonTailLength = 0U; @@ -679,10 +628,11 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) { unsigned CommonTailLen; if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), - minCommonTailLength, + MinCommonTailLength, CommonTailLen, TrialBBI1, TrialBBI2, SuccBB, PredBB, - FuncletMembership)) { + FuncletMembership, + AfterBlockPlacement)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; @@ -749,8 +699,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, SameTails[commonTailIndex].getTailStartPos(); MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - // If the common tail includes any debug info we will take it pretty - // randomly from one of the inputs. Might be better to remove it? DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); @@ -832,14 +780,13 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos, // branch to Succ added (but the predecessor/successor lists need no // adjustment). The lone predecessor of Succ that falls through into Succ, // if any, is given in PredBB. +// MinCommonTailLength - Except for the special cases below, tail-merge if +// there are at least this many instructions in common. bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, - MachineBasicBlock *PredBB) { + MachineBasicBlock *PredBB, + unsigned MinCommonTailLength) { bool MadeChange = false; - // Except for the special cases below, tail-merge if there are at least - // this many instructions in common. - unsigned minCommonTailLength = TailMergeSize; - DEBUG(dbgs() << "\nTryTailMergeBlocks: "; for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() @@ -852,8 +799,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, << PredBB->getNumber() << "\n"; } dbgs() << "Looking for common tails of at least " - << minCommonTailLength << " instruction" - << (minCommonTailLength == 1 ? "" : "s") << '\n'; + << MinCommonTailLength << " instruction" + << (MinCommonTailLength == 1 ? "" : "s") << '\n'; ); // Sort by hash value so that blocks with identical end sequences sort @@ -867,10 +814,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Build SameTails, identifying the set of blocks with this hash code // and with the maximum number of instructions in common. unsigned maxCommonTailLength = ComputeSameTails(CurHash, - minCommonTailLength, + MinCommonTailLength, SuccBB, PredBB); - // If we didn't find any pair that has at least minCommonTailLength + // If we didn't find any pair that has at least MinCommonTailLength // instructions in common, remove all blocks with this hash code and retry. if (SameTails.empty()) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); @@ -928,6 +875,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Recompute common tail MBB's edge weights and block frequency. setCommonTailEdgeWeights(*MBB); + // Remove the original debug location from the common tail. + for (auto &MI : *MBB) + if (!MI.isDebugValue()) + MI.setDebugLoc(DebugLoc()); + // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. 
DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() @@ -976,7 +928,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // See if we can do any tail merging on those. if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(nullptr, nullptr); + MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength); } // Look at blocks (IBB) with multiple predecessors (PBB). @@ -1056,7 +1008,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // branch. SmallVector<MachineOperand, 4> NewCond(Cond); if (!Cond.empty() && TBB == IBB) { - if (TII->ReverseBranchCondition(NewCond)) + if (TII->reverseBranchCondition(NewCond)) continue; // This is the QBB case described above if (!FBB) { @@ -1092,10 +1044,10 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Remove the unconditional branch at the end, if any. if (TBB && (Cond.empty() || FBB)) { DebugLoc dl; // FIXME: this is nowhere - TII->RemoveBranch(*PBB); + TII->removeBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, + TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, NewCond, dl); } @@ -1110,7 +1062,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { TriedMerging.insert(MergePotentials[i].getBlock()); if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(IBB, PredBB); + MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength); // Reinsert an unconditional branch if needed. The 1 below can occur as a // result of removing blocks in TryTailMergeBlocks. @@ -1311,10 +1263,10 @@ ReoptimizeBlock: // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); + TII->removeBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) - TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); + TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1359,7 +1311,7 @@ ReoptimizeBlock: // If the previous branch *only* branches to *this* block (conditional or // not) remove the branch. if (PriorTBB == MBB && !PriorFBB) { - TII->RemoveBranch(PrevBB); + TII->removeBranch(PrevBB); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1369,8 +1321,8 @@ ReoptimizeBlock: // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); + TII->removeBranch(PrevBB); + TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1381,10 +1333,10 @@ ReoptimizeBlock: // fall-through. if (PriorTBB == MBB) { SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); - if (!TII->ReverseBranchCondition(NewPriorCond)) { + if (!TII->reverseBranchCondition(NewPriorCond)) { DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); + TII->removeBranch(PrevBB); + TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1416,13 +1368,13 @@ ReoptimizeBlock: if (DoTransform) { // Reverse the branch so we will fall through on the previous true cond. 
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); - if (!TII->ReverseBranchCondition(NewPriorCond)) { + if (!TII->reverseBranchCondition(NewPriorCond)) { DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); DebugLoc dl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); + TII->removeBranch(PrevBB); + TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); // Move this block to the end of the function. MBB->moveAfter(&MF.back()); @@ -1450,10 +1402,10 @@ ReoptimizeBlock: // Loop: xxx; jncc Loop; jmp Out if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector<MachineOperand, 4> NewCond(CurCond); - if (!TII->ReverseBranchCondition(NewCond)) { + if (!TII->reverseBranchCondition(NewCond)) { DebugLoc dl = getBranchDebugLoc(*MBB); - TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); + TII->removeBranch(*MBB); + TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1469,7 +1421,7 @@ ReoptimizeBlock: // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and // then seeing if the block is empty. - TII->RemoveBranch(*MBB); + TII->removeBranch(*MBB); // If the only things remaining in the block are debug info, remove these // as well, so this will behave the same as an empty block in non-debug // mode. @@ -1500,8 +1452,8 @@ ReoptimizeBlock: PriorFBB = MBB; } DebugLoc pdl = getBranchDebugLoc(PrevBB); - TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); + TII->removeBranch(PrevBB); + TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1526,9 +1478,9 @@ ReoptimizeBlock: *PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { DebugLoc pdl = getBranchDebugLoc(*PMBB); - TII->RemoveBranch(*PMBB); + TII->removeBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); + TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false); @@ -1548,7 +1500,7 @@ ReoptimizeBlock: } // Add the branch back if the block is more than just an uncond branch. - TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl); + TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl); } } @@ -1585,7 +1537,7 @@ ReoptimizeBlock: if (CurFallsThru) { MachineBasicBlock *NextBB = &*std::next(MBB->getIterator()); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); + TII->insertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1615,18 +1567,22 @@ ReoptimizeBlock: // Okay, there is no really great place to put this block. If, however, // the block before this one would be a fall-through if this block were - // removed, move this block to the end of the function. + // removed, move this block to the end of the function. There is no real + // advantage in "falling through" to an EH block, so we don't want to + // perform this transformation for that case. + // + // Also, Windows EH introduced the possibility of an arbitrary number of + // successors to a given block. 
The analyzeBranch call does not consider + // exception handling and so we can get in a state where a block + // containing a call is followed by multiple EH blocks that would be + // rotated infinitely at the end of the function if the transformation + // below were performed for EH "FallThrough" blocks. Therefore, even if + // that appears not to be happening anymore, we should assume that it is + // possible and not remove the "!FallThrough()->isEHPad" condition below. MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector<MachineOperand, 4> PrevCond; - // We're looking for cases where PrevBB could possibly fall through to - // FallThrough, but if FallThrough is an EH pad that wouldn't be useful - // so here we skip over any EH pads so we might have a chance to find - // a branch target from PrevBB. - while (FallThrough != MF.end() && FallThrough->isEHPad()) - ++FallThrough; - // Now check to see if the current block is sitting between PrevBB and - // a block to which it could fall through. if (FallThrough != MF.end() && + !FallThrough->isEHPad() && !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && PrevBB.isSuccessor(&*FallThrough)) { MBB->moveAfter(&MF.back()); @@ -1720,10 +1676,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // The terminator is probably a conditional branch, try not to separate the // branch from condition setting instruction. - MachineBasicBlock::iterator PI = Loc; - --PI; - while (PI != MBB->begin() && PI->isDebugValue()) - --PI; + MachineBasicBlock::iterator PI = + skipDebugInstructionsBackward(std::prev(Loc), MBB->begin()); bool IsDef = false; for (const MachineOperand &MO : PI->operands()) { @@ -1817,18 +1771,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MachineBasicBlock::iterator FIE = FBB->end(); while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. - if (TIB->isDebugValue()) { - while (TIB != TIE && TIB->isDebugValue()) - ++TIB; - if (TIB == TIE) - break; - } - if (FIB->isDebugValue()) { - while (FIB != FIE && FIB->isDebugValue()) - ++FIB; - if (FIB == FIE) - break; - } + TIB = skipDebugInstructionsForward(TIB, TIE); + FIB = skipDebugInstructionsForward(FIB, FIE); + if (TIB == TIE || FIB == FIE) + break; + if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead)) break; @@ -1929,14 +1876,21 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { FBB->erase(FBB->begin(), FIB); // Update livein's. + bool AddedLiveIns = false; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Def = LocalDefs[i]; if (LocalDefsSet.count(Def)) { TBB->addLiveIn(Def); FBB->addLiveIn(Def); + AddedLiveIns = true; } } + if (AddedLiveIns) { + TBB->sortUniqueLiveIns(); + FBB->sortUniqueLiveIns(); + } + ++NumHoist; return true; } diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index 36a5a2e..fc48e48 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -29,9 +29,13 @@ namespace llvm { public: class MBFIWrapper; - explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, + explicit BranchFolder(bool defaultEnableTailMerge, + bool CommonHoist, MBFIWrapper &MBFI, - const MachineBranchProbabilityInfo &MBPI); + const MachineBranchProbabilityInfo &MBPI, + // Min tail length to merge. Defaults to commandline + // flag. Ignored for optsize. 
+ unsigned MinCommonTailLength = 0); bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, MachineModuleInfo *mmi, @@ -99,6 +103,7 @@ namespace llvm { bool EnableTailMerge; bool EnableHoistCommonCode; bool UpdateLiveIns; + unsigned MinCommonTailLength; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; @@ -129,9 +134,9 @@ namespace llvm { bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); + MachineBasicBlock* PredBB, + unsigned MinCommonTailLength); void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB); - void computeLiveIns(MachineBasicBlock &MBB); void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, @@ -150,7 +155,6 @@ namespace llvm { bool OptimizeBranches(MachineFunction &MF); bool OptimizeBlock(MachineBasicBlock *MBB); void RemoveDeadBlock(MachineBasicBlock *MBB); - bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); bool HoistCommonCode(MachineFunction &MF); bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); diff --git a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp new file mode 100644 index 0000000..8b27570 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -0,0 +1,510 @@ +//===-- BranchRelaxation.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "branch-relaxation" + +STATISTIC(NumSplit, "Number of basic blocks split"); +STATISTIC(NumConditionalRelaxed, "Number of conditional branches relaxed"); +STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed"); + +#define BRANCH_RELAX_NAME "Branch relaxation pass" + +namespace { +class BranchRelaxation : public MachineFunctionPass { + /// BasicBlockInfo - Information about the offset and size of a single + /// basic block. + struct BasicBlockInfo { + /// Offset - Distance from the beginning of the function to the beginning + /// of this basic block. + /// + /// The offset is always aligned as required by the basic block. + unsigned Offset; + + /// Size - Size of the basic block in bytes. If the block contains + /// inline assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + BasicBlockInfo() : Offset(0), Size(0) {} + + /// Compute the offset immediately following this block. \p MBB is the next + /// block. 
+ unsigned postOffset(const MachineBasicBlock &MBB) const { + unsigned PO = Offset + Size; + unsigned Align = MBB.getAlignment(); + if (Align == 0) + return PO; + + unsigned AlignAmt = 1 << Align; + unsigned ParentAlign = MBB.getParent()->getAlignment(); + if (Align <= ParentAlign) + return PO + OffsetToAlignment(PO, AlignAmt); + + // The alignment of this MBB is larger than the function's alignment, so we + // can't tell whether or not it will insert nops. Assume that it will. + return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt); + } + }; + + SmallVector<BasicBlockInfo, 16> BlockInfo; + std::unique_ptr<RegScavenger> RS; + LivePhysRegs LiveRegs; + + MachineFunction *MF; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + bool relaxBranchInstructions(); + void scanFunction(); + + MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &BB); + + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI, + MachineBasicBlock *DestBB); + void adjustBlockOffsets(MachineBasicBlock &MBB); + bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const; + + bool fixupConditionalBranch(MachineInstr &MI); + bool fixupUnconditionalBranch(MachineInstr &MI); + uint64_t computeBlockSize(const MachineBasicBlock &MBB) const; + unsigned getInstrOffset(const MachineInstr &MI) const; + void dumpBBs(); + void verify(); + +public: + static char ID; + BranchRelaxation() : MachineFunctionPass(ID) { } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return BRANCH_RELAX_NAME; + } +}; + +} + +char BranchRelaxation::ID = 0; +char &llvm::BranchRelaxationPassID = BranchRelaxation::ID; + +INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false) + +/// verify - check BBOffsets, BBSizes, alignment of islands +void BranchRelaxation::verify() { +#ifndef NDEBUG + unsigned PrevNum = MF->begin()->getNumber(); + for (MachineBasicBlock &MBB : *MF) { + unsigned Align = MBB.getAlignment(); + unsigned Num = MBB.getNumber(); + assert(BlockInfo[Num].Offset % (1u << Align) == 0); + assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset); + assert(BlockInfo[Num].Size == computeBlockSize(MBB)); + PrevNum = Num; + } +#endif +} + +/// print block size and offset information - debugging +void BranchRelaxation::dumpBBs() { + for (auto &MBB : *MF) { + const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; + dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) + << format("size=%#x\n", BBI.Size); + } +} + +/// scanFunction - Do the initial scan of the function, building up +/// information about each block. +void BranchRelaxation::scanFunction() { + BlockInfo.clear(); + BlockInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineBasicBlock &MBB : *MF) + BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB); + + // Compute block offsets and known bits. + adjustBlockOffsets(*MF->begin()); +} + +/// computeBlockSize - Compute the size for MBB. 
+uint64_t BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) const { + uint64_t Size = 0; + for (const MachineInstr &MI : MBB) + Size += TII->getInstSizeInBytes(MI); + return Size; +} + +/// getInstrOffset - Return the current offset of the specified machine +/// instruction from the start of the function. This offset changes as stuff is +/// moved around inside the function. +unsigned BranchRelaxation::getInstrOffset(const MachineInstr &MI) const { + const MachineBasicBlock *MBB = MI.getParent(); + + // The offset is composed of two things: the sum of the sizes of all MBB's + // before this instruction's block, and the offset from the start of the block + // it is in. + unsigned Offset = BlockInfo[MBB->getNumber()].Offset; + + // Sum instructions before MI in MBB. + for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != &MI; ++I) { + assert(I != MBB->end() && "Didn't find MI in its own basic block?"); + Offset += TII->getInstSizeInBytes(*I); + } + + return Offset; +} + +void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) { + unsigned PrevNum = Start.getNumber(); + for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) { + unsigned Num = MBB.getNumber(); + if (!Num) // block zero is never changed from offset zero. + continue; + // Get the offset and known bits at the end of the layout predecessor. + // Include the alignment of the current block. + BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(MBB); + + PrevNum = Num; + } +} + + /// Insert a new empty basic block and insert it after \BB +MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) { + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(BB.getBasicBlock()); + MF->insert(++BB.getIterator(), NewBB); + + // Insert an entry into BlockInfo to align it properly with the block numbers. + BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + return NewBB; +} + +/// Split the basic block containing MI into two blocks, which are joined by +/// an unconditional branch. Update data structures and renumber blocks to +/// account for this change and returns the newly created block. +MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, + MachineBasicBlock *DestBB) { + MachineBasicBlock *OrigBB = MI.getParent(); + + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MF->insert(++OrigBB->getIterator(), NewBB); + + // Splice the instructions starting with MI over to NewBB. + NewBB->splice(NewBB->end(), OrigBB, MI.getIterator(), OrigBB->end()); + + // Add an unconditional branch from OrigBB to NewBB. + // Note the new unconditional branch is not being recorded. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond to anything in the source. + TII->insertUnconditionalBranch(*OrigBB, NewBB, DebugLoc()); + + // Insert an entry into BlockInfo to align it properly with the block numbers. + BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + + NewBB->transferSuccessors(OrigBB); + OrigBB->addSuccessor(NewBB); + OrigBB->addSuccessor(DestBB); + + // Cleanup potential unconditional branch to successor block. + // Note that updateTerminator may change the size of the blocks. + NewBB->updateTerminator(); + OrigBB->updateTerminator(); + + // Figure out how large the OrigBB is. 
As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. (It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) + BlockInfo[OrigBB->getNumber()].Size = computeBlockSize(*OrigBB); + + // Figure out how large the NewMBB is. As the second half of the original + // block, it may contain a tablejump. + BlockInfo[NewBB->getNumber()].Size = computeBlockSize(*NewBB); + + // All BBOffsets following these blocks must be modified. + adjustBlockOffsets(*OrigBB); + + // Need to fix live-in lists if we track liveness. + if (TRI->trackLivenessAfterRegAlloc(*MF)) + computeLiveIns(LiveRegs, *TRI, *NewBB); + + ++NumSplit; + + return NewBB; +} + +/// isBlockInRange - Returns true if the distance between specific MI and +/// specific BB can fit in MI's displacement field. +bool BranchRelaxation::isBlockInRange( + const MachineInstr &MI, const MachineBasicBlock &DestBB) const { + int64_t BrOffset = getInstrOffset(MI); + int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset; + + if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset)) + return true; + + DEBUG( + dbgs() << "Out of range branch to destination BB#" << DestBB.getNumber() + << " from BB#" << MI.getParent()->getNumber() + << " to " << DestOffset + << " offset " << DestOffset - BrOffset + << '\t' << MI + ); + + return false; +} + +/// fixupConditionalBranch - Fix up a conditional branch whose destination is +/// too far away to fit in its displacement field. It is converted to an inverse +/// conditional branch + an unconditional branch to the destination. +bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + + bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond); + assert(!Fail && "branches to be relaxed must be analyzable"); + (void)Fail; + + // Add an unconditional branch to the destination and invert the branch + // condition to jump over it: + // tbz L1 + // => + // tbnz L2 + // b L1 + // L2: + + if (FBB && isBlockInRange(MI, *FBB)) { + // Last MI in the BB is an unconditional branch. We can simply invert the + // condition and swap destinations: + // beq L1 + // b L2 + // => + // bne L2 + // b L1 + DEBUG(dbgs() << " Invert condition and swap " + "its destination with " << MBB->back()); + + TII->reverseBranchCondition(Cond); + int OldSize = 0, NewSize = 0; + TII->removeBranch(*MBB, &OldSize); + TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize); + + BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize); + return true; + } else if (FBB) { + // We need to split the basic block here to obtain two long-range + // unconditional branches. + auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock()); + MF->insert(++MBB->getIterator(), &NewBB); + + // Insert an entry into BlockInfo to align it properly with the block + // numbers. + BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo()); + + unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size; + int NewBrSize; + TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize); + NewBBSize += NewBrSize; + + // Update the successor lists according to the transformation to follow. + // Do it here since if there's no split, no update is needed. 
+ MBB->replaceSuccessor(FBB, &NewBB); + NewBB.addSuccessor(FBB); + } + + // We now have an appropriate fall-through block in place (either naturally or + // just created), so we can invert the condition. + MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); + + DEBUG(dbgs() << " Insert B to BB#" << TBB->getNumber() + << ", invert condition and change dest. to BB#" + << NextBB.getNumber() << '\n'); + + unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size; + + // Insert a new conditional branch and a new unconditional branch. + int RemovedSize = 0; + TII->reverseBranchCondition(Cond); + TII->removeBranch(*MBB, &RemovedSize); + MBBSize -= RemovedSize; + + int AddedSize = 0; + TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize); + MBBSize += AddedSize; + + // Finally, keep the block offsets up to date. + adjustBlockOffsets(*MBB); + return true; +} + +bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { + MachineBasicBlock *MBB = MI.getParent(); + + unsigned OldBrSize = TII->getInstSizeInBytes(MI); + MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); + + int64_t DestOffset = BlockInfo[DestBB->getNumber()].Offset; + int64_t SrcOffset = getInstrOffset(MI); + + assert(!TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - SrcOffset)); + + BlockInfo[MBB->getNumber()].Size -= OldBrSize; + + MachineBasicBlock *BranchBB = MBB; + + // If this was an expanded conditional branch, there is already a single + // unconditional branch in a block. + if (!MBB->empty()) { + BranchBB = createNewBlockAfter(*MBB); + + // Add live outs. + for (const MachineBasicBlock *Succ : MBB->successors()) { + for (const MachineBasicBlock::RegisterMaskPair &LiveIn : Succ->liveins()) + BranchBB->addLiveIn(LiveIn); + } + + BranchBB->sortUniqueLiveIns(); + BranchBB->addSuccessor(DestBB); + MBB->replaceSuccessor(DestBB, BranchBB); + } + + DebugLoc DL = MI.getDebugLoc(); + MI.eraseFromParent(); + BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch( + *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get()); + + adjustBlockOffsets(*MBB); + return true; +} + +bool BranchRelaxation::relaxBranchInstructions() { + bool Changed = false; + + // Relaxing branches involves creating new basic blocks, so re-eval + // end() for termination. + for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) { + MachineBasicBlock &MBB = *I; + + // Empty block? + MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr(); + if (Last == MBB.end()) + continue; + + // Expand the unconditional branch first if necessary. If there is a + // conditional branch, this will end up changing the branch destination of + // it to be over the newly inserted indirect branch block, which may avoid + // the need to try expanding the conditional branch first, saving an extra + // jump. + if (Last->isUnconditionalBranch()) { + // Unconditional branch destination might be unanalyzable, assume these + // are OK. + if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) { + if (!isBlockInRange(*Last, *DestBB)) { + fixupUnconditionalBranch(*Last); + ++NumUnconditionalRelaxed; + Changed = true; + } + } + } + + // Loop over the conditional branches. 
+ MachineBasicBlock::iterator Next; + for (MachineBasicBlock::iterator J = MBB.getFirstTerminator(); + J != MBB.end(); J = Next) { + Next = std::next(J); + MachineInstr &MI = *J; + + if (MI.isConditionalBranch()) { + MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); + if (!isBlockInRange(MI, *DestBB)) { + if (Next != MBB.end() && Next->isConditionalBranch()) { + // If there are multiple conditional branches, this isn't an + // analyzable block. Split later terminators into a new block so + // each one will be analyzable. + + splitBlockBeforeInstr(*Next, DestBB); + } else { + fixupConditionalBranch(MI); + ++NumConditionalRelaxed; + } + + Changed = true; + + // This may have modified all of the terminators, so start over. + Next = MBB.getFirstTerminator(); + } + } + } + } + + return Changed; +} + +bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + + DEBUG(dbgs() << "***** BranchRelaxation *****\n"); + + const TargetSubtargetInfo &ST = MF->getSubtarget(); + TII = ST.getInstrInfo(); + + TRI = ST.getRegisterInfo(); + if (TRI->trackLivenessAfterRegAlloc(*MF)) + RS.reset(new RegScavenger()); + + // Renumber all of the machine basic blocks in the function, guaranteeing that + // the numbers agree with the position of the block in the function. + MF->RenumberBlocks(); + + // Do the initial scan of the function, building up information about the + // sizes of each block. + scanFunction(); + + DEBUG(dbgs() << " Basic blocks before relaxation\n"; dumpBBs();); + + bool MadeChange = false; + while (relaxBranchInstructions()) + MadeChange = true; + + // After a while, this might be made debug-only, but it is not expensive. + verify(); + + DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs()); + + BlockInfo.clear(); + + return MadeChange; +} diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index 7d67bcf..2e33f14 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -23,6 +23,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> + using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, @@ -64,6 +66,22 @@ void CCState::MarkAllocated(unsigned Reg) { UsedRegs[*AI/32] |= 1 << (*AI&31); } +bool CCState::IsShadowAllocatedReg(unsigned Reg) const { + if (!isAllocated(Reg)) + return false; + + for (auto const &ValAssign : Locs) { + if (ValAssign.isRegLoc()) { + for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true); + AI.isValid(); ++AI) { + if (*AI == Reg) + return false; + } + } + } + return true; +} + /// Analyze an array of argument values, /// incorporating info about the formals into this state. 
void diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 6679819..4cf9b13 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -22,7 +22,9 @@ using namespace llvm; void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); initializeBranchFolderPassPass(Registry); + initializeBranchRelaxationPass(Registry); initializeCodeGenPreparePass(Registry); + initializeCountingFunctionInserterPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); @@ -53,6 +55,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); + initializeMachinePipelinerPass(Registry); initializeMachinePostDominatorTreePass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); @@ -68,6 +71,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePostRASchedulerPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); + initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeShrinkWrapPass(Registry); diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index ede4041..934b470 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -17,12 +17,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -118,6 +122,19 @@ static cl::opt<bool> DisablePreheaderProtect( "disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders")); +static cl::opt<bool> ProfileGuidedSectionPrefix( + "profile-guided-section-prefix", cl::Hidden, cl::init(true), + cl::desc("Use profile info to add section prefix for hot/cold functions")); + +static cl::opt<unsigned> FreqRatioToSkipMerge( + "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), + cl::desc("Skip merging empty blocks if (frequency of empty block) / " + "(frequency of destination block) is greater than this ratio")); + +static cl::opt<bool> ForceSplitStore( + "force-split-store", cl::Hidden, cl::init(false), + cl::desc("Force store splitting no matter what the target query says.")); + namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; @@ -130,6 +147,8 @@ class TypePromotionTransaction; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; + std::unique_ptr<BlockFrequencyInfo> BFI; + std::unique_ptr<BranchProbabilityInfo> BPI; /// As we scan instructions optimizing them, this is the next instruction /// to optimize. 
Transforms that can invalidate this should update it. @@ -163,10 +182,11 @@ class TypePromotionTransaction; } bool runOnFunction(Function &F) override; - const char *getPassName() const override { return "CodeGen Prepare"; } + StringRef getPassName() const override { return "CodeGen Prepare"; } void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: When we can selectively preserve passes, preserve the domtree. + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); @@ -175,8 +195,11 @@ class TypePromotionTransaction; private: bool eliminateFallThrough(Function &F); bool eliminateMostlyEmptyBlocks(Function &F); + BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void eliminateMostlyEmptyBlock(BasicBlock *BB); + bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, + bool isPreheader); bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); bool optimizeInst(Instruction *I, bool& ModifiedDT); bool optimizeMemoryInst(Instruction *I, Value *Addr, @@ -199,13 +222,15 @@ class TypePromotionTransaction; unsigned CreatedInstCost); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); - void stripInvariantGroupMetadata(Instruction &I); }; } char CodeGenPrepare::ID = 0; -INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", - "Optimize for code generation", false, false) +INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { return new CodeGenPrepare(TM); @@ -221,6 +246,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Clear per function information. InsertedInsts.clear(); PromotedInsts.clear(); + BFI.reset(); + BPI.reset(); ModifiedDT = false; if (TM) @@ -230,6 +257,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); OptSize = F.optForSize(); + if (ProfileGuidedSectionPrefix) { + ProfileSummaryInfo *PSI = + getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + if (PSI->isFunctionEntryHot(&F)) + F.setSectionPrefix(".hot"); + else if (PSI->isFunctionEntryCold(&F)) + F.setSectionPrefix(".cold"); + } + /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. if (!OptSize && TLI && TLI->isSlowDivBypassed()) { @@ -364,6 +400,38 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { return Changed; } +/// Find a destination block from BB if BB is mergeable empty block. +BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { + // If this block doesn't end with an uncond branch, ignore it. + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isUnconditional()) + return nullptr; + + // If the instruction before the branch (skipping debug info) isn't a phi + // node, then other stuff is happening here. 
+ BasicBlock::iterator BBI = BI->getIterator(); + if (BBI != BB->begin()) { + --BBI; + while (isa<DbgInfoIntrinsic>(BBI)) { + if (BBI == BB->begin()) + break; + --BBI; + } + if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) + return nullptr; + } + + // Do not break infinite loops. + BasicBlock *DestBB = BI->getSuccessor(0); + if (DestBB == BB) + return nullptr; + + if (!canMergeBlocks(BB, DestBB)) + DestBB = nullptr; + + return DestBB; +} + /// Eliminate blocks that contain only PHI nodes, debug info directives, and an /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split /// edges in ways that are non-optimal for isel. Start by eliminating these @@ -382,46 +450,106 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { // Note that this intentionally skips the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { BasicBlock *BB = &*I++; + BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); + if (!DestBB || + !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) + continue; + + eliminateMostlyEmptyBlock(BB); + MadeChange = true; + } + return MadeChange; +} + +bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, + BasicBlock *DestBB, + bool isPreheader) { + // Do not delete loop preheaders if doing so would create a critical edge. + // Loop preheaders can be good locations to spill registers. If the + // preheader is deleted and we create a critical edge, registers may be + // spilled in the loop body instead. + if (!DisablePreheaderProtect && isPreheader && + !(BB->getSinglePredecessor() && + BB->getSinglePredecessor()->getSingleSuccessor())) + return false; + + // Try to skip merging if the unique predecessor of BB is terminated by a + // switch or indirect branch instruction, and BB is used as an incoming block + // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to + // add COPY instructions in the predecessor of BB instead of BB (if it is not + // merged). Note that the critical edge created by merging such blocks wont be + // split in MachineSink because the jump table is not analyzable. By keeping + // such empty block (BB), ISel will place COPY instructions in BB, not in the + // predecessor of BB. + BasicBlock *Pred = BB->getUniquePredecessor(); + if (!Pred || + !(isa<SwitchInst>(Pred->getTerminator()) || + isa<IndirectBrInst>(Pred->getTerminator()))) + return true; + + if (BB->getTerminator() != BB->getFirstNonPHI()) + return true; + + // We use a simple cost heuristic which determine skipping merging is + // profitable if the cost of skipping merging is less than the cost of + // merging : Cost(skipping merging) < Cost(merging BB), where the + // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and + // the Cost(merging BB) is Freq(Pred) * Cost(Copy). + // Assuming Cost(Copy) == Cost(Branch), we could simplify it to : + // Freq(Pred) / Freq(BB) > 2. + // Note that if there are multiple empty blocks sharing the same incoming + // value for the PHIs in the DestBB, we consider them together. In such + // case, Cost(merging BB) will be the sum of their frequencies. + + if (!isa<PHINode>(DestBB->begin())) + return true; - // If this block doesn't end with an uncond branch, ignore it. 
- BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); - if (!BI || !BI->isUnconditional()) + SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs; + + // Find all other incoming blocks from which incoming values of all PHIs in + // DestBB are the same as the ones from BB. + for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E; + ++PI) { + BasicBlock *DestBBPred = *PI; + if (DestBBPred == BB) continue; - // If the instruction before the branch (skipping debug info) isn't a phi - // node, then other stuff is happening here. - BasicBlock::iterator BBI = BI->getIterator(); - if (BBI != BB->begin()) { - --BBI; - while (isa<DbgInfoIntrinsic>(BBI)) { - if (BBI == BB->begin()) - break; - --BBI; + bool HasAllSameValue = true; + BasicBlock::const_iterator DestBBI = DestBB->begin(); + while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) { + if (DestPN->getIncomingValueForBlock(BB) != + DestPN->getIncomingValueForBlock(DestBBPred)) { + HasAllSameValue = false; + break; } - if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) - continue; } + if (HasAllSameValue) + SameIncomingValueBBs.insert(DestBBPred); + } - // Do not break infinite loops. - BasicBlock *DestBB = BI->getSuccessor(0); - if (DestBB == BB) - continue; + // See if all BB's incoming values are same as the value from Pred. In this + // case, no reason to skip merging because COPYs are expected to be place in + // Pred already. + if (SameIncomingValueBBs.count(Pred)) + return true; - if (!canMergeBlocks(BB, DestBB)) - continue; + if (!BFI) { + Function &F = *BB->getParent(); + LoopInfo LI{DominatorTree(F)}; + BPI.reset(new BranchProbabilityInfo(F, LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); + } - // Do not delete loop preheaders if doing so would create a critical edge. - // Loop preheaders can be good locations to spill registers. If the - // preheader is deleted and we create a critical edge, registers may be - // spilled in the loop body instead. - if (!DisablePreheaderProtect && Preheaders.count(BB) && - !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor())) - continue; + BlockFrequency PredFreq = BFI->getBlockFreq(Pred); + BlockFrequency BBFreq = BFI->getBlockFreq(BB); - eliminateMostlyEmptyBlock(BB); - MadeChange = true; - } - return MadeChange; + for (auto SameValueBB : SameIncomingValueBBs) + if (SameValueBB->getUniquePredecessor() == Pred && + DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB)) + BBFreq += BFI->getBlockFreq(SameValueBB); + + return PredFreq.getFrequency() <= + BBFreq.getFrequency() * FreqRatioToSkipMerge; } /// Return true if we can merge BB into DestBB if there is a single @@ -805,6 +933,14 @@ static bool SinkCast(CastInst *CI) { /// static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL) { + // Sink only "cheap" (or nop) address-space casts. This is a weaker condition + // than sinking only nop casts, but is helpful on some platforms. + if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) { + if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(), + ASC->getDestAddressSpace())) + return false; + } + // If this is a noop copy, EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(DL, CI->getType()); @@ -925,6 +1061,8 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) { InsertedCmp = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), CI->getOperand(1), "", &*InsertPt); + // Propagate the debug info. 
+ InsertedCmp->setDebugLoc(CI->getDebugLoc()); } // Replace a use of the cmp with a use of the new cmp. @@ -1814,18 +1952,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { default: break; case Intrinsic::objectsize: { // Lower all uses of llvm.objectsize.* - uint64_t Size; - Type *ReturnTy = CI->getType(); - Constant *RetVal = nullptr; - ConstantInt *Op1 = cast<ConstantInt>(II->getArgOperand(1)); - ObjSizeMode Mode = Op1->isZero() ? ObjSizeMode::Max : ObjSizeMode::Min; - if (getObjectSize(II->getArgOperand(0), - Size, *DL, TLInfo, false, Mode)) { - RetVal = ConstantInt::get(ReturnTy, Size); - } else { - RetVal = ConstantInt::get(ReturnTy, - Mode == ObjSizeMode::Min ? 0 : -1ULL); - } + ConstantInt *RetVal = + lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); // Substituting this can cause recursive simplifications, which can // invalidate our iterator. Use a WeakVH to hold onto it in case this // happens. @@ -1963,13 +2091,13 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { if (!TLI) return false; - ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); - if (!RI) + ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RetI) return false; PHINode *PN = nullptr; BitCastInst *BCI = nullptr; - Value *V = RI->getReturnValue(); + Value *V = RetI->getReturnValue(); if (V) { BCI = dyn_cast<BitCastInst>(V); if (BCI) @@ -1983,14 +2111,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { if (PN && PN->getParent() != BB) return false; - // It's not safe to eliminate the sign / zero extension of the return value. - // See llvm::isInTailCallPosition(). - const Function *F = BB->getParent(); - AttributeSet CallerAttrs = F->getAttributes(); - if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || - CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) - return false; - // Make sure there are no instructions between the PHI and return, or that the // return is the first instruction in the block. if (PN) { @@ -1999,24 +2119,26 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { if (&*BI == BCI) // Also skip over the bitcast. ++BI; - if (&*BI != RI) + if (&*BI != RetI) return false; } else { BasicBlock::iterator BI = BB->begin(); while (isa<DbgInfoIntrinsic>(BI)) ++BI; - if (&*BI != RI) + if (&*BI != RetI) return false; } /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. + const Function *F = BB->getParent(); SmallVector<CallInst*, 4> TailCalls; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); // Make sure the phi value is indeed produced by the tail call. if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && - TLI->mayBeEmittedAsTailCall(CI)) + TLI->mayBeEmittedAsTailCall(CI) && + attributesPermitTailCall(F, CI, RetI, *TLI)) TailCalls.push_back(CI); } } else { @@ -2033,7 +2155,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { continue; CallInst *CI = dyn_cast<CallInst>(&*RI); - if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && + attributesPermitTailCall(F, CI, RetI, *TLI)) TailCalls.push_back(CI); } } @@ -2060,7 +2183,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { continue; // Duplicate the return into CallBB. 
- (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB); ModifiedDT = Changed = true; ++NumRetsDup; } @@ -3237,7 +3360,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, int64_t ConstantOffset = 0; gep_type_iterator GTI = gep_type_begin(AddrInst); for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { - if (StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = GTI.getStructTypeOrNull()) { const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); @@ -3665,8 +3788,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, TPT.rollback(LastKnownGood); // If the match didn't cover I, then it won't be shared by it. - if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), - I) == MatchedAddrModeInsts.end()) + if (!is_contained(MatchedAddrModeInsts, I)) return false; MatchedAddrModeInsts.clear(); @@ -3791,18 +3913,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } TPT.commit(); - // Check to see if any of the instructions supersumed by this addr mode are - // non-local to I's BB. - bool AnyNonLocal = false; - for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { - if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { - AnyNonLocal = true; - break; - } - } - // If all the instructions matched are already in this BB, don't do anything. - if (!AnyNonLocal) { + if (none_of(AddrModeInsts, [&](Value *V) { + return IsNonLocalValue(V, MemoryInst->getParent()); + })) { DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } @@ -4217,6 +4331,10 @@ bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT, /// promotions apply. /// bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { + // ExtLoad formation infrastructure requires TLI to be effective. + if (!TLI) + return false; + // Try to promote a chain of computation if it allows to form // an extended load. TypePromotionTransaction TPT; @@ -4246,7 +4364,7 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { // If the load has other users and the truncate is not free, this probably // isn't worthwhile. - if (!LI->hasOneUse() && TLI && + if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && !TLI->isTruncateFree(I->getType(), LI->getType())) { I = OldExt; @@ -4262,7 +4380,7 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { assert(isa<SExtInst>(I) && "Unexpected ext type!"); LType = ISD::SEXTLOAD; } - if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) { + if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) { I = OldExt; TPT.rollback(LastKnownGood); return false; @@ -4273,6 +4391,14 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { TPT.commit(); I->removeFromParent(); I->insertAfter(LI); + // CGP does not check if the zext would be speculatively executed when moved + // to the same basic block as the load. Preserving its original location would + // pessimize the debugging experience, as well as negatively impact the + // quality of sample pgo. We don't want to use "line 0" as that has a + // size cost in the line-table section and logically the zext can be seen as + // part of the load. Therefore we conservatively reuse the same debug location + // for the load and the zext. 
+ I->setDebugLoc(LI->getDebugLoc()); ++NumExtsMoved; return true; } @@ -4583,10 +4709,45 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, return false; } +/// If \p isTrue is true, return the true value of \p SI, otherwise return +/// false value of \p SI. If the true/false value of \p SI is defined by any +/// select instructions in \p Selects, look through the defining select +/// instruction until the true/false value is not defined in \p Selects. +static Value *getTrueOrFalseValue( + SelectInst *SI, bool isTrue, + const SmallPtrSet<const Instruction *, 2> &Selects) { + Value *V; + + for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); + DefSI = dyn_cast<SelectInst>(V)) { + assert(DefSI->getCondition() == SI->getCondition() && + "The condition of DefSI does not match with SI"); + V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); + } + return V; +} /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { + // Find all consecutive select instructions that share the same condition. + SmallVector<SelectInst *, 2> ASI; + ASI.push_back(SI); + for (BasicBlock::iterator It = ++BasicBlock::iterator(SI); + It != SI->getParent()->end(); ++It) { + SelectInst *I = dyn_cast<SelectInst>(&*It); + if (I && SI->getCondition() == I->getCondition()) { + ASI.push_back(I); + } else { + break; + } + } + + SelectInst *LastSI = ASI.back(); + // Increment the current iterator to skip all the rest of select instructions + // because they will be either "not lowered" or "all lowered" to branch. + CurInstIterator = std::next(LastSI->getIterator()); + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); // Can we convert the 'select' to CF ? @@ -4633,7 +4794,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // First, we split the block containing the select into 2 blocks. BasicBlock *StartBlock = SI->getParent(); - BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); // Delete the unconditional branch that was just created by the split. @@ -4643,22 +4804,30 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // At least one will become an actual new basic block. BasicBlock *TrueBlock = nullptr; BasicBlock *FalseBlock = nullptr; + BranchInst *TrueBranch = nullptr; + BranchInst *FalseBranch = nullptr; // Sink expensive instructions into the conditional blocks to avoid executing // them speculatively. 
- if (sinkSelectOperand(TTI, SI->getTrueValue())) { - TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", - EndBlock->getParent(), EndBlock); - auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock); - auto *TrueInst = cast<Instruction>(SI->getTrueValue()); - TrueInst->moveBefore(TrueBranch); - } - if (sinkSelectOperand(TTI, SI->getFalseValue())) { - FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", - EndBlock->getParent(), EndBlock); - auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); - auto *FalseInst = cast<Instruction>(SI->getFalseValue()); - FalseInst->moveBefore(FalseBranch); + for (SelectInst *SI : ASI) { + if (sinkSelectOperand(TTI, SI->getTrueValue())) { + if (TrueBlock == nullptr) { + TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + } + auto *TrueInst = cast<Instruction>(SI->getTrueValue()); + TrueInst->moveBefore(TrueBranch); + } + if (sinkSelectOperand(TTI, SI->getFalseValue())) { + if (FalseBlock == nullptr) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + } + auto *FalseInst = cast<Instruction>(SI->getFalseValue()); + FalseInst->moveBefore(FalseBranch); + } } // If there was nothing to sink, then arbitrarily choose the 'false' side @@ -4677,28 +4846,42 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // of the condition, it means that side of the branch goes to the end block // directly and the path originates from the start block from the point of // view of the new PHI. + BasicBlock *TT, *FT; if (TrueBlock == nullptr) { - BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI); + TT = EndBlock; + FT = FalseBlock; TrueBlock = StartBlock; } else if (FalseBlock == nullptr) { - BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI); + TT = TrueBlock; + FT = EndBlock; FalseBlock = StartBlock; } else { - BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI); + TT = TrueBlock; + FT = FalseBlock; + } + IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI); + + SmallPtrSet<const Instruction *, 2> INS; + INS.insert(ASI.begin(), ASI.end()); + // Use reverse iterator because later select may use the value of the + // earlier select, and we need to propagate value through earlier select + // to get the PHI operand. + for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { + SelectInst *SI = *It; + // The select itself is replaced with a PHI Node. + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); + PN->takeName(SI); + PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); + PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); + + SI->replaceAllUsesWith(PN); + SI->eraseFromParent(); + INS.erase(SI); + ++NumSelectsExpanded; } - - // The select itself is replaced with a PHI Node. - PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); - PN->takeName(SI); - PN->addIncoming(SI->getTrueValue(), TrueBlock); - PN->addIncoming(SI->getFalseValue(), FalseBlock); - - SI->replaceAllUsesWith(PN); - SI->eraseFromParent(); // Instruct OptimizeBlock to skip to the next block. 
CurInstIterator = StartBlock->end(); - ++NumSelectsExpanded; return true; } @@ -5179,6 +5362,117 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { return false; } +/// For the instruction sequence of store below, F and I values +/// are bundled together as an i64 value before being stored into memory. +/// Sometimes it is more efficent to generate separate stores for F and I, +/// which can remove the bitwise instructions or sink them to colder places. +/// +/// (store (or (zext (bitcast F to i32) to i64), +/// (shl (zext I to i64), 32)), addr) --> +/// (store F, addr) and (store I, addr+4) +/// +/// Similarly, splitting for other merged store can also be beneficial, like: +/// For pair of {i32, i32}, i64 store --> two i32 stores. +/// For pair of {i32, i16}, i64 store --> two i32 stores. +/// For pair of {i16, i16}, i32 store --> two i16 stores. +/// For pair of {i16, i8}, i32 store --> two i16 stores. +/// For pair of {i8, i8}, i16 store --> two i8 stores. +/// +/// We allow each target to determine specifically which kind of splitting is +/// supported. +/// +/// The store patterns are commonly seen from the simple code snippet below +/// if only std::make_pair(...) is sroa transformed before inlined into hoo. +/// void goo(const std::pair<int, float> &); +/// hoo() { +/// ... +/// goo(std::make_pair(tmp, ftmp)); +/// ... +/// } +/// +/// Although we already have similar splitting in DAG Combine, we duplicate +/// it in CodeGenPrepare to catch the case in which pattern is across +/// multiple BBs. The logic in DAG Combine is kept to catch case generated +/// during code expansion. +static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, + const TargetLowering &TLI) { + // Handle simple but common cases only. + Type *StoreType = SI.getValueOperand()->getType(); + if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) || + DL.getTypeSizeInBits(StoreType) == 0) + return false; + + unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2; + Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize); + if (DL.getTypeStoreSizeInBits(SplitStoreType) != + DL.getTypeSizeInBits(SplitStoreType)) + return false; + + // Match the following patterns: + // (store (or (zext LValue to i64), + // (shl (zext HValue to i64), 32)), HalfValBitSize) + // or + // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize) + // (zext LValue to i64), + // Expect both operands of OR and the first operand of SHL have only + // one use. + Value *LValue, *HValue; + if (!match(SI.getValueOperand(), + m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))), + m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))), + m_SpecificInt(HalfValBitSize)))))) + return false; + + // Check LValue and HValue are int with size less or equal than 32. + if (!LValue->getType()->isIntegerTy() || + DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize || + !HValue->getType()->isIntegerTy() || + DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize) + return false; + + // If LValue/HValue is a bitcast instruction, use the EVT before bitcast + // as the input of target query. + auto *LBC = dyn_cast<BitCastInst>(LValue); + auto *HBC = dyn_cast<BitCastInst>(HValue); + EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType()) + : EVT::getEVT(LValue->getType()); + EVT HighTy = HBC ? 
EVT::getEVT(HBC->getOperand(0)->getType()) + : EVT::getEVT(HValue->getType()); + if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) + return false; + + // Start to split store. + IRBuilder<> Builder(SI.getContext()); + Builder.SetInsertPoint(&SI); + + // If LValue/HValue is a bitcast in another BB, create a new one in current + // BB so it may be merged with the splitted stores by dag combiner. + if (LBC && LBC->getParent() != SI.getParent()) + LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType()); + if (HBC && HBC->getParent() != SI.getParent()) + HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); + + auto CreateSplitStore = [&](Value *V, bool Upper) { + V = Builder.CreateZExtOrBitCast(V, SplitStoreType); + Value *Addr = Builder.CreateBitCast( + SI.getOperand(1), + SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); + if (Upper) + Addr = Builder.CreateGEP( + SplitStoreType, Addr, + ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); + Builder.CreateAlignedStore( + V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment()); + }; + + CreateSplitStore(LValue, false); + CreateSplitStore(HValue, true); + + // Delete the old store. + SI.eraseFromParent(); + return true; +} + bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. @@ -5232,7 +5526,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { return OptimizeCmpExpression(CI, TLI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - stripInvariantGroupMetadata(*LI); + LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); if (TLI) { bool Modified = optimizeLoadExt(LI); unsigned AS = LI->getPointerAddressSpace(); @@ -5243,7 +5537,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - stripInvariantGroupMetadata(*SI); + if (TLI && splitMergedValStore(*SI, *DL, *TLI)) + return true; + SI->setMetadata(LLVMContext::MD_invariant_group, nullptr); if (TLI) { unsigned AS = SI->getPointerAddressSpace(); return optimizeMemoryInst(I, SI->getOperand(1), @@ -5542,7 +5838,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { // incoming edge to the PHI nodes, because both branch instructions target // now the same successor. Depending on the original branch condition // (and/or) we have to swap the successors (TrueDest, FalseDest), so that - // we perfrom the correct update for the PHI nodes. + // we perform the correct update for the PHI nodes. // This doesn't change the successor order of the just created branch // instruction (or any other instruction). if (Opc == Instruction::Or) @@ -5649,8 +5945,3 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } return MadeChange; } - -void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) { - if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group)) - I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID()); -} diff --git a/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp new file mode 100644 index 0000000..1e46a7a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp @@ -0,0 +1,62 @@ +//===- CountingFunctionInserter.cpp - Insert mcount-like function calls ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Insert calls to counter functions, such as mcount, intended to be called +// once per function, at the beginning of each function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + struct CountingFunctionInserter : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CountingFunctionInserter() : FunctionPass(ID) { + initializeCountingFunctionInserterPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<GlobalsAAWrapperPass>(); + } + + bool runOnFunction(Function &F) override { + std::string CountingFunctionName = + F.getFnAttribute("counting-function").getValueAsString(); + if (CountingFunctionName.empty()) + return false; + + Type *VoidTy = Type::getVoidTy(F.getContext()); + Constant *CountingFn = + F.getParent()->getOrInsertFunction(CountingFunctionName, + VoidTy, nullptr); + CallInst::Create(CountingFn, "", &*F.begin()->getFirstInsertionPt()); + return true; + } + }; + + char CountingFunctionInserter::ID = 0; +} + +INITIALIZE_PASS(CountingFunctionInserter, "cfinserter", + "Inserts calls to mcount-like functions", false, false) + +//===----------------------------------------------------------------------===// +// +// CountingFunctionInserter - Give any unnamed non-void instructions "tmp" names. +// +FunctionPass *llvm::createCountingFunctionInserterPass() { + return new CountingFunctionInserter(); +} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index a0189a1..5d60c30 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -69,8 +69,8 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any // callee-saved register that is not saved in the prolog. 
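The effect of the new CountingFunctionInserter pass, sketched at the source level (mcount is only the conventional example; the actual callee is whatever the function's "counting-function" attribute names): a call to the named void function is inserted once at the entry of each attributed function.

    extern "C" void mcount();   // whatever function the attribute names

    int compute(int x) { return x * x; }

    // The pass lowers compute as if it had been written like this:
    int computeInstrumented(int x) {
      mcount();                 // inserted at the first insertion point
      return x * x;
    }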
- const MachineFrameInfo *MFI = MF.getFrameInfo(); - BitVector Pristine = MFI->getPristineRegs(MF); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + BitVector Pristine = MFI.getPristineRegs(MF); for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { if (!IsReturnBlock && !Pristine.test(*I)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 2386af9..7b1b2d6 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -31,9 +31,14 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden, + cl::init(0), cl::desc("If present, stops packetizing after N instructions")); +static unsigned InstrCount = 0; + // -------------------------------------------------------------------- // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp @@ -218,6 +223,13 @@ VLIWPacketizerList::~VLIWPacketizerList() { // End the current packet, bundle packet instructions and reset DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI) { + DEBUG({ + if (!CurrentPacketMIs.empty()) { + dbgs() << "Finalizing packet:\n"; + for (MachineInstr *MI : CurrentPacketMIs) + dbgs() << " * " << *MI; + } + }); if (CurrentPacketMIs.size() > 1) { MachineInstr &MIFirst = *CurrentPacketMIs.front(); finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator()); @@ -249,8 +261,17 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, for (SUnit &SU : VLIWScheduler->SUnits) MIToSUnit[SU.getInstr()] = &SU; + bool LimitPresent = InstrLimit.getPosition(); + // The main packetizer loop. for (; BeginItr != EndItr; ++BeginItr) { + if (LimitPresent) { + if (InstrCount >= InstrLimit) { + EndItr = BeginItr; + break; + } + InstrCount++; + } MachineInstr &MI = *BeginItr; initPacketizerState(); diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 0b8dc7a..17c229a 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -122,7 +122,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // liveness as we go. for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), MIE = MBB.rend(); MII != MIE; ) { - MachineInstr *MI = &*MII; + MachineInstr *MI = &*MII++; // If the instruction is dead, delete it! if (isDead(MI)) { @@ -133,9 +133,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MI->eraseFromParentAndMarkDBGValuesForRemoval(); AnyChanges = true; ++NumDeletes; - MIE = MBB.rend(); - // MII is now pointing to the next instruction to process, - // so don't increment it. continue; } @@ -169,10 +166,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { } } } - - // We didn't delete the current instruction, so increment MII to - // the next one. 
- ++MII; } } diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp index 1d9e79c..a7ba694 100644 --- a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -63,7 +63,7 @@ public: static char ID; DetectDeadLanes() : MachineFunctionPass(ID) {} - const char *getPassName() const override { return "Detect Dead Lanes"; } + StringRef getPassName() const override { return "Detect Dead Lanes"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -210,7 +210,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, VRegInfo &MORegInfo = VRegInfos[MORegIdx]; LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes; // Any change at all? - if ((UsedLanes & ~PrevUsedLanes) == 0) + if ((UsedLanes & ~PrevUsedLanes).none()) return; // Set UsedLanes and remember instruction for further propagation. @@ -303,7 +303,7 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use, VRegInfo &RegInfo = VRegInfos[DefRegIdx]; LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes; // Any change at all? - if ((DefinedLanes & ~PrevDefinedLanes) == 0) + if ((DefinedLanes & ~PrevDefinedLanes).none()) return; RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes; @@ -356,7 +356,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { // Live-In or unused registers have no definition but are considered fully // defined. if (!MRI->hasOneDef(Reg)) - return ~0u; + return LaneBitmask::getAll(); const MachineOperand &Def = *MRI->def_begin(Reg); const MachineInstr &DefMI = *Def.getParent(); @@ -368,7 +368,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { PutInWorklist(RegIdx); if (Def.isDead()) - return 0; + return LaneBitmask::getNone(); // COPY/PHI can copy across unrelated register classes (example: float/int) // with incompatible subregister structure. Do not include these in the @@ -376,7 +376,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); // Determine initially DefinedLanes. 
- LaneBitmask DefinedLanes = 0; + LaneBitmask DefinedLanes; for (const MachineOperand &MO : DefMI.uses()) { if (!MO.isReg() || !MO.readsReg()) continue; @@ -386,9 +386,9 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { LaneBitmask MODefinedLanes; if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { - MODefinedLanes = ~0u; + MODefinedLanes = LaneBitmask::getAll(); } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) { - MODefinedLanes = ~0u; + MODefinedLanes = LaneBitmask::getAll(); } else { assert(TargetRegisterInfo::isVirtualRegister(MOReg)); if (MRI->hasOneDef(MOReg)) { @@ -410,7 +410,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { return DefinedLanes; } if (DefMI.isImplicitDef() || Def.isDead()) - return 0; + return LaneBitmask::getNone(); assert(Def.getSubReg() == 0 && "Should not have subregister defs in machine SSA phase"); @@ -418,7 +418,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { } LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { - LaneBitmask UsedLanes = 0; + LaneBitmask UsedLanes = LaneBitmask::getNone(); for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { if (!MO.readsReg()) continue; @@ -462,7 +462,7 @@ bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO, const VRegInfo &RegInfo) const { unsigned SubReg = MO.getSubReg(); LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); - return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask) == 0; + return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask).none(); } bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, @@ -482,7 +482,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx]; LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO); - if (UsedLanes != 0) + if (UsedLanes.any()) return false; unsigned MOReg = MO.getReg(); @@ -546,7 +546,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { continue; unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); const VRegInfo &RegInfo = VRegInfos[RegIdx]; - if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes == 0) { + if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) { DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI); MO.setIsDead(); } @@ -577,12 +577,12 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) { // register coalescer cannot deal with hidden dead defs. However without // subregister liveness enabled, the expected benefits of this pass are small // so we safe the compile time. 
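The DetectDeadLanes hunks above replace raw unsigned lane masks with the LaneBitmask type and its none()/any()/getAll()/getNone() queries; a simplified stand-in for that wrapper (not LLVM's actual class, just the shape the new code relies on) looks like this:

    #include <cstdint>

    struct LaneMask {                       // illustrative stand-in only
      std::uint32_t Bits = 0;
      static LaneMask getAll()  { return {~std::uint32_t(0)}; }
      static LaneMask getNone() { return {}; }
      bool none() const { return Bits == 0; }
      bool any()  const { return Bits != 0; }
      LaneMask operator&(LaneMask O) const { return {Bits & O.Bits}; }
      LaneMask operator~() const { return {~Bits}; }
    };

    // The idiom the patch switches to: "does UsedLanes add any new lane?"
    bool addsNewLanes(LaneMask Used, LaneMask Prev) {
      return !(Used & ~Prev).none();
    }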
- if (!MF.getSubtarget().enableSubRegLiveness()) { + MRI = &MF.getRegInfo(); + if (!MRI->subRegLivenessEnabled()) { DEBUG(dbgs() << "Skipping Detect dead lanes pass\n"); return false; } - MRI = &MF.getRegInfo(); TRI = MRI->getTargetRegisterInfo(); unsigned NumVirtRegs = MRI->getNumVirtRegs(); diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index eae78a9..38af19a 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -71,7 +71,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override; - const char *getPassName() const override { + StringRef getPassName() const override { return "Exception handling preparation"; } }; diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index 8c96124..7291727 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -547,7 +547,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { // Fix up Head's terminators. // It should become a single branch or a fallthrough. DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); - TII->RemoveBranch(*Head); + TII->removeBranch(*Head); // Erase the now empty conditional blocks. It is likely that Head can fall // through to Tail, and we can join the two blocks. @@ -574,7 +574,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { // We need a branch to Tail, let code placement work it out later. DEBUG(dbgs() << "Converting to unconditional branch.\n"); SmallVector<MachineOperand, 0> EmptyCond; - TII->InsertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); + TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); Head->addSuccessor(Tail); } DEBUG(dbgs() << *Head); @@ -602,7 +602,7 @@ public: EarlyIfConverter() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const override { return "Early If-Conversion"; } + StringRef getPassName() const override { return "Early If-Conversion"; } private: bool tryConvertIf(MachineBasicBlock*); diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp index aea7c31..b3a25544 100644 --- a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -57,8 +58,8 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { Blocks.resize(getNumBundles()); for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) { - unsigned b0 = getBundle(i, 0); - unsigned b1 = getBundle(i, 1); + unsigned b0 = getBundle(i, false); + unsigned b1 = getBundle(i, true); Blocks[b0].push_back(i); if (b1 != b0) Blocks[b1].push_back(i); @@ -69,6 +70,7 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { /// Specialize WriteGraph, the standard implementation won't work. namespace llvm { + template<> raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, bool ShortNames, @@ -89,7 +91,8 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, O << "}\n"; return O; } -} + +} // end namespace llvm /// view - Visualize the annotated bipartite CFG with Graphviz. 
void EdgeBundles::view() const { diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 566b8d5..32c57e3 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -137,6 +138,7 @@ class ExeDepsFix : public MachineFunctionPass { MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + RegisterClassInfo RegClassInfo; std::vector<SmallVector<int, 1>> AliasMap; const unsigned NumRegs; LiveReg *LiveRegs; @@ -170,12 +172,10 @@ public: MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } - const char *getPassName() const override { - return "Execution dependency fix"; - } + StringRef getPassName() const override { return "Execution dependency fix"; } private: iterator_range<SmallVectorImpl<int>::const_iterator> @@ -203,6 +203,8 @@ private: void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); + void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, + unsigned Pref); bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); void processUndefReads(MachineBasicBlock*); }; @@ -473,6 +475,60 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { processDefs(MI, !DomP.first); } +/// \brief Helps avoid false dependencies on undef registers by updating the +/// machine instructions' undef operand to use a register that the instruction +/// is truly dependent on, or use a register with clearance higher than Pref. +void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + MachineOperand &MO = MI->getOperand(OpIdx); + assert(MO.isUndef() && "Expected undef machine operand"); + + unsigned OriginalReg = MO.getReg(); + + // Update only undef operands that are mapped to one register. + if (AliasMap[OriginalReg].size() != 1) + return; + + // Get the undef operand's register class + const TargetRegisterClass *OpRC = + TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); + + // If the instruction has a true dependency, we can hide the false depdency + // behind it. + for (MachineOperand &CurrMO : MI->operands()) { + if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || + !OpRC->contains(CurrMO.getReg())) + continue; + // We found a true dependency - replace the undef register with the true + // dependency. + MO.setReg(CurrMO.getReg()); + return; + } + + // Go over all registers in the register class and find the register with + // max clearance or clearance higher than Pref. 
+ unsigned MaxClearance = 0; + unsigned MaxClearanceReg = OriginalReg; + ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC); + for (auto Reg : Order) { + assert(AliasMap[Reg].size() == 1 && + "Reg is expected to be mapped to a single index"); + int RCrx = *regIndices(Reg).begin(); + unsigned Clearance = CurInstr - LiveRegs[RCrx].Def; + if (Clearance <= MaxClearance) + continue; + MaxClearance = Clearance; + MaxClearanceReg = Reg; + + if (MaxClearance > Pref) + break; + } + + // Update the operand if we found a register with better clearance. + if (MaxClearanceReg != OriginalReg) + MO.setReg(MaxClearanceReg); +} + /// \brief Return true to if it makes sense to break dependence on a partial def /// or undef use. bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, @@ -510,6 +566,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { unsigned OpNum; unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); if (Pref) { + pickBestRegisterForUndef(MI, OpNum, Pref); if (shouldBreakDependence(MI, OpNum, Pref)) UndefReads.push_back(std::make_pair(MI, OpNum)); } @@ -520,8 +577,6 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; - if (MO.isImplicit()) - break; if (MO.isUse()) continue; for (int rx : regIndices(MO.getReg())) { @@ -557,7 +612,7 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { return; // Collect this block's live out register units. - LiveRegSet.init(TRI); + LiveRegSet.init(*TRI); // We do not need to care about pristine registers as they are just preserved // but not actually used in the function. LiveRegSet.addLiveOutsNoPristines(*MBB); @@ -652,9 +707,8 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. - SmallVector<LiveReg, 4> Regs; - for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { - int rx = *i; + SmallVector<const LiveReg *, 4> Regs; + for (int rx : used) { assert(LiveRegs && "no space allocated for live registers"); const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. @@ -663,16 +717,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { continue; } // Sorted insertion. - bool Inserted = false; - for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end(); - i != e && !Inserted; ++i) { - if (LR.Def < i->Def) { - Inserted = true; - Regs.insert(i, LR); - } - } - if (!Inserted) - Regs.push_back(LR); + auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR, + [](const LiveReg *LHS, const LiveReg *RHS) { + return LHS->Def < RHS->Def; + }); + Regs.insert(I, &LR); } // doms are now sorted in order of appearance. Try to merge them all, giving @@ -680,14 +729,14 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { DomainValue *dv = nullptr; while (!Regs.empty()) { if (!dv) { - dv = Regs.pop_back_val().Value; + dv = Regs.pop_back_val()->Value; // Force the first dv to match the current instruction. dv->AvailableDomains = dv->getCommonDomains(available); assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } - DomainValue *Latest = Regs.pop_back_val().Value; + DomainValue *Latest = Regs.pop_back_val()->Value; // Skip already merged values. 
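The visitSoftInstr change above swaps a hand-rolled sorted insertion for std::upper_bound over LiveReg pointers; the same pattern in isolation (Item is an illustrative element type, not the pass's LiveReg):

    #include <algorithm>
    #include <vector>

    struct Item { int Def; };   // stand-in for the entries ordered by Def

    // Insert P while keeping Regs sorted by ascending Def, as the new code does.
    void sortedInsert(std::vector<const Item *> &Regs, const Item *P) {
      auto I = std::upper_bound(Regs.begin(), Regs.end(), P,
                                [](const Item *LHS, const Item *RHS) {
                                  return LHS->Def < RHS->Def;
                                });
      Regs.insert(I, P);
    }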
if (Latest == dv || Latest->Next) continue; @@ -731,6 +780,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getSubtarget().getInstrInfo(); TRI = MF->getSubtarget().getRegisterInfo(); + RegClassInfo.runOnMachineFunction(mf); LiveRegs = nullptr; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp index b16f81c..d61afad 100644 --- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp +++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp @@ -30,7 +30,7 @@ public: bool runOnMachineFunction(MachineFunction &F) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } }; } diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp index c8116a4..be21c73 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -32,7 +32,7 @@ class Printer : public FunctionPass { public: explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} - const char *getPassName() const override; + StringRef getPassName() const override; void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnFunction(Function &F) override; @@ -87,7 +87,7 @@ FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { return new Printer(OS); } -const char *Printer::getPassName() const { +StringRef Printer::getPassName() const { return "Print Garbage Collector Information"; } diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp index bb8cfa1..d183c7f 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp @@ -14,6 +14,8 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" using namespace llvm; +LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry) + GCMetadataPrinter::GCMetadataPrinter() {} GCMetadataPrinter::~GCMetadataPrinter() {} diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp index 326adab..3524654 100644 --- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp @@ -45,7 +45,7 @@ public: static char ID; LowerIntrinsics(); - const char *getPassName() const override; + StringRef getPassName() const override; void getAnalysisUsage(AnalysisUsage &AU) const override; bool doInitialization(Module &M) override; @@ -93,7 +93,7 @@ LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) { initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); } -const char *LowerIntrinsics::getPassName() const { +StringRef LowerIntrinsics::getPassName() const { return "Lower Garbage Collection Instructions"; } @@ -316,7 +316,7 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); RI != FI->roots_end();) { // If the root references a dead object, no need to keep it. - if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { + if (MF.getFrameInfo().isDeadObjectIndex(RI->Num)) { RI = FI->removeStackRoot(RI); } else { unsigned FrameReg; // FIXME: surely GCRoot ought to store the @@ -338,11 +338,11 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // Find the size of the stack frame. There may be no correct static frame // size, we use UINT64_MAX to represent this. 
- const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - const bool DynamicFrameSize = MFI->hasVarSizedObjects() || + const bool DynamicFrameSize = MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF); - FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize()); + FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI.getStackSize()); // Find all safe points. if (FI->getStrategy().needsSafePoints()) diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp index 554d326..31ab86f 100644 --- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp +++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp @@ -16,6 +16,8 @@ using namespace llvm; +LLVM_INSTANTIATE_REGISTRY(GCRegistry) + GCStrategy::GCStrategy() : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false), CustomWriteBarriers(false), CustomRoots(false), InitRoots(true), diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp new file mode 100644 index 0000000..1321221 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -0,0 +1,170 @@ +//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements some simple delegations needed for call lowering. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +bool CallLowering::lowerCall( + MachineIRBuilder &MIRBuilder, const CallInst &CI, unsigned ResReg, + ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const { + auto &DL = CI.getParent()->getParent()->getParent()->getDataLayout(); + + // First step is to marshall all the function's parameters into the correct + // physregs and memory locations. Gather the sequence of argument types that + // we'll pass to the assigner function. 
+ SmallVector<ArgInfo, 8> OrigArgs; + unsigned i = 0; + for (auto &Arg : CI.arg_operands()) { + ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}}; + setArgFlags(OrigArg, i + 1, DL, CI); + OrigArgs.push_back(OrigArg); + ++i; + } + + MachineOperand Callee = MachineOperand::CreateImm(0); + if (Function *F = CI.getCalledFunction()) + Callee = MachineOperand::CreateGA(F, 0); + else + Callee = MachineOperand::CreateReg(GetCalleeReg(), false); + + ArgInfo OrigRet{ResReg, CI.getType(), ISD::ArgFlagsTy{}}; + if (!OrigRet.Ty->isVoidTy()) + setArgFlags(OrigRet, AttributeSet::ReturnIndex, DL, CI); + + return lowerCall(MIRBuilder, Callee, OrigRet, OrigArgs); +} + +template <typename FuncInfoTy> +void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const FuncInfoTy &FuncInfo) const { + const AttributeSet &Attrs = FuncInfo.getAttributes(); + if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) + Arg.Flags.setZExt(); + if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) + Arg.Flags.setSExt(); + if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) + Arg.Flags.setInReg(); + if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) + Arg.Flags.setSRet(); + if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) + Arg.Flags.setSwiftSelf(); + if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError)) + Arg.Flags.setSwiftError(); + if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) + Arg.Flags.setByVal(); + if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) + Arg.Flags.setInAlloca(); + + if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) { + Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); + Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. + unsigned FrameAlign; + if (FuncInfo.getParamAlignment(OpIdx)) + FrameAlign = FuncInfo.getParamAlignment(OpIdx); + else + FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); + Arg.Flags.setByValAlign(FrameAlign); + } + if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) + Arg.Flags.setNest(); + Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); +} + +template void +CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const Function &FuncInfo) const; + +template void +CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const CallInst &FuncInfo) const; + +bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, + CCAssignFn *AssignFn, + ArrayRef<ArgInfo> Args, + ValueHandler &Handler) const { + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = *MF.getFunction(); + const DataLayout &DL = F.getParent()->getDataLayout(); + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); + + unsigned NumArgs = Args.size(); + for (unsigned i = 0; i != NumArgs; ++i) { + MVT CurVT = MVT::getVT(Args[i].Ty); + if (AssignFn(i, CurVT, CurVT, CCValAssign::Full, Args[i].Flags, CCInfo)) + return false; + } + + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + if (VA.isRegLoc()) + Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA); + else if (VA.isMemLoc()) { + unsigned Size = VA.getValVT() == MVT::iPTR + ? 
DL.getPointerSize() + : alignTo(VA.getValVT().getSizeInBits(), 8) / 8; + unsigned Offset = VA.getLocMemOffset(); + MachinePointerInfo MPO; + unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO); + Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA); + } else { + // FIXME: Support byvals and other weirdness + return false; + } + } + return true; +} + +unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg, + CCValAssign &VA) { + LLT LocTy{VA.getLocVT()}; + switch (VA.getLocInfo()) { + default: break; + case CCValAssign::Full: + case CCValAssign::BCvt: + // FIXME: bitconverting between vector types may or may not be a + // nop in big-endian situations. + return ValReg; + case CCValAssign::AExt: + assert(!VA.getLocVT().isVector() && "unexpected vector extend"); + // Otherwise, it's a nop. + return ValReg; + case CCValAssign::SExt: { + unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); + MIRBuilder.buildSExt(NewReg, ValReg); + return NewReg; + } + case CCValAssign::ZExt: { + unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); + MIRBuilder.buildZExt(NewReg, ValReg); + return NewReg; + } + } + llvm_unreachable("unable to extend register"); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp index 231e5ac..fcd2722 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -25,6 +25,8 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) { void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeIRTranslatorPass(Registry); + initializeLegalizerPass(Registry); initializeRegBankSelectPass(Registry); + initializeInstructionSelectPass(Registry); } #endif // LLVM_BUILD_GLOBAL_ISEL diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b8a960c..89a042f 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -14,12 +14,19 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #define DEBUG_TYPE "irtranslator" @@ -27,13 +34,29 @@ using namespace llvm; char IRTranslator::ID = 0; -INITIALIZE_PASS(IRTranslator, "irtranslator", "IRTranslator LLVM IR -> MI", - false, false); +INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", + false, false) + +static void reportTranslationError(const Value &V, const Twine &Message) { + std::string ErrStorage; + raw_string_ostream Err(ErrStorage); + Err << Message << ": " << V << '\n'; + report_fatal_error(Err.str()); +} IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) { initializeIRTranslatorPass(*PassRegistry::getPassRegistry()); } +void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) 
const { + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + + unsigned IRTranslator::getOrCreateVReg(const Value &Val) { unsigned &ValReg = ValToVReg[&Val]; // Check if this is the first time we see Val. @@ -42,56 +65,132 @@ unsigned IRTranslator::getOrCreateVReg(const Value &Val) { // we need to concat together to produce the value. assert(Val.getType()->isSized() && "Don't know how to create an empty vreg"); - assert(!Val.getType()->isAggregateType() && "Not yet implemented"); - unsigned Size = Val.getType()->getPrimitiveSizeInBits(); - unsigned VReg = MRI->createGenericVirtualRegister(Size); + unsigned VReg = MRI->createGenericVirtualRegister(LLT{*Val.getType(), *DL}); ValReg = VReg; - assert(!isa<Constant>(Val) && "Not yet implemented"); + + if (auto CV = dyn_cast<Constant>(&Val)) { + bool Success = translate(*CV, VReg); + if (!Success) { + if (!TPC->isGlobalISelAbortEnabled()) { + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return VReg; + } + reportTranslationError(Val, "unable to translate constant"); + } + } } return ValReg; } +int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { + if (FrameIndices.find(&AI) != FrameIndices.end()) + return FrameIndices[&AI]; + + unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType()); + unsigned Size = + ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue(); + + // Always allocate at least one byte. + Size = std::max(Size, 1u); + + unsigned Alignment = AI.getAlignment(); + if (!Alignment) + Alignment = DL->getABITypeAlignment(AI.getAllocatedType()); + + int &FI = FrameIndices[&AI]; + FI = MF->getFrameInfo().CreateStackObject(Size, Alignment, false, &AI); + return FI; +} + +unsigned IRTranslator::getMemOpAlignment(const Instruction &I) { + unsigned Alignment = 0; + Type *ValTy = nullptr; + if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) { + Alignment = SI->getAlignment(); + ValTy = SI->getValueOperand()->getType(); + } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) { + Alignment = LI->getAlignment(); + ValTy = LI->getType(); + } else if (!TPC->isGlobalISelAbortEnabled()) { + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return 1; + } else + llvm_unreachable("unhandled memory instruction"); + + return Alignment ? Alignment : DL->getABITypeAlignment(ValTy); +} + MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) { MachineBasicBlock *&MBB = BBToMBB[&BB]; if (!MBB) { - MachineFunction &MF = MIRBuilder.getMF(); - MBB = MF.CreateMachineBasicBlock(); - MF.push_back(MBB); + MBB = MF->CreateMachineBasicBlock(&BB); + MF->push_back(MBB); + + if (BB.hasAddressTaken()) + MBB->setHasAddressTaken(); } return *MBB; } -bool IRTranslator::translateBinaryOp(unsigned Opcode, const Instruction &Inst) { +bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder) { + // FIXME: handle signed/unsigned wrapping flags. + // Get or create a virtual register for each value. // Unless the value is a Constant => loadimm cst? // or inline constant each time? // Creation of a virtual register needs to have a size. 
- unsigned Op0 = getOrCreateVReg(*Inst.getOperand(0)); - unsigned Op1 = getOrCreateVReg(*Inst.getOperand(1)); - unsigned Res = getOrCreateVReg(Inst); - MIRBuilder.buildInstr(Opcode, Inst.getType(), Res, Op0, Op1); + unsigned Op0 = getOrCreateVReg(*U.getOperand(0)); + unsigned Op1 = getOrCreateVReg(*U.getOperand(1)); + unsigned Res = getOrCreateVReg(U); + MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1); return true; } -bool IRTranslator::translateReturn(const Instruction &Inst) { - assert(isa<ReturnInst>(Inst) && "Return expected"); - const Value *Ret = cast<ReturnInst>(Inst).getReturnValue(); +bool IRTranslator::translateCompare(const User &U, + MachineIRBuilder &MIRBuilder) { + const CmpInst *CI = dyn_cast<CmpInst>(&U); + unsigned Op0 = getOrCreateVReg(*U.getOperand(0)); + unsigned Op1 = getOrCreateVReg(*U.getOperand(1)); + unsigned Res = getOrCreateVReg(U); + CmpInst::Predicate Pred = + CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>( + cast<ConstantExpr>(U).getPredicate()); + + if (CmpInst::isIntPredicate(Pred)) + MIRBuilder.buildICmp(Pred, Res, Op0, Op1); + else + MIRBuilder.buildFCmp(Pred, Res, Op0, Op1); + + return true; +} + +bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { + const ReturnInst &RI = cast<ReturnInst>(U); + const Value *Ret = RI.getReturnValue(); // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret)); } -bool IRTranslator::translateBr(const Instruction &Inst) { - assert(isa<BranchInst>(Inst) && "Branch expected"); - const BranchInst &BrInst = *cast<BranchInst>(&Inst); - if (BrInst.isUnconditional()) { - const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getOperand(0)); - MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt); - MIRBuilder.buildInstr(TargetOpcode::G_BR, BrTgt.getType(), TgtBB); - } else { - assert(0 && "Not yet implemented"); +bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { + const BranchInst &BrInst = cast<BranchInst>(U); + unsigned Succ = 0; + if (!BrInst.isUnconditional()) { + // We want a G_BRCOND to the true BB followed by an unconditional branch. + unsigned Tst = getOrCreateVReg(*BrInst.getCondition()); + const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++)); + MachineBasicBlock &TrueBB = getOrCreateBB(TrueTgt); + MIRBuilder.buildBrCond(Tst, TrueBB); } + + const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ)); + MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt); + MIRBuilder.buildBr(TgtBB); + // Link successors. MachineBasicBlock &CurBB = MIRBuilder.getMBB(); for (const BasicBlock *Succ : BrInst.successors()) @@ -99,66 +198,694 @@ bool IRTranslator::translateBr(const Instruction &Inst) { return true; } +bool IRTranslator::translateSwitch(const User &U, + MachineIRBuilder &MIRBuilder) { + // For now, just translate as a chain of conditional branches. + // FIXME: could we share most of the logic/code in + // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel? + // At first sight, it seems most of the logic in there is independent of + // SelectionDAG-specifics and a lot of work went in to optimize switch + // lowering in there. 
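At the source level, the chain-of-branches lowering described in translateSwitch's comment corresponds to rewriting a switch as successive equality tests, one compare and conditional branch per case (purely illustrative code, not from the patch):

    int dispatch(int v) {            // original switch
      switch (v) {
      case 1: return 10;
      case 2: return 20;
      default: return -1;
      }
    }

    int dispatchLowered(int v) {     // shape produced by the chained lowering
      if (v == 1) return 10;         // one G_ICMP eq + G_BRCOND per case
      if (v == 2) return 20;
      return -1;                     // default successor
    }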
+ + const SwitchInst &SwInst = cast<SwitchInst>(U); + const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition()); + + LLT LLTi1 = LLT(*Type::getInt1Ty(U.getContext()), *DL); + for (auto &CaseIt : SwInst.cases()) { + const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue()); + const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1); + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue); + MachineBasicBlock &CurBB = MIRBuilder.getMBB(); + MachineBasicBlock &TrueBB = getOrCreateBB(*CaseIt.getCaseSuccessor()); + + MIRBuilder.buildBrCond(Tst, TrueBB); + CurBB.addSuccessor(&TrueBB); + + MachineBasicBlock *FalseBB = + MF->CreateMachineBasicBlock(SwInst.getParent()); + MF->push_back(FalseBB); + MIRBuilder.buildBr(*FalseBB); + CurBB.addSuccessor(FalseBB); + + MIRBuilder.setMBB(*FalseBB); + } + // handle default case + MachineBasicBlock &DefaultBB = getOrCreateBB(*SwInst.getDefaultDest()); + MIRBuilder.buildBr(DefaultBB); + MIRBuilder.getMBB().addSuccessor(&DefaultBB); + + return true; +} + +bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { + const LoadInst &LI = cast<LoadInst>(U); + + if (!TPC->isGlobalISelAbortEnabled() && LI.isAtomic()) + return false; + + assert(!LI.isAtomic() && "only non-atomic loads are supported at the moment"); + auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOLoad; + + unsigned Res = getOrCreateVReg(LI); + unsigned Addr = getOrCreateVReg(*LI.getPointerOperand()); + LLT VTy{*LI.getType(), *DL}, PTy{*LI.getPointerOperand()->getType(), *DL}; + MIRBuilder.buildLoad( + Res, Addr, + *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()), + Flags, DL->getTypeStoreSize(LI.getType()), + getMemOpAlignment(LI))); + return true; +} + +bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { + const StoreInst &SI = cast<StoreInst>(U); + + if (!TPC->isGlobalISelAbortEnabled() && SI.isAtomic()) + return false; + + assert(!SI.isAtomic() && "only non-atomic stores supported at the moment"); + auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOStore; + + unsigned Val = getOrCreateVReg(*SI.getValueOperand()); + unsigned Addr = getOrCreateVReg(*SI.getPointerOperand()); + LLT VTy{*SI.getValueOperand()->getType(), *DL}, + PTy{*SI.getPointerOperand()->getType(), *DL}; + + MIRBuilder.buildStore( + Val, Addr, + *MF->getMachineMemOperand( + MachinePointerInfo(SI.getPointerOperand()), Flags, + DL->getTypeStoreSize(SI.getValueOperand()->getType()), + getMemOpAlignment(SI))); + return true; +} + +bool IRTranslator::translateExtractValue(const User &U, + MachineIRBuilder &MIRBuilder) { + const Value *Src = U.getOperand(0); + Type *Int32Ty = Type::getInt32Ty(U.getContext()); + SmallVector<Value *, 1> Indices; + + // getIndexedOffsetInType is designed for GEPs, so the first index is the + // usual array element rather than looking into the actual aggregate. 
+ Indices.push_back(ConstantInt::get(Int32Ty, 0)); + + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) { + for (auto Idx : EVI->indices()) + Indices.push_back(ConstantInt::get(Int32Ty, Idx)); + } else { + for (unsigned i = 1; i < U.getNumOperands(); ++i) + Indices.push_back(U.getOperand(i)); + } + + uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); + + unsigned Res = getOrCreateVReg(U); + MIRBuilder.buildExtract(Res, Offset, getOrCreateVReg(*Src)); + + return true; +} + +bool IRTranslator::translateInsertValue(const User &U, + MachineIRBuilder &MIRBuilder) { + const Value *Src = U.getOperand(0); + Type *Int32Ty = Type::getInt32Ty(U.getContext()); + SmallVector<Value *, 1> Indices; + + // getIndexedOffsetInType is designed for GEPs, so the first index is the + // usual array element rather than looking into the actual aggregate. + Indices.push_back(ConstantInt::get(Int32Ty, 0)); + + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) { + for (auto Idx : IVI->indices()) + Indices.push_back(ConstantInt::get(Int32Ty, Idx)); + } else { + for (unsigned i = 2; i < U.getNumOperands(); ++i) + Indices.push_back(U.getOperand(i)); + } + + uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); + + unsigned Res = getOrCreateVReg(U); + const Value &Inserted = *U.getOperand(1); + MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), getOrCreateVReg(Inserted), + Offset); + + return true; +} + +bool IRTranslator::translateSelect(const User &U, + MachineIRBuilder &MIRBuilder) { + MIRBuilder.buildSelect(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)), + getOrCreateVReg(*U.getOperand(1)), + getOrCreateVReg(*U.getOperand(2))); + return true; +} + +bool IRTranslator::translateBitCast(const User &U, + MachineIRBuilder &MIRBuilder) { + if (LLT{*U.getOperand(0)->getType(), *DL} == LLT{*U.getType(), *DL}) { + unsigned &Reg = ValToVReg[&U]; + if (Reg) + MIRBuilder.buildCopy(Reg, getOrCreateVReg(*U.getOperand(0))); + else + Reg = getOrCreateVReg(*U.getOperand(0)); + return true; + } + return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder); +} + +bool IRTranslator::translateCast(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder) { + unsigned Op = getOrCreateVReg(*U.getOperand(0)); + unsigned Res = getOrCreateVReg(U); + MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op); + return true; +} + +bool IRTranslator::translateGetElementPtr(const User &U, + MachineIRBuilder &MIRBuilder) { + // FIXME: support vector GEPs. + if (U.getType()->isVectorTy()) + return false; + + Value &Op0 = *U.getOperand(0); + unsigned BaseReg = getOrCreateVReg(Op0); + LLT PtrTy{*Op0.getType(), *DL}; + unsigned PtrSize = DL->getPointerSizeInBits(PtrTy.getAddressSpace()); + LLT OffsetTy = LLT::scalar(PtrSize); + + int64_t Offset = 0; + for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U); + GTI != E; ++GTI) { + const Value *Idx = GTI.getOperand(); + if (StructType *StTy = GTI.getStructTypeOrNull()) { + unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); + Offset += DL->getStructLayout(StTy)->getElementOffset(Field); + continue; + } else { + uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. 
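translateGetElementPtr folds every constant index into a running byte offset and emits multiply/add operations only for dynamic indices; the arithmetic corresponds to a hand-written address computation like the one below (the struct and its offsets are invented for the example):

    #include <cstddef>
    #include <cstdint>

    struct Pair { std::int32_t A; float B; };   // example element type

    // Address of base[i].B, computed as the translator would: the constant
    // field offset is folded into one literal, the dynamic index is scaled
    // by the element size.
    char *fieldAddress(char *Base, std::int64_t I) {
      std::int64_t Offset =
          I * static_cast<std::int64_t>(sizeof(Pair))     // dynamic index * alloc size
          + static_cast<std::int64_t>(offsetof(Pair, B)); // constant struct offset
      return Base + Offset;
    }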
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + Offset += ElementSize * CI->getSExtValue(); + continue; + } + + if (Offset != 0) { + unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); + unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildConstant(OffsetReg, Offset); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); + + BaseReg = NewBaseReg; + Offset = 0; + } + + // N = N + Idx * ElementSize; + unsigned ElementSizeReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildConstant(ElementSizeReg, ElementSize); + + unsigned IdxReg = getOrCreateVReg(*Idx); + if (MRI->getType(IdxReg) != OffsetTy) { + unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg); + IdxReg = NewIdxReg; + } + + unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildMul(OffsetReg, ElementSizeReg, IdxReg); + + unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); + BaseReg = NewBaseReg; + } + } + + if (Offset != 0) { + unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildConstant(OffsetReg, Offset); + MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg); + return true; + } + + MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg); + return true; +} + +bool IRTranslator::translateMemcpy(const CallInst &CI, + MachineIRBuilder &MIRBuilder) { + LLT SizeTy{*CI.getArgOperand(2)->getType(), *DL}; + if (cast<PointerType>(CI.getArgOperand(0)->getType())->getAddressSpace() != + 0 || + cast<PointerType>(CI.getArgOperand(1)->getType())->getAddressSpace() != + 0 || + SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0)) + return false; + + SmallVector<CallLowering::ArgInfo, 8> Args; + for (int i = 0; i < 3; ++i) { + const auto &Arg = CI.getArgOperand(i); + Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType()); + } + + MachineOperand Callee = MachineOperand::CreateES("memcpy"); + + return CLI->lowerCall(MIRBuilder, Callee, + CallLowering::ArgInfo(0, CI.getType()), Args); +} + +void IRTranslator::getStackGuard(unsigned DstReg, + MachineIRBuilder &MIRBuilder) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD); + MIB.addDef(DstReg); + + auto &TLI = *MF->getSubtarget().getTargetLowering(); + Value *Global = TLI.getSDagStackGuard(*MF->getFunction()->getParent()); + if (!Global) + return; + + MachinePointerInfo MPInfo(Global); + MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1); + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable; + *MemRefs = + MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, + DL->getPointerABIAlignment()); + MIB.setMemRefs(MemRefs, MemRefs + 1); +} + +bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, + MachineIRBuilder &MIRBuilder) { + LLT Ty{*CI.getOperand(0)->getType(), *DL}; + LLT s1 = LLT::scalar(1); + unsigned Width = Ty.getSizeInBits(); + unsigned Res = MRI->createGenericVirtualRegister(Ty); + unsigned Overflow = MRI->createGenericVirtualRegister(s1); + auto MIB = MIRBuilder.buildInstr(Op) + .addDef(Res) + .addDef(Overflow) + .addUse(getOrCreateVReg(*CI.getOperand(0))) + .addUse(getOrCreateVReg(*CI.getOperand(1))); + + if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) { + unsigned Zero = MRI->createGenericVirtualRegister(s1); + EntryBuilder.buildConstant(Zero, 0); + MIB.addUse(Zero); + } + + 
MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width); + return true; +} + +bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, + MachineIRBuilder &MIRBuilder) { + switch (ID) { + default: + break; + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + // FIXME: these obviously need to be supported properly. + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return true; + case Intrinsic::uadd_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder); + case Intrinsic::sadd_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder); + case Intrinsic::usub_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBE, MIRBuilder); + case Intrinsic::ssub_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder); + case Intrinsic::umul_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder); + case Intrinsic::smul_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder); + case Intrinsic::memcpy: + return translateMemcpy(CI, MIRBuilder); + case Intrinsic::eh_typeid_for: { + GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); + unsigned Reg = getOrCreateVReg(CI); + unsigned TypeID = MF->getTypeIDFor(GV); + MIRBuilder.buildConstant(Reg, TypeID); + return true; + } + case Intrinsic::objectsize: { + // If we don't know by now, we're never going to know. + const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1)); + + MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0); + return true; + } + case Intrinsic::stackguard: + getStackGuard(getOrCreateVReg(CI), MIRBuilder); + return true; + case Intrinsic::stackprotector: { + LLT PtrTy{*CI.getArgOperand(0)->getType(), *DL}; + unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy); + getStackGuard(GuardVal, MIRBuilder); + + AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1)); + MIRBuilder.buildStore( + GuardVal, getOrCreateVReg(*Slot), + *MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, + getOrCreateFrameIndex(*Slot)), + MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, + PtrTy.getSizeInBits() / 8, 8)); + return true; + } + } + return false; +} + +bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { + const CallInst &CI = cast<CallInst>(U); + auto TII = MF->getTarget().getIntrinsicInfo(); + const Function *F = CI.getCalledFunction(); + + if (!F || !F->isIntrinsic()) { + unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI); + SmallVector<unsigned, 8> Args; + for (auto &Arg: CI.arg_operands()) + Args.push_back(getOrCreateVReg(*Arg)); + + return CLI->lowerCall(MIRBuilder, CI, Res, Args, [&]() { + return getOrCreateVReg(*CI.getCalledValue()); + }); + } + + Intrinsic::ID ID = F->getIntrinsicID(); + if (TII && ID == Intrinsic::not_intrinsic) + ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F)); + + assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); + + if (translateKnownIntrinsic(CI, ID, MIRBuilder)) + return true; + + unsigned Res = CI.getType()->isVoidTy() ? 
0 : getOrCreateVReg(CI); + MachineInstrBuilder MIB = + MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory()); + + for (auto &Arg : CI.arg_operands()) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) + MIB.addImm(CI->getSExtValue()); + else + MIB.addUse(getOrCreateVReg(*Arg)); + } + return true; +} + +bool IRTranslator::translateInvoke(const User &U, + MachineIRBuilder &MIRBuilder) { + const InvokeInst &I = cast<InvokeInst>(U); + MCContext &Context = MF->getContext(); + + const BasicBlock *ReturnBB = I.getSuccessor(0); + const BasicBlock *EHPadBB = I.getSuccessor(1); + + const Value *Callee(I.getCalledValue()); + const Function *Fn = dyn_cast<Function>(Callee); + if (isa<InlineAsm>(Callee)) + return false; + + // FIXME: support invoking patchpoint and statepoint intrinsics. + if (Fn && Fn->isIntrinsic()) + return false; + + // FIXME: support whatever these are. + if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) + return false; + + // FIXME: support Windows exception handling. + if (!isa<LandingPadInst>(EHPadBB->front())) + return false; + + + // Emit the actual call, bracketed by EH_LABELs so that the MF knows about + // the region covered by the try. + MCSymbol *BeginSymbol = Context.createTempSymbol(); + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); + + unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I); + SmallVector<CallLowering::ArgInfo, 8> Args; + for (auto &Arg: I.arg_operands()) + Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType()); + + if (!CLI->lowerCall(MIRBuilder, MachineOperand::CreateGA(Fn, 0), + CallLowering::ArgInfo(Res, I.getType()), Args)) + return false; + + MCSymbol *EndSymbol = Context.createTempSymbol(); + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); + + // FIXME: track probabilities. + MachineBasicBlock &EHPadMBB = getOrCreateBB(*EHPadBB), + &ReturnMBB = getOrCreateBB(*ReturnBB); + MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); + MIRBuilder.getMBB().addSuccessor(&ReturnMBB); + MIRBuilder.getMBB().addSuccessor(&EHPadMBB); + + return true; +} + +bool IRTranslator::translateLandingPad(const User &U, + MachineIRBuilder &MIRBuilder) { + const LandingPadInst &LP = cast<LandingPadInst>(U); + + MachineBasicBlock &MBB = MIRBuilder.getMBB(); + addLandingPadInfo(LP, MBB); + + MBB.setIsEHPad(); + + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother. + auto &TLI = *MF->getSubtarget().getTargetLowering(); + const Constant *PersonalityFn = MF->getFunction()->getPersonalityFn(); + if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && + TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + return true; + + // If landingpad's return type is token type, we don't create DAG nodes + // for its exception pointer and selector value. The extraction of exception + // pointer or selector value from token type landingpads is not currently + // supported. + if (LP.getType()->isTokenTy()) + return true; + + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL) + .addSym(MF->addLandingPad(&MBB)); + + // Mark exception register as live in. 
+ SmallVector<unsigned, 2> Regs; + SmallVector<uint64_t, 2> Offsets; + LLT p0 = LLT::pointer(0, DL->getPointerSizeInBits()); + if (unsigned Reg = TLI.getExceptionPointerRegister(PersonalityFn)) { + unsigned VReg = MRI->createGenericVirtualRegister(p0); + MIRBuilder.buildCopy(VReg, Reg); + Regs.push_back(VReg); + Offsets.push_back(0); + } + + if (unsigned Reg = TLI.getExceptionSelectorRegister(PersonalityFn)) { + unsigned VReg = MRI->createGenericVirtualRegister(p0); + MIRBuilder.buildCopy(VReg, Reg); + Regs.push_back(VReg); + Offsets.push_back(p0.getSizeInBits()); + } + + MIRBuilder.buildSequence(getOrCreateVReg(LP), Regs, Offsets); + return true; +} + +bool IRTranslator::translateStaticAlloca(const AllocaInst &AI, + MachineIRBuilder &MIRBuilder) { + if (!TPC->isGlobalISelAbortEnabled() && !AI.isStaticAlloca()) + return false; + + assert(AI.isStaticAlloca() && "only handle static allocas now"); + unsigned Res = getOrCreateVReg(AI); + int FI = getOrCreateFrameIndex(AI); + MIRBuilder.buildFrameIndex(Res, FI); + return true; +} + +bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) { + const PHINode &PI = cast<PHINode>(U); + auto MIB = MIRBuilder.buildInstr(TargetOpcode::PHI); + MIB.addDef(getOrCreateVReg(PI)); + + PendingPHIs.emplace_back(&PI, MIB.getInstr()); + return true; +} + +void IRTranslator::finishPendingPhis() { + for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) { + const PHINode *PI = Phi.first; + MachineInstrBuilder MIB(*MF, Phi.second); + + // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator + // won't create extra control flow here, otherwise we need to find the + // dominating predecessor here (or perhaps force the weirder IRTranslators + // to provide a simple boundary). 
+ for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) { + assert(BBToMBB[PI->getIncomingBlock(i)]->isSuccessor(MIB->getParent()) && + "I appear to have misunderstood Machine PHIs"); + MIB.addUse(getOrCreateVReg(*PI->getIncomingValue(i))); + MIB.addMBB(BBToMBB[PI->getIncomingBlock(i)]); + } + } +} + bool IRTranslator::translate(const Instruction &Inst) { - MIRBuilder.setDebugLoc(Inst.getDebugLoc()); + CurBuilder.setDebugLoc(Inst.getDebugLoc()); switch(Inst.getOpcode()) { - case Instruction::Add: - return translateBinaryOp(TargetOpcode::G_ADD, Inst); - case Instruction::Or: - return translateBinaryOp(TargetOpcode::G_OR, Inst); - case Instruction::Br: - return translateBr(Inst); - case Instruction::Ret: - return translateReturn(Inst); - +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder); +#include "llvm/IR/Instruction.def" default: - llvm_unreachable("Opcode not supported"); + if (!TPC->isGlobalISelAbortEnabled()) + return false; + llvm_unreachable("unknown opcode"); } } +bool IRTranslator::translate(const Constant &C, unsigned Reg) { + if (auto CI = dyn_cast<ConstantInt>(&C)) + EntryBuilder.buildConstant(Reg, *CI); + else if (auto CF = dyn_cast<ConstantFP>(&C)) + EntryBuilder.buildFConstant(Reg, *CF); + else if (isa<UndefValue>(C)) + EntryBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Reg); + else if (isa<ConstantPointerNull>(C)) + EntryBuilder.buildConstant(Reg, 0); + else if (auto GV = dyn_cast<GlobalValue>(&C)) + EntryBuilder.buildGlobalValue(Reg, GV); + else if (auto CE = dyn_cast<ConstantExpr>(&C)) { + switch(CE->getOpcode()) { +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder); +#include "llvm/IR/Instruction.def" + default: + if (!TPC->isGlobalISelAbortEnabled()) + return false; + llvm_unreachable("unknown opcode"); + } + } else if (!TPC->isGlobalISelAbortEnabled()) + return false; + else + llvm_unreachable("unhandled constant kind"); + + return true; +} -void IRTranslator::finalize() { +void IRTranslator::finalizeFunction() { // Release the memory used by the different maps we // needed during the translation. + PendingPHIs.clear(); ValToVReg.clear(); + FrameIndices.clear(); Constants.clear(); } -bool IRTranslator::runOnMachineFunction(MachineFunction &MF) { - const Function &F = *MF.getFunction(); +bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { + MF = &CurMF; + const Function &F = *MF->getFunction(); if (F.empty()) return false; - CLI = MF.getSubtarget().getCallLowering(); - MIRBuilder.setMF(MF); - MRI = &MF.getRegInfo(); - // Setup the arguments. - MachineBasicBlock &MBB = getOrCreateBB(F.front()); - MIRBuilder.setMBB(MBB); + CLI = MF->getSubtarget().getCallLowering(); + CurBuilder.setMF(*MF); + EntryBuilder.setMF(*MF); + MRI = &MF->getRegInfo(); + DL = &F.getParent()->getDataLayout(); + TPC = &getAnalysis<TargetPassConfig>(); + + assert(PendingPHIs.empty() && "stale PHIs"); + + // Setup a separate basic-block for the arguments and constants, falling + // through to the IR-level Function's entry block. + MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock(); + MF->push_back(EntryBB); + EntryBB->addSuccessor(&getOrCreateBB(F.front())); + EntryBuilder.setMBB(*EntryBB); + + // Lower the actual args into this basic block. 
SmallVector<unsigned, 8> VRegArgs; for (const Argument &Arg: F.args()) VRegArgs.push_back(getOrCreateVReg(Arg)); - bool Succeeded = - CLI->lowerFormalArguments(MIRBuilder, F.getArgumentList(), VRegArgs); - if (!Succeeded) + bool Succeeded = CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs); + if (!Succeeded) { + if (!TPC->isGlobalISelAbortEnabled()) { + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + finalizeFunction(); + return false; + } report_fatal_error("Unable to lower arguments"); + } + // And translate the function! for (const BasicBlock &BB: F) { MachineBasicBlock &MBB = getOrCreateBB(BB); // Set the insertion point of all the following translations to // the end of this basic block. - MIRBuilder.setMBB(MBB); + CurBuilder.setMBB(MBB); + for (const Instruction &Inst: BB) { - bool Succeeded = translate(Inst); + Succeeded &= translate(Inst); if (!Succeeded) { - DEBUG(dbgs() << "Cannot translate: " << Inst << '\n'); - report_fatal_error("Unable to translate instruction"); + if (TPC->isGlobalISelAbortEnabled()) + reportTranslationError(Inst, "unable to translate instruction"); + MF->getProperties().set( + MachineFunctionProperties::Property::FailedISel); + break; } } } - // Now that the MachineFrameInfo has been configured, no further changes to - // the reserved registers are possible. - MRI->freezeReservedRegs(MF); + if (Succeeded) { + finishPendingPhis(); + + // Now that the MachineFrameInfo has been configured, no further changes to + // the reserved registers are possible. + MRI->freezeReservedRegs(*MF); + + // Merge the argument lowering and constants block with its single + // successor, the LLVM-IR entry block. We want the basic block to + // be maximal. + assert(EntryBB->succ_size() == 1 && + "Custom BB used for lowering should have only one successor"); + // Get the successor of the current entry block. + MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin(); + assert(NewEntryBB.pred_size() == 1 && + "LLVM-IR entry block has a predecessor!?"); + // Move all the instruction from the current entry block to the + // new entry block. + NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(), + EntryBB->end()); + + // Update the live-in information for the new entry block. + for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins()) + NewEntryBB.addLiveIn(LiveIn); + NewEntryBB.sortUniqueLiveIns(); + + // Get rid of the now empty basic block. + EntryBB->removeSuccessor(&NewEntryBB); + MF->remove(EntryBB); + + assert(&MF->front() == &NewEntryBB && + "New entry wasn't next in the list of basic block!"); + } + + finalizeFunction(); return false; } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp new file mode 100644 index 0000000..1d205cd --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -0,0 +1,175 @@ +//===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the InstructionSelect class. 
+//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define DEBUG_TYPE "instruction-select" + +using namespace llvm; + +char InstructionSelect::ID = 0; +INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE, + "Select target instructions out of generic instructions", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, + "Select target instructions out of generic instructions", + false, false) + +InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { + initializeInstructionSelectPass(*PassRegistry::getPassRegistry()); +} + +void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +static void reportSelectionError(const MachineInstr *MI, const Twine &Message) { + const MachineFunction &MF = *MI->getParent()->getParent(); + std::string ErrStorage; + raw_string_ostream Err(ErrStorage); + Err << Message << ":\nIn function: " << MF.getName() << '\n'; + if (MI) + Err << *MI << '\n'; + report_fatal_error(Err.str()); +} + +bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); + + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + assert(ISel && "Cannot work without InstructionSelector"); + + // FIXME: freezeReservedRegs is now done in IRTranslator, but there are many + // other MF/MFI fields we need to initialize. + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + +#ifndef NDEBUG + // Check that our input is fully legal: we require the function to have the + // Legalized property, so it should be. + // FIXME: This should be in the MachineVerifier, but it can't use the + // LegalizerInfo as it's currently in the separate GlobalISel library. + // The RegBankSelected property is already checked in the verifier. Note + // that it has the same layering problem, but we only use inline methods so + // end up not needing to link against the GlobalISel library. + if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB) + if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) + reportSelectionError(&MI, "Instruction is not legal"); + +#endif + // FIXME: We could introduce new blocks and will need to fix the outer loop. + // Until then, keep track of the number of blocks to assert that we don't. + const size_t NumBlocks = MF.size(); + + bool Failed = false; + for (MachineBasicBlock *MBB : post_order(&MF)) { + if (MBB->empty()) + continue; + + // Select instructions in reverse block order. We permit erasing so have + // to resort to manually iterating and recognizing the begin (rend) case. 
+    bool ReachedBegin = false;
+    for (auto MII = std::prev(MBB->end()), Begin = MBB->begin();
+         !ReachedBegin;) {
+#ifndef NDEBUG
+      // Keep track of the insertion range for debug printing.
+      const auto AfterIt = std::next(MII);
+#endif
+      // Select this instruction.
+      MachineInstr &MI = *MII;
+
+      // And have our iterator point to the next instruction, if there is one.
+      if (MII == Begin)
+        ReachedBegin = true;
+      else
+        --MII;
+
+      DEBUG(dbgs() << "Selecting: \n  " << MI);
+
+      if (!ISel->select(MI)) {
+        if (TPC.isGlobalISelAbortEnabled())
+          // FIXME: It would be nice to dump all inserted instructions. It's
+          // not obvious how, esp. considering select() can insert after MI.
+          reportSelectionError(&MI, "Cannot select");
+        Failed = true;
+        break;
+      }
+
+      // Dump the range of instructions that MI expanded into.
+      DEBUG({
+        auto InsertedBegin = ReachedBegin ? MBB->begin() : std::next(MII);
+        dbgs() << "Into:\n";
+        for (auto &InsertedMI : make_range(InsertedBegin, AfterIt))
+          dbgs() << "  " << InsertedMI;
+        dbgs() << '\n';
+      });
+    }
+  }
+
+  // Now that selection is complete, there are no more generic vregs. Verify
+  // that the size of the now-constrained vreg is unchanged and that it has a
+  // register class.
+  for (auto &VRegToType : MRI.getVRegToType()) {
+    unsigned VReg = VRegToType.first;
+    auto *RC = MRI.getRegClassOrNull(VReg);
+    auto *MI = MRI.def_instr_begin(VReg) == MRI.def_instr_end()
+                   ? nullptr
+                   : &*MRI.def_instr_begin(VReg);
+    if (!RC) {
+      if (TPC.isGlobalISelAbortEnabled())
+        reportSelectionError(MI, "VReg has no regclass after selection");
+      Failed = true;
+      break;
+    }
+
+    if (VRegToType.second.isValid() &&
+        VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) {
+      if (TPC.isGlobalISelAbortEnabled())
+        reportSelectionError(
+            MI, "VReg has explicit size different from class size");
+      Failed = true;
+      break;
+    }
+  }
+
+  MRI.getVRegToType().clear();
+
+  if (!TPC.isGlobalISelAbortEnabled() && (Failed || MF.size() != NumBlocks)) {
+    MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+    return false;
+  }
+  assert(MF.size() == NumBlocks && "Inserting blocks is not supported yet");
+
+  // FIXME: Should we accurately track changes?
+  return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
new file mode 100644
index 0000000..5c34da0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -0,0 +1,60 @@
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the InstructionSelector class.
+//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define DEBUG_TYPE "instructionselector" + +using namespace llvm; + +InstructionSelector::InstructionSelector() {} + +bool InstructionSelector::constrainSelectedInstRegOperands( + MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) { + MachineOperand &MO = I.getOperand(OpI); + + // There's nothing to be done on non-register operands. + if (!MO.isReg()) + continue; + + DEBUG(dbgs() << "Converting operand: " << MO << '\n'); + assert(MO.isReg() && "Unsupported non-reg operand"); + + unsigned Reg = MO.getReg(); + // Physical registers don't need to be constrained. + if (TRI.isPhysicalRegister(Reg)) + continue; + + // Register operands with a value of 0 (e.g. predicate operands) don't need + // to be constrained. + if (Reg == 0) + continue; + + // If the operand is a vreg, we should constrain its regclass, and only + // insert COPYs if that's impossible. + // constrainOperandRegClass does that for us. + MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), + Reg, OpI)); + } + return true; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp new file mode 100644 index 0000000..e863568 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -0,0 +1,180 @@ +//===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements the LegalizerHelper class to legalize individual +/// instructions and the LegalizePass wrapper pass for the primary +/// legalization. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "legalizer"
+
+using namespace llvm;
+
+char Legalizer::ID = 0;
+INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
+                      "Legalize a function's Machine IR", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
+                    "Legalize a function's Machine IR", false,
+                    false)
+
+Legalizer::Legalizer() : MachineFunctionPass(ID) {
+  initializeLegalizerPass(*PassRegistry::getPassRegistry());
+}
+
+void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetPassConfig>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void Legalizer::init(MachineFunction &MF) {
+}
+
+bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                const TargetInstrInfo &TII) {
+  bool Changed = false;
+  if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
+    return Changed;
+
+  unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
+  unsigned SrcReg = MI.getOperand(NumDefs).getReg();
+  MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg);
+  if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
+    return Changed;
+
+  unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
+  bool AllDefsReplaced = true;
+
+  // Try to match each register extracted with a corresponding insertion formed
+  // by the G_SEQUENCE.
+  for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
+    MachineOperand &ExtractMO = MI.getOperand(Idx);
+    assert(ExtractMO.isReg() && ExtractMO.isDef() &&
+           "unexpected extract operand");
+
+    unsigned ExtractReg = ExtractMO.getReg();
+    unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();
+
+    while (SeqIdx < NumSeqSrcs &&
+           SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
+      ++SeqIdx;
+
+    if (SeqIdx == NumSeqSrcs) {
+      AllDefsReplaced = false;
+      continue;
+    }
+
+    unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
+    if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
+        MRI.getType(OrigReg) != MRI.getType(ExtractReg)) {
+      AllDefsReplaced = false;
+      continue;
+    }
+
+    assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
+           "unexpected physical register in G_SEQUENCE");
+
+    // Finally we can replace the uses.
+    for (auto &Use : MRI.use_operands(ExtractReg)) {
+      Changed = true;
+      Use.setReg(OrigReg);
+    }
+  }
+
+  if (AllDefsReplaced) {
+    // If SeqI was the next instruction in the BB and we removed it, we'd break
+    // the outer iteration.
+    assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
+           "G_SEQUENCE does not dominate G_EXTRACT");
+
+    MI.eraseFromParent();
+
+    if (MRI.use_empty(SrcReg))
+      SeqI.eraseFromParent();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running that pass.
+ if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); + init(MF); + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + const LegalizerInfo &LegalizerInfo = *MF.getSubtarget().getLegalizerInfo(); + LegalizerHelper Helper(MF); + + // FIXME: an instruction may need more than one pass before it is legal. For + // example on most architectures <3 x i3> is doubly-illegal. It would + // typically proceed along a path like: <3 x i3> -> <3 x i8> -> <8 x i8>. We + // probably want a worklist of instructions rather than naive iterate until + // convergence for performance reasons. + bool Changed = false; + MachineBasicBlock::iterator NextMI; + for (auto &MBB : MF) + for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { + // Get the next Instruction before we try to legalize, because there's a + // good chance MI will be deleted. + NextMI = std::next(MI); + + // Only legalize pre-isel generic instructions: others don't have types + // and are assumed to be legal. + if (!isPreISelGenericOpcode(MI->getOpcode())) + continue; + + auto Res = Helper.legalizeInstr(*MI, LegalizerInfo); + + // Error out if we couldn't legalize this instruction. We may want to fall + // back to DAG ISel instead in the future. + if (Res == LegalizerHelper::UnableToLegalize) { + if (!TPC.isGlobalISelAbortEnabled()) { + MF.getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return false; + } + std::string Msg; + raw_string_ostream OS(Msg); + OS << "unable to legalize instruction: "; + MI->print(OS); + report_fatal_error(OS.str()); + } + + Changed |= Res == LegalizerHelper::Legalized; + } + + + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + for (auto &MBB : MF) { + for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { + // Get the next Instruction before we try to legalize, because there's a + // good chance MI will be deleted. + NextMI = std::next(MI); + + Changed |= combineExtracts(*MI, MRI, TII); + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp new file mode 100644 index 0000000..eb25b6c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -0,0 +1,354 @@ +//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements the LegalizerHelper class to legalize +/// individual instructions and the LegalizeMachineIR wrapper pass for the +/// primary legalization. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#include <sstream> + +#define DEBUG_TYPE "legalize-mir" + +using namespace llvm; + +LegalizerHelper::LegalizerHelper(MachineFunction &MF) + : MRI(MF.getRegInfo()) { + MIRBuilder.setMF(MF); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::legalizeInstrStep(MachineInstr &MI, + const LegalizerInfo &LegalizerInfo) { + auto Action = LegalizerInfo.getAction(MI, MRI); + switch (std::get<0>(Action)) { + case LegalizerInfo::Legal: + return AlreadyLegal; + case LegalizerInfo::Libcall: + return libcall(MI); + case LegalizerInfo::NarrowScalar: + return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action)); + case LegalizerInfo::WidenScalar: + return widenScalar(MI, std::get<1>(Action), std::get<2>(Action)); + case LegalizerInfo::Lower: + return lower(MI, std::get<1>(Action), std::get<2>(Action)); + case LegalizerInfo::FewerElements: + return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action)); + default: + return UnableToLegalize; + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::legalizeInstr(MachineInstr &MI, + const LegalizerInfo &LegalizerInfo) { + SmallVector<MachineInstr *, 4> WorkList; + MIRBuilder.recordInsertions( + [&](MachineInstr *MI) { WorkList.push_back(MI); }); + WorkList.push_back(&MI); + + bool Changed = false; + LegalizeResult Res; + unsigned Idx = 0; + do { + Res = legalizeInstrStep(*WorkList[Idx], LegalizerInfo); + if (Res == UnableToLegalize) { + MIRBuilder.stopRecordingInsertions(); + return UnableToLegalize; + } + Changed |= Res == Legalized; + ++Idx; + } while (Idx < WorkList.size()); + + MIRBuilder.stopRecordingInsertions(); + + return Changed ? Legalized : AlreadyLegal; +} + +void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, + SmallVectorImpl<unsigned> &VRegs) { + unsigned Size = Ty.getSizeInBits(); + SmallVector<uint64_t, 4> Indexes; + for (int i = 0; i < NumParts; ++i) { + VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); + Indexes.push_back(i * Size); + } + MIRBuilder.buildExtract(VRegs, Indexes, Reg); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::libcall(MachineInstr &MI) { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = Ty.getSizeInBits(); + MIRBuilder.setInstr(MI); + + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_FREM: { + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + const char *Name = + TLI.getLibcallName(Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32); + + CLI.lowerCall( + MIRBuilder, MachineOperand::CreateES(Name), + {MI.getOperand(0).getReg(), Ty}, + {{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}}); + MI.eraseFromParent(); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + // FIXME: Don't know how to handle secondary types yet. 
+ if (TypeIdx != 0) + return UnableToLegalize; + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_ADD: { + // Expand in terms of carry-setting/consuming G_ADDE instructions. + unsigned NarrowSize = NarrowTy.getSizeInBits(); + int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / + NarrowTy.getSizeInBits(); + + MIRBuilder.setInstr(MI); + + SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); + extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); + + unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); + MIRBuilder.buildConstant(CarryIn, 0); + + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + + MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], + Src2Regs[i], CarryIn); + + DstRegs.push_back(DstReg); + Indexes.push_back(i * NarrowSize); + CarryIn = CarryOut; + } + unsigned DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildSequence(DstReg, DstRegs, Indexes); + MI.eraseFromParent(); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { + MIRBuilder.setInstr(MI); + + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_ADD: + case TargetOpcode::G_AND: + case TargetOpcode::G_MUL: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: + case TargetOpcode::G_SUB: { + // Perform operation at larger width (any extension is fine here, high bits + // don't affect the result) and then truncate the result back to the + // original type. + unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy); + unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(1).getReg()); + MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(2).getReg()); + + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildInstr(MI.getOpcode()) + .addDef(DstExt) + .addUse(Src1Ext) + .addUse(Src2Ext); + + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: { + unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV + ? 
TargetOpcode::G_SEXT + : TargetOpcode::G_ZEXT; + + unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse( + MI.getOperand(1).getReg()); + + unsigned RHSExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildInstr(ExtOp).addDef(RHSExt).addUse( + MI.getOperand(2).getReg()); + + unsigned ResExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildInstr(MI.getOpcode()) + .addDef(ResExt) + .addUse(LHSExt) + .addUse(RHSExt); + + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), ResExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_LOAD: { + assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) == + WideTy.getSizeInBits() && + "illegal to increase number of bytes loaded"); + + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildLoad(DstExt, MI.getOperand(1).getReg(), + **MI.memoperands_begin()); + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_STORE: { + assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) == + WideTy.getSizeInBits() && + "illegal to increase number of bytes modified by a store"); + + unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildAnyExt(SrcExt, MI.getOperand(0).getReg()); + MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(), + **MI.memoperands_begin()); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_CONSTANT: { + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildConstant(DstExt, *MI.getOperand(1).getCImm()); + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_FCONSTANT: { + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildFConstant(DstExt, *MI.getOperand(1).getFPImm()); + MIRBuilder.buildFPTrunc(MI.getOperand(0).getReg(), DstExt); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_BRCOND: { + unsigned TstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildAnyExt(TstExt, MI.getOperand(0).getReg()); + MIRBuilder.buildBrCond(TstExt, *MI.getOperand(1).getMBB()); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ICMP: { + assert(TypeIdx == 1 && "unable to legalize predicate"); + bool IsSigned = CmpInst::isSigned( + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate())); + unsigned Op0Ext = MRI.createGenericVirtualRegister(WideTy); + unsigned Op1Ext = MRI.createGenericVirtualRegister(WideTy); + if (IsSigned) { + MIRBuilder.buildSExt(Op0Ext, MI.getOperand(2).getReg()); + MIRBuilder.buildSExt(Op1Ext, MI.getOperand(3).getReg()); + } else { + MIRBuilder.buildZExt(Op0Ext, MI.getOperand(2).getReg()); + MIRBuilder.buildZExt(Op1Ext, MI.getOperand(3).getReg()); + } + MIRBuilder.buildICmp( + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()), + MI.getOperand(0).getReg(), Op0Ext, Op1Ext); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_GEP: { + assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); + unsigned OffsetExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.buildSExt(OffsetExt, MI.getOperand(2).getReg()); + MI.getOperand(2).setReg(OffsetExt); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + using namespace TargetOpcode; + 
MIRBuilder.setInstr(MI); + + switch(MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: { + unsigned QuotReg = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV) + .addDef(QuotReg) + .addUse(MI.getOperand(1).getReg()) + .addUse(MI.getOperand(2).getReg()); + + unsigned ProdReg = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg()); + MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), + ProdReg); + MI.eraseFromParent(); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + // FIXME: Don't know how to handle secondary types yet. + if (TypeIdx != 0) + return UnableToLegalize; + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_ADD: { + unsigned NarrowSize = NarrowTy.getSizeInBits(); + unsigned DstReg = MI.getOperand(0).getReg(); + int NumParts = MRI.getType(DstReg).getSizeInBits() / NarrowSize; + + MIRBuilder.setInstr(MI); + + SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); + extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); + + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]); + DstRegs.push_back(DstReg); + Indexes.push_back(i * NarrowSize); + } + + MIRBuilder.buildSequence(DstReg, DstRegs, Indexes); + MI.eraseFromParent(); + return Legalized; + } + } +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp new file mode 100644 index 0000000..e496620 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -0,0 +1,182 @@ +//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer -------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement an interface to specify and query how an illegal operation on a +// given type should be expanded. +// +// Issues to be resolved: +// + Make it fast. +// + Support weird types like i3, <7 x i3>, ... +// + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...) +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Type.h" +#include "llvm/Target/TargetOpcodes.h" +using namespace llvm; + +LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { + // FIXME: these two can be legalized to the fundamental load/store Jakob + // proposed. Once loads & stores are supported. 
+ DefaultActions[TargetOpcode::G_ANYEXT] = Legal; + DefaultActions[TargetOpcode::G_TRUNC] = Legal; + + DefaultActions[TargetOpcode::G_INTRINSIC] = Legal; + DefaultActions[TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS] = Legal; + + DefaultActions[TargetOpcode::G_ADD] = NarrowScalar; + DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar; + DefaultActions[TargetOpcode::G_STORE] = NarrowScalar; + + DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar; +} + +void LegalizerInfo::computeTables() { + for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) { + for (unsigned Idx = 0; Idx != Actions[Opcode].size(); ++Idx) { + for (auto &Action : Actions[Opcode][Idx]) { + LLT Ty = Action.first; + if (!Ty.isVector()) + continue; + + auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp, + Ty.getElementType())]; + Entry = std::max(Entry, Ty.getNumElements()); + } + } + } + + TablesInitialized = true; +} + +// FIXME: inefficient implementation for now. Without ComputeValueVTs we're +// probably going to need specialized lookup structures for various types before +// we have any hope of doing well with something like <13 x i3>. Even the common +// cases should do better than what we have now. +std::pair<LegalizerInfo::LegalizeAction, LLT> +LegalizerInfo::getAction(const InstrAspect &Aspect) const { + assert(TablesInitialized && "backend forgot to call computeTables"); + // These *have* to be implemented for now, they're the fundamental basis of + // how everything else is transformed. + + // Nothing is going to go well with types that aren't a power of 2 yet, so + // don't even try because we might make things worse. + if (!isPowerOf2_64(Aspect.Type.getSizeInBits())) + return std::make_pair(Unsupported, LLT()); + + // FIXME: the long-term plan calls for expansion in terms of load/store (if + // they're not legal). + if (Aspect.Opcode == TargetOpcode::G_SEQUENCE || + Aspect.Opcode == TargetOpcode::G_EXTRACT) + return std::make_pair(Legal, Aspect.Type); + + LegalizeAction Action = findInActions(Aspect); + if (Action != NotFound) + return findLegalAction(Aspect, Action); + + unsigned Opcode = Aspect.Opcode; + LLT Ty = Aspect.Type; + if (!Ty.isVector()) { + auto DefaultAction = DefaultActions.find(Aspect.Opcode); + if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal) + return std::make_pair(Legal, Ty); + + if (DefaultAction == DefaultActions.end() || + DefaultAction->second != NarrowScalar) + return std::make_pair(Unsupported, LLT()); + return findLegalAction(Aspect, NarrowScalar); + } + + LLT EltTy = Ty.getElementType(); + int NumElts = Ty.getNumElements(); + + auto ScalarAction = ScalarInVectorActions.find(std::make_pair(Opcode, EltTy)); + if (ScalarAction != ScalarInVectorActions.end() && + ScalarAction->second != Legal) + return findLegalAction(Aspect, ScalarAction->second); + + // The element type is legal in principle, but the number of elements is + // wrong. + auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy)); + if (MaxLegalElts > NumElts) + return findLegalAction(Aspect, MoreElements); + + if (MaxLegalElts == 0) { + // Scalarize if there's no legal vector type, which is just a special case + // of FewerElements. 
+ return std::make_pair(FewerElements, EltTy); + } + + return findLegalAction(Aspect, FewerElements); +} + +std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT> +LegalizerInfo::getAction(const MachineInstr &MI, + const MachineRegisterInfo &MRI) const { + SmallBitVector SeenTypes(8); + const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; + for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) { + if (!OpInfo[i].isGenericType()) + continue; + + // We don't want to repeatedly check the same operand index, that + // could get expensive. + unsigned TypeIdx = OpInfo[i].getGenericTypeIndex(); + if (SeenTypes[TypeIdx]) + continue; + + SeenTypes.set(TypeIdx); + + LLT Ty = MRI.getType(MI.getOperand(i).getReg()); + auto Action = getAction({MI.getOpcode(), TypeIdx, Ty}); + if (Action.first != Legal) + return std::make_tuple(Action.first, TypeIdx, Action.second); + } + return std::make_tuple(Legal, 0, LLT{}); +} + +bool LegalizerInfo::isLegal(const MachineInstr &MI, + const MachineRegisterInfo &MRI) const { + return std::get<0>(getAction(MI, MRI)) == Legal; +} + +LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect, + LegalizeAction Action) const { + switch(Action) { + default: + llvm_unreachable("Cannot find legal type"); + case Legal: + case Lower: + case Libcall: + return Aspect.Type; + case NarrowScalar: { + return findLegalType(Aspect, + [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); + } + case WidenScalar: { + return findLegalType(Aspect, [&](LLT Ty) -> LLT { + return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize(); + }); + } + case FewerElements: { + return findLegalType(Aspect, + [&](LLT Ty) -> LLT { return Ty.halfElements(); }); + } + case MoreElements: { + return findLegalType(Aspect, + [&](LLT Ty) -> LLT { return Ty.doubleElements(); }); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 2f19bcf..c04f6e4 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -23,82 +24,408 @@ using namespace llvm; void MachineIRBuilder::setMF(MachineFunction &MF) { this->MF = &MF; this->MBB = nullptr; + this->MRI = &MF.getRegInfo(); this->TII = MF.getSubtarget().getInstrInfo(); this->DL = DebugLoc(); - this->MI = nullptr; + this->II = MachineBasicBlock::iterator(); + this->InsertedInstr = nullptr; } -void MachineIRBuilder::setMBB(MachineBasicBlock &MBB, bool Beginning) { +void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) { this->MBB = &MBB; - Before = Beginning; + this->II = MBB.end(); assert(&getMF() == MBB.getParent() && "Basic block is in a different function"); } -void MachineIRBuilder::setInstr(MachineInstr &MI, bool Before) { +void MachineIRBuilder::setInstr(MachineInstr &MI) { assert(MI.getParent() && "Instruction is not part of a basic block"); setMBB(*MI.getParent()); - this->MI = &MI; - this->Before = Before; + this->II = MI.getIterator(); } -MachineBasicBlock::iterator MachineIRBuilder::getInsertPt() { - if (MI) { - if (Before) - return MI; - if (!MI->getNextNode()) - return getMBB().end(); - return MI->getNextNode(); - } - return Before ? 
getMBB().begin() : getMBB().end(); +void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator II) { + assert(MBB.getParent() == &getMF() && + "Basic block is in a different function"); + this->MBB = &MBB; + this->II = II; +} + +void MachineIRBuilder::recordInsertions( + std::function<void(MachineInstr *)> Inserted) { + InsertedInstr = Inserted; +} + +void MachineIRBuilder::stopRecordingInsertions() { + InsertedInstr = nullptr; } //------------------------------------------------------------------------------ // Build instruction variants. //------------------------------------------------------------------------------ -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty) { - MachineInstr *NewMI = BuildMI(getMF(), DL, getTII().get(Opcode)); - if (Ty) { - assert(isPreISelGenericOpcode(Opcode) && - "Only generic instruction can have a type"); - NewMI->setType(Ty); - } else - assert(!isPreISelGenericOpcode(Opcode) && - "Generic instruction must have a type"); - getMBB().insert(getInsertPt(), NewMI); - return NewMI; + +MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) { + return insertInstr(buildInstrNoInsert(Opcode)); } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res, - unsigned Op0, unsigned Op1) { - return buildInstr(Opcode, nullptr, Res, Op0, Op1); +MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { + MachineInstrBuilder MIB = BuildMI(getMF(), DL, getTII().get(Opcode)); + return MIB; } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty, - unsigned Res, unsigned Op0, - unsigned Op1) { - MachineInstr *NewMI = buildInstr(Opcode, Ty); - MachineInstrBuilder(getMF(), NewMI) - .addReg(Res, RegState::Define) - .addReg(Op0) - .addReg(Op1); - return NewMI; + +MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) { + getMBB().insert(getInsertPt(), MIB); + if (InsertedInstr) + InsertedInstr(MIB); + return MIB; } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res, - unsigned Op0) { - MachineInstr *NewMI = buildInstr(Opcode, nullptr); - MachineInstrBuilder(getMF(), NewMI).addReg(Res, RegState::Define).addReg(Op0); - return NewMI; +MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) { + assert(MRI->getType(Res).isPointer() && "invalid operand type"); + return buildInstr(TargetOpcode::G_FRAME_INDEX) + .addDef(Res) + .addFrameIndex(Idx); } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode) { - return buildInstr(Opcode, nullptr); +MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, + const GlobalValue *GV) { + assert(MRI->getType(Res).isPointer() && "invalid operand type"); + assert(MRI->getType(Res).getAddressSpace() == + GV->getType()->getAddressSpace() && + "address space mismatch"); + + return buildInstr(TargetOpcode::G_GLOBAL_VALUE) + .addDef(Res) + .addGlobalAddress(GV); +} + +MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, + unsigned Op1) { + assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && + "invalid operand type"); + assert(MRI->getType(Res) == MRI->getType(Op0) && + MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + + return buildInstr(TargetOpcode::G_ADD) + .addDef(Res) + .addUse(Op0) + .addUse(Op1); +} + +MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, + unsigned Op1) { + assert(MRI->getType(Res).isPointer() && + MRI->getType(Res) == MRI->getType(Op0) && "type mismatch"); + 
assert(MRI->getType(Op1).isScalar() && "invalid offset type"); + + return buildInstr(TargetOpcode::G_GEP) + .addDef(Res) + .addUse(Op0) + .addUse(Op1); +} + +MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0, + unsigned Op1) { + assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && + "invalid operand type"); + assert(MRI->getType(Res) == MRI->getType(Op0) && + MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + + return buildInstr(TargetOpcode::G_SUB) + .addDef(Res) + .addUse(Op0) + .addUse(Op1); +} + +MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0, + unsigned Op1) { + assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && + "invalid operand type"); + assert(MRI->getType(Res) == MRI->getType(Op0) && + MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + + return buildInstr(TargetOpcode::G_MUL) + .addDef(Res) + .addUse(Op0) + .addUse(Op1); } -MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty, - MachineBasicBlock &BB) { - MachineInstr *NewMI = buildInstr(Opcode, Ty); - MachineInstrBuilder(getMF(), NewMI).addMBB(&BB); - return NewMI; +MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { + return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); +} + +MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) { + return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, + const ConstantInt &Val) { + LLT Ty = MRI->getType(Res); + + assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type"); + + const ConstantInt *NewVal = &Val; + if (Ty.getSizeInBits() != Val.getBitWidth()) + NewVal = ConstantInt::get(MF->getFunction()->getContext(), + Val.getValue().sextOrTrunc(Ty.getSizeInBits())); + + return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal); +} + +MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, + int64_t Val) { + auto IntN = IntegerType::get(MF->getFunction()->getContext(), + MRI->getType(Res).getSizeInBits()); + ConstantInt *CI = ConstantInt::get(IntN, Val, true); + return buildConstant(Res, *CI); +} + +MachineInstrBuilder MachineIRBuilder::buildFConstant(unsigned Res, + const ConstantFP &Val) { + assert(MRI->getType(Res).isScalar() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val); +} + +MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst, + MachineBasicBlock &Dest) { + assert(MRI->getType(Tst).isScalar() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest); +} + +MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr, + MachineMemOperand &MMO) { + assert(MRI->getType(Res).isValid() && "invalid operand type"); + assert(MRI->getType(Addr).isPointer() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_LOAD) + .addDef(Res) + .addUse(Addr) + .addMemOperand(&MMO); +} + +MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr, + MachineMemOperand &MMO) { + assert(MRI->getType(Val).isValid() && "invalid operand type"); + assert(MRI->getType(Addr).isPointer() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_STORE) + .addUse(Val) + .addUse(Addr) + .addMemOperand(&MMO); +} + +MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res, + unsigned CarryOut, + unsigned Op0, unsigned Op1, + unsigned CarryIn) { + assert(MRI->getType(Res).isScalar() 
&& "invalid operand type"); + assert(MRI->getType(Res) == MRI->getType(Op0) && + MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + assert(MRI->getType(CarryOut).isScalar() && "invalid operand type"); + assert(MRI->getType(CarryOut) == MRI->getType(CarryIn) && "type mismatch"); + + return buildInstr(TargetOpcode::G_UADDE) + .addDef(Res) + .addDef(CarryOut) + .addUse(Op0) + .addUse(Op1) + .addUse(CarryIn); +} + +MachineInstrBuilder MachineIRBuilder::buildAnyExt(unsigned Res, unsigned Op) { + validateTruncExt(Res, Op, true); + return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildSExt(unsigned Res, unsigned Op) { + validateTruncExt(Res, Op, true); + return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) { + validateTruncExt(Res, Op, true); + return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res, + unsigned Op) { + unsigned Opcode = TargetOpcode::COPY; + if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) + Opcode = TargetOpcode::G_SEXT; + else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) + Opcode = TargetOpcode::G_TRUNC; + + return buildInstr(Opcode).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<unsigned> Results, + ArrayRef<uint64_t> Indices, + unsigned Src) { +#ifndef NDEBUG + assert(Results.size() == Indices.size() && "inconsistent number of regs"); + assert(!Results.empty() && "invalid trivial extract"); + assert(std::is_sorted(Indices.begin(), Indices.end()) && + "extract offsets must be in ascending order"); + + assert(MRI->getType(Src).isValid() && "invalid operand type"); + for (auto Res : Results) + assert(MRI->getType(Res).isValid() && "invalid operand type"); +#endif + + auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT)); + for (auto Res : Results) + MIB.addDef(Res); + + MIB.addUse(Src); + + for (auto Idx : Indices) + MIB.addImm(Idx); + + getMBB().insert(getInsertPt(), MIB); + if (InsertedInstr) + InsertedInstr(MIB); + + return MIB; +} + +MachineInstrBuilder +MachineIRBuilder::buildSequence(unsigned Res, + ArrayRef<unsigned> Ops, + ArrayRef<uint64_t> Indices) { +#ifndef NDEBUG + assert(Ops.size() == Indices.size() && "incompatible args"); + assert(!Ops.empty() && "invalid trivial sequence"); + assert(std::is_sorted(Indices.begin(), Indices.end()) && + "sequence offsets must be in ascending order"); + + assert(MRI->getType(Res).isValid() && "invalid operand type"); + for (auto Op : Ops) + assert(MRI->getType(Op).isValid() && "invalid operand type"); +#endif + + MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE); + MIB.addDef(Res); + for (unsigned i = 0; i < Ops.size(); ++i) { + MIB.addUse(Ops[i]); + MIB.addImm(Indices[i]); + } + return MIB; +} + +MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, + unsigned Res, + bool HasSideEffects) { + auto MIB = + buildInstr(HasSideEffects ? 
TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+                                : TargetOpcode::G_INTRINSIC);
+  if (Res)
+    MIB.addDef(Res);
+  MIB.addIntrinsicID(ID);
+  return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildTrunc(unsigned Res, unsigned Op) {
+  validateTruncExt(Res, Op, false);
+  return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFPTrunc(unsigned Res, unsigned Op) {
+  validateTruncExt(Res, Op, false);
+  return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
+                                                unsigned Res, unsigned Op0,
+                                                unsigned Op1) {
+#ifndef NDEBUG
+  assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch");
+  assert(CmpInst::isIntPredicate(Pred) && "invalid predicate");
+  if (MRI->getType(Op0).isScalar() || MRI->getType(Op0).isPointer())
+    assert(MRI->getType(Res).isScalar() && "type mismatch");
+  else
+    assert(MRI->getType(Res).isVector() &&
+           MRI->getType(Res).getNumElements() ==
+               MRI->getType(Op0).getNumElements() &&
+           "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_ICMP)
+      .addDef(Res)
+      .addPredicate(Pred)
+      .addUse(Op0)
+      .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
+                                                unsigned Res, unsigned Op0,
+                                                unsigned Op1) {
+#ifndef NDEBUG
+  assert((MRI->getType(Op0).isScalar() || MRI->getType(Op0).isVector()) &&
+         "invalid operand type");
+  assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch");
+  assert(CmpInst::isFPPredicate(Pred) && "invalid predicate");
+  if (MRI->getType(Op0).isScalar())
+    assert(MRI->getType(Res).isScalar() && "type mismatch");
+  else
+    assert(MRI->getType(Res).isVector() &&
+           MRI->getType(Res).getNumElements() ==
+               MRI->getType(Op0).getNumElements() &&
+           "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_FCMP)
+      .addDef(Res)
+      .addPredicate(Pred)
+      .addUse(Op0)
+      .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
+                                                  unsigned Op0, unsigned Op1) {
+#ifndef NDEBUG
+  LLT ResTy = MRI->getType(Res);
+  assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
+         "invalid operand type");
+  assert(ResTy == MRI->getType(Op0) && ResTy == MRI->getType(Op1) &&
+         "type mismatch");
+  if (ResTy.isScalar() || ResTy.isPointer())
+    assert(MRI->getType(Tst).isScalar() && "type mismatch");
+  else
+    assert(MRI->getType(Tst).isVector() &&
+           MRI->getType(Tst).getNumElements() ==
+               MRI->getType(Op0).getNumElements() &&
+           "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_SELECT)
+      .addDef(Res)
+      .addUse(Tst)
+      .addUse(Op0)
+      .addUse(Op1);
+}
+
+void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
+                                        bool IsExtend) {
+#ifndef NDEBUG
+  LLT SrcTy = MRI->getType(Src);
+  LLT DstTy = MRI->getType(Dst);
+
+  if (DstTy.isVector()) {
+    assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
+    assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
+           "different number of elements in a trunc/ext");
+  } else
+    assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc");
+
+  if (IsExtend)
+    assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
+           "invalid narrowing extend");
+  else
+    assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() &&
+           "invalid widening trunc");
+#endif
 }
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 419e270..cc026ef 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -12,10 +12,12 @@ #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" @@ -31,18 +33,18 @@ static cl::opt<RegBankSelect::Mode> RegBankSelectMode( cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast", "Run the Fast mode (default mapping)"), clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy", - "Use the Greedy mode (best local mapping)"), - clEnumValEnd)); + "Use the Greedy mode (best local mapping)"))); char RegBankSelect::ID = 0; -INITIALIZE_PASS_BEGIN(RegBankSelect, "regbankselect", +INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false); INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_END(RegBankSelect, "regbankselect", +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, - false); + false) RegBankSelect::RegBankSelect(Mode RunningMode) : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr), @@ -60,6 +62,7 @@ void RegBankSelect::init(MachineFunction &MF) { assert(RBI && "Cannot work without RegisterBankInfo"); MRI = &MF.getRegInfo(); TRI = MF.getSubtarget().getRegisterInfo(); + TPC = &getAnalysis<TargetPassConfig>(); if (OptMode != Mode::Fast) { MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); @@ -77,6 +80,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineBranchProbabilityInfo>(); } + AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -87,7 +91,7 @@ bool RegBankSelect::assignmentMatch( OnlyAssign = false; // Each part of a break down needs to end up in a different register. // In other word, Reg assignement does not match. - if (ValMapping.BreakDown.size() > 1) + if (ValMapping.NumBreakDowns > 1) return false; const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI); @@ -103,11 +107,13 @@ bool RegBankSelect::assignmentMatch( return CurRegBank == DesiredRegBrank; } -void RegBankSelect::repairReg( +bool RegBankSelect::repairReg( MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping, RegBankSelect::RepairingPlacement &RepairPt, const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) { - assert(ValMapping.BreakDown.size() == 1 && "Not yet implemented"); + if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled()) + return false; + assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented"); // An empty range of new register means no repairing. assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair"); @@ -126,7 +132,7 @@ void RegBankSelect::repairReg( "We are about to create several defs for Dst"); // Build the instruction used to repair, then clone it at the right places. 
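
A minimal sketch of the repair idiom this hunk switches to (the helper below is hypothetical and not taken from the patch): create a fresh generic vreg on the wanted bank and tie it to the old value with a COPY built through MachineIRBuilder.

    // Hypothetical helper: give SrcReg a twin on DstBank, joined by a COPY.
    static unsigned repairWithCopy(MachineIRBuilder &MIRBuilder,
                                   MachineRegisterInfo &MRI, unsigned SrcReg,
                                   const RegisterBank &DstBank) {
      unsigned DstReg = MRI.createGenericVirtualRegister(MRI.getType(SrcReg));
      MRI.setRegBank(DstReg, DstBank);      // pin the new vreg to the target bank
      MIRBuilder.buildCopy(DstReg, SrcReg); // the cross-bank copy, as below
      return DstReg;
    }
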
- MachineInstr *MI = MIRBuilder.buildInstr(TargetOpcode::COPY, Dst, Src); + MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src); MI->removeFromParent(); DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst) << '\n'); @@ -149,15 +155,16 @@ void RegBankSelect::repairReg( } // TODO: // Legalize NewInstrs if need be. + return true; } uint64_t RegBankSelect::getRepairCost( const MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping) const { assert(MO.isReg() && "We should only repair register operand"); - assert(!ValMapping.BreakDown.empty() && "Nothing to map??"); + assert(ValMapping.NumBreakDowns && "Nothing to map??"); - bool IsSameNumOfValues = ValMapping.BreakDown.size() == 1; + bool IsSameNumOfValues = ValMapping.NumBreakDowns == 1; const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI); // If MO does not have a register bank, we should have just been // able to set one unless we have to break the value down. @@ -195,16 +202,20 @@ uint64_t RegBankSelect::getRepairCost( // TODO: use a dedicated constant for ImpossibleCost. if (Cost != UINT_MAX) return Cost; - assert(false && "Legalization not available yet"); + assert(!TPC->isGlobalISelAbortEnabled() && + "Legalization not available yet"); // Return the legalization cost of that repairing. } - assert(false && "Complex repairing not implemented yet"); - return 1; + assert(!TPC->isGlobalISelAbortEnabled() && + "Complex repairing not implemented yet"); + return UINT_MAX; } RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings, SmallVectorImpl<RepairingPlacement> &RepairPts) { + assert(!PossibleMappings.empty() && + "Do not know how to map this instruction"); RegisterBankInfo::InstructionMapping *BestMapping = nullptr; MappingCost Cost = MappingCost::ImpossibleCost(); @@ -212,6 +223,7 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( for (RegisterBankInfo::InstructionMapping &CurMapping : PossibleMappings) { MappingCost CurCost = computeMapping(MI, CurMapping, LocalRepairPts, &Cost); if (CurCost < Cost) { + DEBUG(dbgs() << "New best: " << CurCost << '\n'); Cost = CurCost; BestMapping = &CurMapping; RepairPts.clear(); @@ -219,7 +231,15 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( RepairPts.emplace_back(std::move(RepairPt)); } } - assert(BestMapping && "No suitable mapping for instruction"); + if (!BestMapping && !TPC->isGlobalISelAbortEnabled()) { + // If none of the mapping worked that means they are all impossible. + // Thus, pick the first one and set an impossible repairing point. + // It will trigger the failed isel mode. + BestMapping = &(*PossibleMappings.begin()); + RepairPts.emplace_back( + RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible)); + } else + assert(BestMapping && "No suitable mapping for instruction"); return *BestMapping; } @@ -250,7 +270,7 @@ void RegBankSelect::tryAvoidingSplit( // For the PHI case, the split may not be actually required. // In the copy case, a phi is already a copy on the incoming edge, // therefore there is no need to split. - if (ValMapping.BreakDown.size() == 1) + if (ValMapping.NumBreakDowns == 1) // This is a already a copy, there is nothing to do. RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign); } @@ -327,7 +347,7 @@ void RegBankSelect::tryAvoidingSplit( // We will split all the edges and repair there. } else { // This is a virtual register defined by a terminator. 
- if (ValMapping.BreakDown.size() == 1) { + if (ValMapping.NumBreakDowns == 1) { // There is nothing to repair, but we may actually lie on // the repairing cost because of the PHIs already proceeded // as already stated. @@ -348,6 +368,9 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( const RegBankSelect::MappingCost *BestCost) { assert((MBFI || !BestCost) && "Costs comparison require MBFI"); + if (!InstrMapping.isValid()) + return MappingCost::ImpossibleCost(); + // If mapped with InstrMapping, MI will have the recorded cost. MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1); bool Saturated = Cost.addLocalCost(InstrMapping.getCost()); @@ -355,32 +378,34 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( DEBUG(dbgs() << "Evaluating mapping cost for: " << MI); DEBUG(dbgs() << "With: " << InstrMapping << '\n'); RepairPts.clear(); - if (BestCost && Cost > *BestCost) + if (BestCost && Cost > *BestCost) { + DEBUG(dbgs() << "Mapping is too expensive from the start\n"); return Cost; + } // Moreover, to realize this mapping, the register bank of each operand must // match this mapping. In other words, we may need to locally reassign the // register banks. Account for that repairing cost as well. // In this context, local means in the surrounding of MI. - for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx; - ++OpIdx) { + for (unsigned OpIdx = 0, EndOpIdx = InstrMapping.getNumOperands(); + OpIdx != EndOpIdx; ++OpIdx) { const MachineOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; - DEBUG(dbgs() << "Opd" << OpIdx); + DEBUG(dbgs() << "Opd" << OpIdx << '\n'); const RegisterBankInfo::ValueMapping &ValMapping = InstrMapping.getOperandMapping(OpIdx); // If Reg is already properly mapped, this is free. bool Assign; if (assignmentMatch(Reg, ValMapping, Assign)) { - DEBUG(dbgs() << " is free (match).\n"); + DEBUG(dbgs() << "=> is free (match).\n"); continue; } if (Assign) { - DEBUG(dbgs() << " is free (simple assignment).\n"); + DEBUG(dbgs() << "=> is free (simple assignment).\n"); RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this, RepairingPlacement::Reassign)); continue; @@ -398,8 +423,10 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( tryAvoidingSplit(RepairPt, MO, ValMapping); // Check that the materialization of the repairing is possible. - if (!RepairPt.canMaterialize()) + if (!RepairPt.canMaterialize()) { + DEBUG(dbgs() << "Mapping involves impossible repairing\n"); return MappingCost::ImpossibleCost(); + } // Account for the split cost and repair cost. // Unless the cost is already saturated or we do not care about the cost. @@ -454,8 +481,10 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( // Stop looking into what it takes to repair, this is already // too expensive. - if (BestCost && Cost > *BestCost) + if (BestCost && Cost > *BestCost) { + DEBUG(dbgs() << "Mapping is too expensive, stop processing\n"); return Cost; + } // No need to accumulate more cost information. // We need to still gather the repairing information though. 
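
A worked example (numbers invented) of how the cost assembled in computeMapping is weighed, following the LocalFreq * LocalCost + NonLocalCost accounting that MappingCost::print emits further down:

    // Instruction mapped in a block of frequency 8, mapping cost 1, with one
    // repairing copy (cost 1) staying in the same block and one hoisted to a
    // colder block of frequency 2:
    //   LocalCost    = 1 (instruction) + 1 (local repair)        = 2
    //   NonLocalCost = 2 (frequency elsewhere) * 1 (repair)      = 2
    //   Total        = LocalFreq * LocalCost + NonLocalCost = 8 * 2 + 2 = 18
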
@@ -463,10 +492,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( break; } } + DEBUG(dbgs() << "Total cost is: " << Cost << "\n"); return Cost; } -void RegBankSelect::applyMapping( +bool RegBankSelect::applyMapping( MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping, SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) { // OpdMapper will hold all the information needed for the rewritting. @@ -474,28 +504,27 @@ void RegBankSelect::applyMapping( // First, place the repairing code. for (RepairingPlacement &RepairPt : RepairPts) { - assert(RepairPt.canMaterialize() && - RepairPt.getKind() != RepairingPlacement::Impossible && - "This mapping is impossible"); + if (!RepairPt.canMaterialize() || + RepairPt.getKind() == RepairingPlacement::Impossible) + return false; assert(RepairPt.getKind() != RepairingPlacement::None && "This should not make its way in the list"); unsigned OpIdx = RepairPt.getOpIdx(); MachineOperand &MO = MI.getOperand(OpIdx); const RegisterBankInfo::ValueMapping &ValMapping = InstrMapping.getOperandMapping(OpIdx); - unsigned BreakDownSize = ValMapping.BreakDown.size(); - (void)BreakDownSize; unsigned Reg = MO.getReg(); switch (RepairPt.getKind()) { case RepairingPlacement::Reassign: - assert(BreakDownSize == 1 && + assert(ValMapping.NumBreakDowns == 1 && "Reassignment should only be for simple mapping"); MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank); break; case RepairingPlacement::Insert: OpdMapper.createVRegs(OpIdx); - repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx)); + if (!repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx))) + return false; break; default: llvm_unreachable("Other kind should not happen"); @@ -504,9 +533,10 @@ void RegBankSelect::applyMapping( // Second, rewrite the instruction. DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n'); RBI->applyMapping(OpdMapper); + return true; } -void RegBankSelect::assignInstr(MachineInstr &MI) { +bool RegBankSelect::assignInstr(MachineInstr &MI) { DEBUG(dbgs() << "Assign: " << MI); // Remember the repairing placement for all the operands. SmallVector<RepairingPlacement, 4> RepairPts; @@ -516,32 +546,63 @@ void RegBankSelect::assignInstr(MachineInstr &MI) { BestMapping = RBI->getInstrMapping(MI); MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts); (void)DefaultCost; - assert(DefaultCost != MappingCost::ImpossibleCost() && - "Default mapping is not suited"); + if (DefaultCost == MappingCost::ImpossibleCost()) + return false; } else { RegisterBankInfo::InstructionMappings PossibleMappings = RBI->getInstrPossibleMappings(MI); - assert(!PossibleMappings.empty() && - "Do not know how to map this instruction"); + if (PossibleMappings.empty()) + return false; BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts)); } // Make sure the mapping is valid for MI. assert(BestMapping.verify(MI) && "Invalid instruction mapping"); - DEBUG(dbgs() << "Mapping: " << BestMapping << '\n'); + DEBUG(dbgs() << "Best Mapping: " << BestMapping << '\n'); // After this call, MI may not be valid anymore. // Do not use it. - applyMapping(MI, BestMapping, RepairPts); + return applyMapping(MI, BestMapping, RepairPts); } bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. 
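
A brief aside on the fallback protocol this change adopts (the snippet restates the pattern used later in this function; the error message is illustrative):

    // When a GlobalISel pass cannot make progress it either aborts, when the
    // abort mode queried through TargetPassConfig is enabled, or tags the
    // function and returns, so early-exits like the one below can skip it:
    //   if (TPC->isGlobalISelAbortEnabled())
    //     report_fatal_error("Unable to map instruction");
    //   MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
    //   return false;
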
+ if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); const Function *F = MF.getFunction(); Mode SaveOptMode = OptMode; if (F->hasFnAttribute(Attribute::OptimizeNone)) OptMode = Mode::Fast; init(MF); + +#ifndef NDEBUG + // Check that our input is fully legal: we require the function to have the + // Legalized property, so it should be. + // FIXME: This should be in the MachineVerifier, but it can't use the + // LegalizerInfo as it's currently in the separate GlobalISel library. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) { + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) { + if (!TPC->isGlobalISelAbortEnabled()) { + MF.getProperties().set( + MachineFunctionProperties::Property::FailedISel); + return false; + } + std::string ErrStorage; + raw_string_ostream Err(ErrStorage); + Err << "Instruction is not legal: " << MI << '\n'; + report_fatal_error(Err.str()); + } + } + } + } +#endif + // Walk the function and assign register banks to all operands. // Use a RPOT to make sure all registers are assigned before we choose // the best mapping of the current instruction. @@ -554,7 +615,18 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { MII != End;) { // MI might be invalidated by the assignment, so move the // iterator before hand. - assignInstr(*MII++); + MachineInstr &MI = *MII++; + + // Ignore target-specific instructions: they should use proper regclasses. + if (isTargetSpecificOpcode(MI.getOpcode())) + continue; + + if (!assignInstr(MI)) { + if (TPC->isGlobalISelAbortEnabled()) + report_fatal_error("Unable to map instruction"); + MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); + return false; + } } } OptMode = SaveOptMode; @@ -895,3 +967,20 @@ bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const { return LocalCost == Cost.LocalCost && NonLocalCost == Cost.NonLocalCost && LocalFreq == Cost.LocalFreq; } + +void RegBankSelect::MappingCost::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +void RegBankSelect::MappingCost::print(raw_ostream &OS) const { + if (*this == ImpossibleCost()) { + OS << "impossible"; + return; + } + if (isSaturated()) { + OS << "saturated"; + return; + } + OS << LocalFreq << " * " << LocalCost << " + " << NonLocalCost; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp index a911225..49d676f 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -19,12 +19,15 @@ using namespace llvm; const unsigned RegisterBank::InvalidID = UINT_MAX; -RegisterBank::RegisterBank() : ID(InvalidID), Name(nullptr), Size(0) {} +RegisterBank::RegisterBank(unsigned ID, const char *Name, unsigned Size, + const uint32_t *CoveredClasses) + : ID(ID), Name(Name), Size(Size) { + ContainedRegClasses.resize(200); + ContainedRegClasses.setBitsInMask(CoveredClasses); +} bool RegisterBank::verify(const TargetRegisterInfo &TRI) const { assert(isValid() && "Invalid register bank"); - assert(ContainedRegClasses.size() == TRI.getNumRegClasses() && - "TRI does not match the initialization process?"); for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) { const 
TargetRegisterClass &RC = *TRI.getRegClass(RCId); @@ -72,7 +75,7 @@ bool RegisterBank::operator==(const RegisterBank &OtherRB) const { return &OtherRB == this; } -void RegisterBank::dump(const TargetRegisterInfo *TRI) const { +LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const { print(dbgs(), /* IsForDebug */ true, TRI); } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index ef8e4f6..da5ab0b 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -32,139 +33,56 @@ using namespace llvm; +STATISTIC(NumPartialMappingsCreated, + "Number of partial mappings dynamically created"); +STATISTIC(NumPartialMappingsAccessed, + "Number of partial mappings dynamically accessed"); +STATISTIC(NumValueMappingsCreated, + "Number of value mappings dynamically created"); +STATISTIC(NumValueMappingsAccessed, + "Number of value mappings dynamically accessed"); +STATISTIC(NumOperandsMappingsCreated, + "Number of operands mappings dynamically created"); +STATISTIC(NumOperandsMappingsAccessed, + "Number of operands mappings dynamically accessed"); + const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX; const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1; //------------------------------------------------------------------------------ // RegisterBankInfo implementation. 
//------------------------------------------------------------------------------ -RegisterBankInfo::RegisterBankInfo(unsigned NumRegBanks) - : NumRegBanks(NumRegBanks) { - RegBanks.reset(new RegisterBank[NumRegBanks]); +RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks, + unsigned NumRegBanks) + : RegBanks(RegBanks), NumRegBanks(NumRegBanks) { +#ifndef NDEBUG + for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { + assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank"); + assert(RegBanks[Idx]->isValid() && "RegisterBank should be valid"); + } +#endif // NDEBUG +} + +RegisterBankInfo::~RegisterBankInfo() { + for (auto It : MapOfPartialMappings) + delete It.second; + for (auto It : MapOfValueMappings) + delete It.second; } bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { - DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { +#ifndef NDEBUG + for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { const RegisterBank &RegBank = getRegBank(Idx); assert(Idx == RegBank.getID() && "ID does not match the index in the array"); - dbgs() << "Verify " << RegBank << '\n'; + DEBUG(dbgs() << "Verify " << RegBank << '\n'); assert(RegBank.verify(TRI) && "RegBank is invalid"); - }); + } +#endif // NDEBUG return true; } -void RegisterBankInfo::createRegisterBank(unsigned ID, const char *Name) { - DEBUG(dbgs() << "Create register bank: " << ID << " with name \"" << Name - << "\"\n"); - RegisterBank &RegBank = getRegBank(ID); - assert(RegBank.getID() == RegisterBank::InvalidID && - "A register bank should be created only once"); - RegBank.ID = ID; - RegBank.Name = Name; -} - -void RegisterBankInfo::addRegBankCoverage(unsigned ID, unsigned RCId, - const TargetRegisterInfo &TRI, - bool AddTypeMapping) { - RegisterBank &RB = getRegBank(ID); - unsigned NbOfRegClasses = TRI.getNumRegClasses(); - - DEBUG(dbgs() << "Add coverage for: " << RB << '\n'); - - // Check if RB is underconstruction. - if (!RB.isValid()) - RB.ContainedRegClasses.resize(NbOfRegClasses); - else if (RB.covers(*TRI.getRegClass(RCId))) - // If RB already covers this register class, there is nothing - // to do. - return; - - BitVector &Covered = RB.ContainedRegClasses; - SmallVector<unsigned, 8> WorkList; - - WorkList.push_back(RCId); - Covered.set(RCId); - - unsigned &MaxSize = RB.Size; - do { - unsigned RCId = WorkList.pop_back_val(); - - const TargetRegisterClass &CurRC = *TRI.getRegClass(RCId); - - DEBUG(dbgs() << "Examine: " << TRI.getRegClassName(&CurRC) - << "(Size*8: " << (CurRC.getSize() * 8) << ")\n"); - - // Remember the biggest size in bits. - MaxSize = std::max(MaxSize, CurRC.getSize() * 8); - - // If we have been asked to record the type supported by this - // register bank, do it now. - if (AddTypeMapping) - for (MVT::SimpleValueType SVT : - make_range(CurRC.vt_begin(), CurRC.vt_end())) - recordRegBankForType(getRegBank(ID), SVT); - - // Walk through all sub register classes and push them into the worklist. - bool First = true; - for (BitMaskClassIterator It(CurRC.getSubClassMask(), TRI); It.isValid(); - ++It) { - unsigned SubRCId = It.getID(); - if (!Covered.test(SubRCId)) { - if (First) - DEBUG(dbgs() << " Enqueue sub-class: "); - DEBUG(dbgs() << TRI.getRegClassName(TRI.getRegClass(SubRCId)) << ", "); - WorkList.push_back(SubRCId); - // Remember that we saw the sub class. 
- Covered.set(SubRCId); - First = false; - } - } - if (!First) - DEBUG(dbgs() << '\n'); - - // Push also all the register classes that can be accessed via a - // subreg index, i.e., its subreg-class (which is different than - // its subclass). - // - // Note: It would probably be faster to go the other way around - // and have this method add only super classes, since this - // information is available in a more efficient way. However, it - // feels less natural for the client of this APIs plus we will - // TableGen the whole bitset at some point, so compile time for - // the initialization is not very important. - First = true; - for (unsigned SubRCId = 0; SubRCId < NbOfRegClasses; ++SubRCId) { - if (Covered.test(SubRCId)) - continue; - bool Pushed = false; - const TargetRegisterClass *SubRC = TRI.getRegClass(SubRCId); - for (SuperRegClassIterator SuperRCIt(SubRC, &TRI); SuperRCIt.isValid(); - ++SuperRCIt) { - if (Pushed) - break; - for (BitMaskClassIterator It(SuperRCIt.getMask(), TRI); It.isValid(); - ++It) { - unsigned SuperRCId = It.getID(); - if (SuperRCId == RCId) { - if (First) - DEBUG(dbgs() << " Enqueue subreg-class: "); - DEBUG(dbgs() << TRI.getRegClassName(SubRC) << ", "); - WorkList.push_back(SubRCId); - // Remember that we saw the sub class. - Covered.set(SubRCId); - Pushed = true; - First = false; - break; - } - } - } - } - if (!First) - DEBUG(dbgs() << '\n'); - } while (!WorkList.empty()); -} - const RegisterBank * RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { @@ -173,11 +91,9 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, assert(Reg && "NoRegister does not have a register bank"); const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); - if (RegClassOrBank.is<const RegisterBank *>()) - return RegClassOrBank.get<const RegisterBank *>(); - const TargetRegisterClass *RC = - RegClassOrBank.get<const TargetRegisterClass *>(); - if (RC) + if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>()) + return RB; + if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) return &getRegBankFromRegClass(*RC); return nullptr; } @@ -199,10 +115,37 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( return &RegBank; } +const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister( + unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) { + + // If the register already has a class, fallback to MRI::constrainRegClass. + auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); + if (RegClassOrBank.is<const TargetRegisterClass *>()) + return MRI.constrainRegClass(Reg, &RC); + + const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); + // Otherwise, all we can do is ensure the bank covers the class, and set it. + if (RB && !RB->covers(RC)) + return nullptr; + + // If nothing was set or the class is simply compatible, set it. + MRI.setRegClass(Reg, &RC); + return &RC; +} + RegisterBankInfo::InstructionMapping RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { + // For copies we want to walk over the operands and try to find one + // that has a register bank since the instruction itself will not get + // us any constraint. + bool isCopyLike = MI.isCopy() || MI.isPHI(); + // For copy like instruction, only the mapping of the definition + // is important. The rest is not constrained. + unsigned NumOperandsForMapping = isCopyLike ? 
1 : MI.getNumOperands(); + RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1, - MI.getNumOperands()); + /*OperandsMapping*/ nullptr, + NumOperandsForMapping); const MachineFunction &MF = *MI.getParent()->getParent(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); @@ -213,14 +156,10 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // Before doing anything complicated check if the mapping is not // directly available. bool CompleteMapping = true; - // For copies we want to walk over the operands and try to find one - // that has a register bank. - bool isCopyLike = MI.isCopy() || MI.isPHI(); - // Remember the register bank for reuse for copy-like instructions. - const RegisterBank *RegBank = nullptr; - // Remember the size of the register for reuse for copy-like instructions. - unsigned RegSize = 0; - for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) { + + SmallVector<const ValueMapping *, 8> OperandsMapping(NumOperandsForMapping); + for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx; + ++OpIdx) { const MachineOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) continue; @@ -242,71 +181,147 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // the register bank from the encoding constraints. CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI); if (!CurRegBank) { - // Check if we can deduce the register bank from the type of - // the instruction. - Type *MITy = MI.getType(); - if (MITy) - CurRegBank = getRegBankForType( - MVT::getVT(MITy, /*HandleUnknown*/ true).SimpleTy); - if (!CurRegBank) - // Use the current assigned register bank. - // That may not make much sense though. - CurRegBank = AltRegBank; - if (!CurRegBank) { - // All our attempts failed, give up. - CompleteMapping = false; - - if (!isCopyLike) - // MI does not carry enough information to guess the mapping. - return InstructionMapping(); - - // For copies, we want to keep interating to find a register - // bank for the other operands if we did not find one yet. - if (RegBank) - break; - continue; - } + // All our attempts failed, give up. + CompleteMapping = false; + + if (!isCopyLike) + // MI does not carry enough information to guess the mapping. + return InstructionMapping(); + continue; } } - RegBank = CurRegBank; - RegSize = getSizeInBits(Reg, MRI, TRI); - Mapping.setOperandMapping(OpIdx, RegSize, *CurRegBank); + const ValueMapping *ValMapping = + &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank); + if (isCopyLike) { + OperandsMapping[0] = ValMapping; + CompleteMapping = true; + break; + } + OperandsMapping[OpIdx] = ValMapping; } - if (CompleteMapping) - return Mapping; - - assert(isCopyLike && "We should have bailed on non-copies at this point"); - // For copy like instruction, if none of the operand has a register - // bank avialable, there is nothing we can propagate. - if (!RegBank) + if (isCopyLike && !CompleteMapping) + // No way to deduce the type from what we have. return InstructionMapping(); - // This is a copy-like instruction. - // Propagate RegBank to all operands that do not have a - // mapping yet. - for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) { - const MachineOperand &MO = MI.getOperand(OpIdx); - // Don't assign a mapping for non-reg operands. 
- if (!MO.isReg()) - continue; + assert(CompleteMapping && "Setting an uncomplete mapping"); + Mapping.setOperandsMapping(getOperandsMapping(OperandsMapping)); + return Mapping; +} - // If a mapping already exists, do not touch it. - if (!static_cast<const InstructionMapping *>(&Mapping) - ->getOperandMapping(OpIdx) - .BreakDown.empty()) - continue; +/// Hashing function for PartialMapping. +static hash_code hashPartialMapping(unsigned StartIdx, unsigned Length, + const RegisterBank *RegBank) { + return hash_combine(StartIdx, Length, RegBank ? RegBank->getID() : 0); +} + +/// Overloaded version of hash_value for a PartialMapping. +hash_code +llvm::hash_value(const RegisterBankInfo::PartialMapping &PartMapping) { + return hashPartialMapping(PartMapping.StartIdx, PartMapping.Length, + PartMapping.RegBank); +} + +const RegisterBankInfo::PartialMapping & +RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) const { + ++NumPartialMappingsAccessed; + + hash_code Hash = hashPartialMapping(StartIdx, Length, &RegBank); + const auto &It = MapOfPartialMappings.find(Hash); + if (It != MapOfPartialMappings.end()) + return *It->second; + + ++NumPartialMappingsCreated; + + const PartialMapping *&PartMapping = MapOfPartialMappings[Hash]; + PartMapping = new PartialMapping{StartIdx, Length, RegBank}; + return *PartMapping; +} + +const RegisterBankInfo::ValueMapping & +RegisterBankInfo::getValueMapping(unsigned StartIdx, unsigned Length, + const RegisterBank &RegBank) const { + return getValueMapping(&getPartialMapping(StartIdx, Length, RegBank), 1); +} + +static hash_code +hashValueMapping(const RegisterBankInfo::PartialMapping *BreakDown, + unsigned NumBreakDowns) { + if (LLVM_LIKELY(NumBreakDowns == 1)) + return hash_value(*BreakDown); + SmallVector<size_t, 8> Hashes(NumBreakDowns); + for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx) + Hashes.push_back(hash_value(BreakDown[Idx])); + return hash_combine_range(Hashes.begin(), Hashes.end()); +} + +const RegisterBankInfo::ValueMapping & +RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown, + unsigned NumBreakDowns) const { + ++NumValueMappingsAccessed; + + hash_code Hash = hashValueMapping(BreakDown, NumBreakDowns); + const auto &It = MapOfValueMappings.find(Hash); + if (It != MapOfValueMappings.end()) + return *It->second; + + ++NumValueMappingsCreated; + + const ValueMapping *&ValMapping = MapOfValueMappings[Hash]; + ValMapping = new ValueMapping{BreakDown, NumBreakDowns}; + return *ValMapping; +} - Mapping.setOperandMapping(OpIdx, RegSize, *RegBank); +template <typename Iterator> +const RegisterBankInfo::ValueMapping * +RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const { + + ++NumOperandsMappingsAccessed; + + // The addresses of the value mapping are unique. + // Therefore, we can use them directly to hash the operand mapping. + hash_code Hash = hash_combine_range(Begin, End); + const auto &It = MapOfOperandsMappings.find(Hash); + if (It != MapOfOperandsMappings.end()) + return It->second; + + ++NumOperandsMappingsCreated; + + // Create the array of ValueMapping. + // Note: this array will not hash to this instance of operands + // mapping, because we use the pointer of the ValueMapping + // to hash and we expect them to uniquely identify an instance + // of value mapping. 
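
A sketch of how a target is expected to consume these uniquing helpers (the bank name and operand count are made up; this is not code from the patch):

    // Inside a hypothetical MyTargetRegisterBankInfo::getInstrMapping():
    const ValueMapping &GPR32 = getValueMapping(0, 32, MyGPRBank);
    const ValueMapping *Ops = getOperandsMapping({&GPR32, &GPR32, &GPR32});
    // One interned array describes all three register operands of, say, a
    // 32-bit G_ADD; the InstructionMapping just points at it.
    InstructionMapping Mapping(DefaultMappingID, /*Cost=*/1, Ops,
                               /*NumOperands=*/3);
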
+ ValueMapping *&Res = MapOfOperandsMappings[Hash]; + Res = new ValueMapping[std::distance(Begin, End)]; + unsigned Idx = 0; + for (Iterator It = Begin; It != End; ++It, ++Idx) { + const ValueMapping *ValMap = *It; + if (!ValMap) + continue; + Res[Idx] = *ValMap; } - return Mapping; + return Res; +} + +const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( + const SmallVectorImpl<const RegisterBankInfo::ValueMapping *> &OpdsMapping) + const { + return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); +} + +const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( + std::initializer_list<const RegisterBankInfo::ValueMapping *> OpdsMapping) + const { + return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); } RegisterBankInfo::InstructionMapping RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); - if (Mapping.isValid()) - return Mapping; + RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); + if (Mapping.isValid()) + return Mapping; llvm_unreachable("The target must implement this"); } @@ -335,18 +350,18 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const { void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { MachineInstr &MI = OpdMapper.getMI(); DEBUG(dbgs() << "Applying default-like mapping\n"); - for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx; - ++OpIdx) { + for (unsigned OpIdx = 0, + EndIdx = OpdMapper.getInstrMapping().getNumOperands(); + OpIdx != EndIdx; ++OpIdx) { DEBUG(dbgs() << "OpIdx " << OpIdx); MachineOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { DEBUG(dbgs() << " is not a register, nothing to be done\n"); continue; } - assert( - OpdMapper.getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() == - 1 && - "This mapping is too complex for this function"); + assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns == + 1 && + "This mapping is too complex for this function"); iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); if (NewRegs.begin() == NewRegs.end()) { @@ -369,7 +384,8 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, // get the size of that register class. RC = TRI.getMinimalPhysRegClass(Reg); } else { - unsigned RegSize = MRI.getSize(Reg); + LLT Ty = MRI.getType(Reg); + unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0; // If Reg is not a generic register, query the register class to // get its size. if (RegSize) @@ -384,7 +400,7 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, //------------------------------------------------------------------------------ // Helper classes implementation. //------------------------------------------------------------------------------ -void RegisterBankInfo::PartialMapping::dump() const { +LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const { print(dbgs()); dbgs() << '\n'; } @@ -392,7 +408,7 @@ void RegisterBankInfo::PartialMapping::dump() const { bool RegisterBankInfo::PartialMapping::verify() const { assert(RegBank && "Register bank not set"); assert(Length && "Empty mapping"); - assert((StartIdx < getHighBitIdx()) && "Overflow, switch to APInt?"); + assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?"); // Check if the minimum width fits into RegBank. 
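
For orientation, an invented example of the PartialMapping fields being verified here, and why the overflow check above now accepts equality:

    // A 64-bit value split across two 32-bit registers is described as:
    //   { StartIdx = 0,  Length = 32, RegBank = &GPRBank }  // bits [0, 31]
    //   { StartIdx = 32, Length = 32, RegBank = &GPRBank }  // bits [32, 63]
    // With getHighBitIdx() == StartIdx + Length - 1, a single-bit mapping has
    // StartIdx == getHighBitIdx(), so the assert must use '<=' rather than '<'.
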
assert(RegBank->getSize() >= Length && "Register bank too small for Mask"); return true; @@ -406,10 +422,10 @@ void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const { OS << "nullptr"; } -bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const { - assert(!BreakDown.empty() && "Value mapped nowhere?!"); +bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const { + assert(NumBreakDowns && "Value mapped nowhere?!"); unsigned OrigValueBitWidth = 0; - for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) { + for (const RegisterBankInfo::PartialMapping &PartMap : *this) { // Check that each register bank is big enough to hold the partial value: // this check is done by PartialMapping::verify assert(PartMap.verify() && "Partial mapping is invalid"); @@ -418,9 +434,10 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const { OrigValueBitWidth = std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1); } - assert(OrigValueBitWidth == ExpectedBitWidth && "BitWidth does not match"); + assert(OrigValueBitWidth >= MeaningfulBitWidth && + "Meaningful bits not covered by the mapping"); APInt ValueMask(OrigValueBitWidth, 0); - for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) { + for (const RegisterBankInfo::PartialMapping &PartMap : *this) { // Check that the union of the partial mappings covers the whole value, // without overlaps. // The high bit is exclusive in the APInt API, thus getHighBitIdx + 1. @@ -434,15 +451,15 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const { return true; } -void RegisterBankInfo::ValueMapping::dump() const { +LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const { print(dbgs()); dbgs() << '\n'; } void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const { - OS << "#BreakDown: " << BreakDown.size() << " "; + OS << "#BreakDown: " << NumBreakDowns << " "; bool IsFirst = true; - for (const PartialMapping &PartMap : BreakDown) { + for (const PartialMapping &PartMap : *this) { if (!IsFirst) OS << ", "; OS << '[' << PartMap << ']'; @@ -450,21 +467,13 @@ void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const { } } -void RegisterBankInfo::InstructionMapping::setOperandMapping( - unsigned OpIdx, unsigned MaskSize, const RegisterBank &RegBank) { - // Build the value mapping. - assert(MaskSize <= RegBank.getSize() && "Register bank is too small"); - - // Create the mapping object. - getOperandMapping(OpIdx).BreakDown.push_back( - PartialMapping(0, MaskSize, RegBank)); -} - bool RegisterBankInfo::InstructionMapping::verify( const MachineInstr &MI) const { // Check that all the register operands are properly mapped. // Check the constructor invariant. - assert(NumOperands == MI.getNumOperands() && + // For PHI, we only care about mapping the definition. + assert(NumOperands == + ((MI.isCopy() || MI.isPHI()) ? 
1 : MI.getNumOperands()) && "NumOperands must match, see constructor"); assert(MI.getParent() && MI.getParent()->getParent() && "MI must be connected to a MachineFunction"); @@ -473,16 +482,18 @@ bool RegisterBankInfo::InstructionMapping::verify( for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { const MachineOperand &MO = MI.getOperand(Idx); - const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx); - (void)MOMapping; if (!MO.isReg()) { - assert(MOMapping.BreakDown.empty() && + assert(!getOperandMapping(Idx).isValid() && "We should not care about non-reg mapping"); continue; } unsigned Reg = MO.getReg(); if (!Reg) continue; + assert(getOperandMapping(Idx).isValid() && + "We must have a mapping for reg operands"); + const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx); + (void)MOMapping; // Register size in bits. // This size must match what the mapping expects. assert(MOMapping.verify(getSizeInBits( @@ -492,7 +503,7 @@ bool RegisterBankInfo::InstructionMapping::verify( return true; } -void RegisterBankInfo::InstructionMapping::dump() const { +LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const { print(dbgs()); dbgs() << '\n'; } @@ -514,18 +525,16 @@ RegisterBankInfo::OperandsMapper::OperandsMapper( MachineInstr &MI, const InstructionMapping &InstrMapping, MachineRegisterInfo &MRI) : MRI(MRI), MI(MI), InstrMapping(InstrMapping) { - unsigned NumOpds = MI.getNumOperands(); - OpToNewVRegIdx.reset(new int[NumOpds]); - std::fill(&OpToNewVRegIdx[0], &OpToNewVRegIdx[NumOpds], - OperandsMapper::DontKnowIdx); + unsigned NumOpds = InstrMapping.getNumOperands(); + OpToNewVRegIdx.resize(NumOpds, OperandsMapper::DontKnowIdx); assert(InstrMapping.verify(MI) && "Invalid mapping for MI"); } iterator_range<SmallVectorImpl<unsigned>::iterator> RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) { - assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); unsigned NumPartialVal = - getInstrMapping().getOperandMapping(OpIdx).BreakDown.size(); + getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; int StartIdx = OpToNewVRegIdx[OpIdx]; if (StartIdx == OperandsMapper::DontKnowIdx) { @@ -559,16 +568,15 @@ RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, } void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { - assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx = getVRegsMem(OpIdx); - const SmallVectorImpl<PartialMapping> &PartMapList = - getInstrMapping().getOperandMapping(OpIdx).BreakDown; - SmallVectorImpl<PartialMapping>::const_iterator PartMap = PartMapList.begin(); + const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx); + const PartialMapping *PartMap = ValMapping.begin(); for (unsigned &NewVReg : NewVRegsForOpIdx) { - assert(PartMap != PartMapList.end() && "Out-of-bound access"); + assert(PartMap != ValMapping.end() && "Out-of-bound access"); assert(NewVReg == 0 && "Register has already been created"); - NewVReg = MRI.createGenericVirtualRegister(PartMap->Length); + NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length)); MRI.setRegBank(NewVReg, *PartMap->RegBank); ++PartMap; } @@ -577,8 +585,8 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { void 
RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx, unsigned PartialMapIdx, unsigned NewVReg) { - assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); - assert(getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() > + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); + assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns > PartialMapIdx && "Out-of-bound access for partial mapping"); // Make sure the memory is initialized for that operand. @@ -592,14 +600,14 @@ iterator_range<SmallVectorImpl<unsigned>::const_iterator> RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, bool ForDebug) const { (void)ForDebug; - assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); int StartIdx = OpToNewVRegIdx[OpIdx]; if (StartIdx == OperandsMapper::DontKnowIdx) return make_range(NewVRegs.end(), NewVRegs.end()); unsigned PartMapSize = - getInstrMapping().getOperandMapping(OpIdx).BreakDown.size(); + getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; SmallVectorImpl<unsigned>::const_iterator End = getNewVRegsEnd(StartIdx, PartMapSize); iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res = @@ -611,14 +619,14 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, return Res; } -void RegisterBankInfo::OperandsMapper::dump() const { +LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const { print(dbgs(), true); dbgs() << '\n'; } void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, bool ForDebug) const { - unsigned NumOpds = getMI().getNumOperands(); + unsigned NumOpds = getInstrMapping().getNumOperands(); if (ForDebug) { OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n'; // Print out the internal state of the index table. diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp new file mode 100644 index 0000000..e500918 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -0,0 +1,45 @@ +//===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file This file implements the utility functions used by the GlobalISel +/// pipeline. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define DEBUG_TYPE "globalisel-utils" + +using namespace llvm; + +unsigned llvm::constrainOperandRegClass( + const MachineFunction &MF, const TargetRegisterInfo &TRI, + MachineRegisterInfo &MRI, const TargetInstrInfo &TII, + const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, + unsigned Reg, unsigned OpIdx) { + // Assume physical registers are properly constrained. 
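
A sketch of the intended call site for this new helper (hypothetical selector loop, not part of the patch):

    // After choosing a target opcode for instruction I, constrain every
    // virtual register operand to a class the encoding accepts; the helper
    // inserts a COPY when the existing class or bank cannot be narrowed.
    for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) {
      MachineOperand &MO = I.getOperand(OpI);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
        MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I,
                                           I.getDesc(), MO.getReg(), OpI));
    }
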
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "PhysReg not implemented"); + + const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); + + if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) { + unsigned NewReg = MRI.createVirtualRegister(RegClass); + BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(), + TII.get(TargetOpcode::COPY), NewReg) + .addReg(Reg); + return NewReg; + } + + return Reg; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp index 8c760b7..1ea5349 100644 --- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp @@ -182,9 +182,7 @@ namespace { bool runOnFunction(Function &F) override; bool doFinalization(Module &M) override; - const char *getPassName() const override { - return "Merge internal globals"; - } + StringRef getPassName() const override { return "Merge internal globals"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -434,6 +432,8 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, std::vector<Type*> Tys; std::vector<Constant*> Inits; + bool HasExternal = false; + StringRef FirstExternalName; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getValueType(); MergedSize += DL.getTypeAllocSize(Ty); @@ -442,19 +442,46 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); + + if (Globals[j]->hasExternalLinkage() && !HasExternal) { + HasExternal = true; + FirstExternalName = Globals[j]->getName(); + } } + // If merged variables doesn't have external linkage, we needn't to expose + // the symbol after merging. + GlobalValue::LinkageTypes Linkage = HasExternal + ? GlobalValue::ExternalLinkage + : GlobalValue::InternalLinkage; StructType *MergedTy = StructType::get(M.getContext(), Tys); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); - GlobalVariable *MergedGV = new GlobalVariable( - M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit, - "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace); + // On Darwin external linkage needs to be preserved, otherwise + // dsymutil cannot preserve the debug info for the merged + // variables. If they have external linkage, use the symbol name + // of the first variable merged as the suffix of global symbol + // name. This avoids a link-time naming conflict for the + // _MergedGlobals symbols. + Twine MergedName = + (IsMachO && HasExternal) + ? "_MergedGlobals_" + FirstExternalName + : "_MergedGlobals"; + auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage; + auto *MergedGV = new GlobalVariable( + M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr, + GlobalVariable::NotThreadLocal, AddrSpace); + + const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); + // Copy metadata while adjusting any debug info metadata by the original + // global's offset within the merged global. 
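
An invented before/after picture of what the surrounding merge code produces; the element offsets come from the StructLayout queried above:

    // Three internal i32 globals a, b, c merged at offsets 0, 4 and 8:
    //   @_MergedGlobals = internal global { i32, i32, i32 } { ... }
    //   use of @b  ==>  getelementptr ({ i32, i32, i32 }, @_MergedGlobals, 0, 1)
    // copyMetadata() below shifts each variable's debug info by
    // getElementOffset(idx), so debuggers still find b at +4 inside the blob.
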
+ MergedGV->copyMetadata(Globals[k], MergedLayout->getElementOffset(idx)); + Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, idx), @@ -498,22 +525,18 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) { void GlobalMerge::setMustKeepGlobalVariables(Module &M) { collectUsedGlobalVariables(M); - for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; - ++IFn) { - for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); - IBB != IEndBB; ++IBB) { - // Follow the invoke link to find the landing pad instruction - const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); - if (!II) continue; - - const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); - // Look for globals in the clauses of the landing pad instruction - for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); - Idx != NumClauses; ++Idx) + for (Function &F : M) { + for (BasicBlock &BB : F) { + Instruction *Pad = BB.getFirstNonPHI(); + if (!Pad->isEHPad()) + continue; + + // Keep globals used by landingpads and catchpads. + for (const Use &U : Pad->operands()) { if (const GlobalVariable *GV = - dyn_cast<GlobalVariable>(LPInst->getClause(Idx) - ->stripPointerCasts())) + dyn_cast<GlobalVariable>(U->stripPointerCasts())) MustKeepGlobalVariables.insert(GV); + } } } } diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index d225162..b9f3d86 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/Passes.h" #include "BranchFolding.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -58,6 +59,8 @@ static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev", cl::init(false), cl::Hidden); static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden); +static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond", + cl::init(false), cl::Hidden); static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold", cl::init(true), cl::Hidden); @@ -68,6 +71,7 @@ STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed"); STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); +STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed"); STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); STATISTIC(NumDupBBs, "Number of duplicated blocks"); STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); @@ -82,10 +86,12 @@ namespace { ICTriangleRev, // Same as ICTriangle, but true path rev condition. ICTriangleFalse, // Same as ICTriangle, but on the false path. ICTriangle, // BB is entry of a triangle sub-CFG. - ICDiamond // BB is entry of a diamond sub-CFG. + ICDiamond, // BB is entry of a diamond sub-CFG. + ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a + // common tail that can be shared. }; - /// BBInfo - One per MachineBasicBlock, this is used to cache the result + /// One per MachineBasicBlock, this is used to cache the result /// if-conversion feasibility analysis. This includes results from /// TargetInstrInfo::analyzeBranch() (i.e. 
TBB, FBB, and Cond), and its /// classification, and common tail block of its successors (if it's a @@ -114,6 +120,7 @@ namespace { bool IsAnalyzed : 1; bool IsEnqueued : 1; bool IsBrAnalyzable : 1; + bool IsBrReversible : 1; bool HasFallThrough : 1; bool IsUnpredicable : 1; bool CannotBeCopied : 1; @@ -128,13 +135,14 @@ namespace { SmallVector<MachineOperand, 4> Predicate; BBInfo() : IsDone(false), IsBeingAnalyzed(false), IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), - HasFallThrough(false), IsUnpredicable(false), - CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr), + IsBrReversible(false), HasFallThrough(false), + IsUnpredicable(false), CannotBeCopied(false), + ClobbersPred(false), NonPredSize(0), ExtraCost(0), + ExtraCost2(0), BB(nullptr), TrueBB(nullptr), FalseBB(nullptr) {} }; - /// IfcvtToken - Record information about pending if-conversions to attempt: + /// Record information about pending if-conversions to attempt: /// BBI - Corresponding BBInfo. /// Kind - Type of block. See IfcvtKind. /// NeedSubsumption - True if the to-be-predicated BB has already been @@ -148,15 +156,19 @@ namespace { struct IfcvtToken { BBInfo &BBI; IfcvtKind Kind; - bool NeedSubsumption; unsigned NumDups; unsigned NumDups2; - IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) - : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} + bool NeedSubsumption : 1; + bool TClobbersPred : 1; + bool FClobbersPred : 1; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0, + bool tc = false, bool fc = false) + : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s), + TClobbersPred(tc), FClobbersPred(fc) {} }; - /// BBAnalysis - Results of if-conversion feasibility analysis indexed by - /// basic block number. + /// Results of if-conversion feasibility analysis indexed by basic block + /// number. 
std::vector<BBInfo> BBAnalysis; TargetSchedModel SchedModel; @@ -172,11 +184,11 @@ namespace { bool PreRegAlloc; bool MadeChange; int FnNum; - std::function<bool(const Function &)> PredicateFtor; + std::function<bool(const MachineFunction &)> PredicateFtor; public: static char ID; - IfConverter(std::function<bool(const Function &)> Ftor = nullptr) + IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr) : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) { initializeIfConverterPass(*PassRegistry::getPassRegistry()); } @@ -191,31 +203,58 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: - bool ReverseBranchCondition(BBInfo &BBI); + bool reverseBranchCondition(BBInfo &BBI) const; bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, BranchProbability Prediction) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, BranchProbability Prediction) const; + bool CountDuplicatedInstructions( + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, + unsigned &Dups1, unsigned &Dups2, + MachineBasicBlock &TBB, MachineBasicBlock &FBB, + bool SkipUnconditionalBranches) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, - unsigned &Dups1, unsigned &Dups2) const; - void ScanInstructions(BBInfo &BBI); - void AnalyzeBlock(MachineBasicBlock *MBB, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; + bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const; + void AnalyzeBranches(BBInfo &BBI); + void ScanInstructions(BBInfo &BBI, + MachineBasicBlock::iterator &Begin, + MachineBasicBlock::iterator &End, + bool BranchUnpredicable = false) const; + bool RescanInstructions( + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, + BBInfo &TrueBBI, BBInfo &FalseBBI) const; + void AnalyzeBlock(MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens); bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, - bool isTriangle = false, bool RevBranch = false); + bool isTriangle = false, bool RevBranch = false, + bool hasCommonTail = false); void AnalyzeBlocks(MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens); - void InvalidatePreds(MachineBasicBlock *BB); + void InvalidatePreds(MachineBasicBlock &MBB); void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred, + bool RemoveBranch, bool MergeAddEdges); bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, - unsigned NumDups1, unsigned NumDups2); + unsigned NumDups1, unsigned NumDups2, + bool TClobbers, bool FClobbers); + bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbers, bool FClobbers); void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, @@ -242,12 +281,12 @@ namespace { Prediction); } - // 
blockAlwaysFallThrough - Block ends without a terminator. + /// Returns true if Block ends without a terminator. bool blockAlwaysFallThrough(BBInfo &BBI) const { return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr; } - // IfcvtTokenCmp - Used to sort if-conversion candidates. + /// Used to sort if-conversion candidates. static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1, const std::unique_ptr<IfcvtToken> &C2) { int Incr1 = (C1->Kind == ICDiamond) @@ -282,8 +321,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction()) || - (PredicateFtor && !PredicateFtor(*MF.getFunction()))) + if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF))) return false; const TargetSubtargetInfo &ST = MF.getSubtarget(); @@ -402,11 +440,26 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" << BBI.TrueBB->getNumber() << ",F:" << BBI.FalseBB->getNumber() << ") "); - RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); + RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2, + Token->TClobbersPred, + Token->FClobbersPred); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) ++NumDiamonds; break; } + case ICForkedDiamond: { + if (DisableForkedDiamond) break; + DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#" + << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2, + Token->TClobbersPred, + Token->FClobbersPred); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) ++NumForkedDiamonds; + break; + } } Change |= RetVal; @@ -435,46 +488,42 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } -/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given -/// its 'true' successor. +/// BB has a fallthrough. Find its 'false' successor given its 'true' successor. static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, MachineBasicBlock *TrueBB) { - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - E = BB->succ_end(); SI != E; ++SI) { - MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock *SuccBB : BB->successors()) { if (SuccBB != TrueBB) return SuccBB; } return nullptr; } -/// ReverseBranchCondition - Reverse the condition of the end of the block -/// branch. Swap block's 'true' and 'false' successors. -bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { +/// Reverse the condition of the end of the block branch. Swap block's 'true' +/// and 'false' successors. +bool IfConverter::reverseBranchCondition(BBInfo &BBI) const { DebugLoc dl; // FIXME: this is nowhere - if (!TII->ReverseBranchCondition(BBI.BrCond)) { - TII->RemoveBranch(*BBI.BB); - TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); + if (!TII->reverseBranchCondition(BBI.BrCond)) { + TII->removeBranch(*BBI.BB); + TII->insertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); std::swap(BBI.TrueBB, BBI.FalseBB); return true; } return false; } -/// getNextBlock - Returns the next block in the function blocks ordering. If -/// it is the end, returns NULL. 
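// Editorial sketch (not part of this diff): TargetInstrInfo's
// reverseBranchCondition returns false on success, which is why
// reverseBranchCondition(BBInfo&) above negates the result before swapping
// TrueBB/FalseBB. A minimal caller using the same convention:
static bool tryReverseCond(const TargetInstrInfo *TII,
                           SmallVectorImpl<MachineOperand> &Cond) {
  // Work on a copy so a failed reversal leaves Cond untouched.
  SmallVector<MachineOperand, 4> Tmp(Cond.begin(), Cond.end());
  if (TII->reverseBranchCondition(Tmp))
    return false;                      // target could not reverse this condition
  Cond.assign(Tmp.begin(), Tmp.end());
  return true;
}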
-static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { - MachineFunction::iterator I = BB->getIterator(); - MachineFunction::iterator E = BB->getParent()->end(); +/// Returns the next block in the function blocks ordering. If it is the end, +/// returns NULL. +static inline MachineBasicBlock *getNextBlock(MachineBasicBlock &MBB) { + MachineFunction::iterator I = MBB.getIterator(); + MachineFunction::iterator E = MBB.getParent()->end(); if (++I == E) return nullptr; return &*I; } -/// ValidSimple - Returns true if the 'true' block (along with its -/// predecessor) forms a valid simple shape for ifcvt. It also returns the -/// number of instructions that the ifcvt would need to duplicate if performed -/// in Dups. +/// Returns true if the 'true' block (along with its predecessor) forms a valid +/// simple shape for ifcvt. It also returns the number of instructions that the +/// ifcvt would need to duplicate if performed in Dups. bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, BranchProbability Prediction) const { Dups = 0; @@ -495,12 +544,11 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, return true; } -/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along -/// with their common predecessor) forms a valid triangle shape for ifcvt. -/// If 'FalseBranch' is true, it checks if 'true' block's false branch -/// branches to the 'false' block rather than the other way around. It also -/// returns the number of instructions that the ifcvt would need to duplicate -/// if performed in 'Dups'. +/// Returns true if the 'true' and 'false' blocks (along with their common +/// predecessor) forms a valid triangle shape for ifcvt. If 'FalseBranch' is +/// true, it checks if 'true' block's false branch branches to the 'false' block +/// rather than the other way around. It also returns the number of instructions +/// that the ifcvt would need to duplicate if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, BranchProbability Prediction) const { @@ -540,122 +588,353 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, return TExit && TExit == FalseBBI.BB; } -/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along -/// with their common predecessor) forms a valid diamond shape for ifcvt. -bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, - unsigned &Dups1, unsigned &Dups2) const { - Dups1 = Dups2 = 0; - if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || - FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) - return false; - - MachineBasicBlock *TT = TrueBBI.TrueBB; - MachineBasicBlock *FT = FalseBBI.TrueBB; - - if (!TT && blockAlwaysFallThrough(TrueBBI)) - TT = getNextBlock(TrueBBI.BB); - if (!FT && blockAlwaysFallThrough(FalseBBI)) - FT = getNextBlock(FalseBBI.BB); - if (TT != FT) - return false; - if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) - return false; - if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) - return false; +/// Shrink the provided inclusive range by one instruction. +/// If the range was one instruction (\p It == \p Begin), It is not modified, +/// but \p Empty is set to true. +static inline void shrinkInclusiveRange( + MachineBasicBlock::iterator &Begin, + MachineBasicBlock::iterator &It, + bool &Empty) { + if (It == Begin) + Empty = true; + else + It--; +} - // FIXME: Allow true block to have an early exit? 
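// Editorial sketch (not part of this diff): shrinkInclusiveRange treats
// [Begin, It] as an inclusive range and sets Empty instead of ever moving It
// before Begin. A typical backwards walk over such a range looks like this
// (visit() is a placeholder):
static void walkBackwardsSketch(MachineBasicBlock &MBB) {
  if (MBB.empty())
    return;
  MachineBasicBlock::iterator Begin = MBB.begin();
  MachineBasicBlock::iterator It = std::prev(MBB.end()); // last instruction
  bool Empty = false;
  while (!Empty) {
    // visit(*It);
    shrinkInclusiveRange(Begin, It, Empty); // stops once It == Begin
  }
}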
- if (TrueBBI.FalseBB || FalseBBI.FalseBB || - (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) - return false; +/// Count duplicated instructions and move the iterators to show where they +/// are. +/// @param TIB True Iterator Begin +/// @param FIB False Iterator Begin +/// These two iterators initially point to the first instruction of the two +/// blocks, and finally point to the first non-shared instruction. +/// @param TIE True Iterator End +/// @param FIE False Iterator End +/// These two iterators initially point to End() for the two blocks() and +/// finally point to the first shared instruction in the tail. +/// Upon return [TIB, TIE), and [FIB, FIE) mark the un-duplicated portions of +/// two blocks. +/// @param Dups1 count of duplicated instructions at the beginning of the 2 +/// blocks. +/// @param Dups2 count of duplicated instructions at the end of the 2 blocks. +/// @param SkipUnconditionalBranches if true, Don't make sure that +/// unconditional branches at the end of the blocks are the same. True is +/// passed when the blocks are analyzable to allow for fallthrough to be +/// handled. +/// @return false if the shared portion prevents if conversion. +bool IfConverter::CountDuplicatedInstructions( + MachineBasicBlock::iterator &TIB, + MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, + MachineBasicBlock::iterator &FIE, + unsigned &Dups1, unsigned &Dups2, + MachineBasicBlock &TBB, MachineBasicBlock &FBB, + bool SkipUnconditionalBranches) const { - // Count duplicate instructions at the beginning of the true and false blocks. - MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); - MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); - MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); - MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. - if (TIB->isDebugValue()) { - while (TIB != TIE && TIB->isDebugValue()) - ++TIB; - if (TIB == TIE) - break; - } - if (FIB->isDebugValue()) { - while (FIB != FIE && FIB->isDebugValue()) - ++FIB; - if (FIB == FIE) - break; - } + TIB = skipDebugInstructionsForward(TIB, TIE); + if(TIB == TIE) + break; + FIB = skipDebugInstructionsForward(FIB, FIE); + if(FIB == FIE) + break; if (!TIB->isIdenticalTo(*FIB)) break; - ++Dups1; + // A pred-clobbering instruction in the shared portion prevents + // if-conversion. + std::vector<MachineOperand> PredDefs; + if (TII->DefinesPredicate(*TIB, PredDefs)) + return false; + // If we get all the way to the branch instructions, don't count them. + if (!TIB->isBranch()) + ++Dups1; ++TIB; ++FIB; } - // Now, in preparation for counting duplicate instructions at the ends of the - // blocks, move the end iterators up past any branch instructions. - // If both blocks are returning don't skip the branches, since they will - // likely be both identical return instructions. In such cases the return - // can be left unpredicated. // Check for already containing all of the block. if (TIB == TIE || FIB == FIE) return true; + // Now, in preparation for counting duplicate instructions at the ends of the + // blocks, move the end iterators up past any branch instructions. 
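// Editorial sketch (not part of this diff): skipDebugInstructionsForward,
// used in the prefix-counting loop above, advances an iterator past
// dbg_value instructions without walking past End, which is why the loop
// re-checks TIB == TIE / FIB == FIE after each call. Open-coded equivalent,
// for illustration only:
static MachineBasicBlock::iterator
skipDebugForwardSketch(MachineBasicBlock::iterator It,
                       MachineBasicBlock::iterator End) {
  while (It != End && It->isDebugValue())
    ++It;
  return It;
}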
--TIE; --FIE; - if (!TrueBBI.BB->succ_empty() || !FalseBBI.BB->succ_empty()) { - while (TIE != TIB && TIE->isBranch()) - --TIE; - while (FIE != FIB && FIE->isBranch()) - --FIE; + + // After this point TIB and TIE define an inclusive range, which means that + // TIB == TIE is true when there is one more instruction to consider, not at + // the end. Because we may not be able to go before TIB, we need a flag to + // indicate a completely empty range. + bool TEmpty = false, FEmpty = false; + + // Upon exit TIE and FIE will both point at the last non-shared instruction. + // They need to be moved forward to point past the last non-shared + // instruction if the range they delimit is non-empty. + auto IncrementEndIteratorsOnExit = make_scope_exit([&]() { + if (!TEmpty) + ++TIE; + if (!FEmpty) + ++FIE; + }); + + if (!TBB.succ_empty() || !FBB.succ_empty()) { + if (SkipUnconditionalBranches) { + while (!TEmpty && TIE->isUnconditionalBranch()) + shrinkInclusiveRange(TIB, TIE, TEmpty); + while (!FEmpty && FIE->isUnconditionalBranch()) + shrinkInclusiveRange(FIB, FIE, FEmpty); + } } // If Dups1 includes all of a block, then don't count duplicate // instructions at the end of the blocks. - if (TIB == TIE || FIB == FIE) + if (TEmpty || FEmpty) return true; // Count duplicate instructions at the ends of the blocks. - while (TIE != TIB && FIE != FIB) { + while (!TEmpty && !FEmpty) { // Skip dbg_value instructions. These do not count. - if (TIE->isDebugValue()) { - while (TIE != TIB && TIE->isDebugValue()) - --TIE; - if (TIE == TIB) - break; + TIE = skipDebugInstructionsBackward(TIE, TIB); + FIE = skipDebugInstructionsBackward(FIE, FIB); + TEmpty = TIE == TIB && TIE->isDebugValue(); + FEmpty = FIE == FIB && FIE->isDebugValue(); + if (TEmpty || FEmpty) + break; + if (!TIE->isIdenticalTo(*FIE)) + break; + // We have to verify that any branch instructions are the same, and then we + // don't count them toward the # of duplicate instructions. + if (!TIE->isBranch()) + ++Dups2; + shrinkInclusiveRange(TIB, TIE, TEmpty); + shrinkInclusiveRange(FIB, FIE, FEmpty); + } + return true; +} + +/// RescanInstructions - Run ScanInstructions on a pair of blocks. +/// @param TIB - True Iterator Begin, points to first non-shared instruction +/// @param FIB - False Iterator Begin, points to first non-shared instruction +/// @param TIE - True Iterator End, points past last non-shared instruction +/// @param FIE - False Iterator End, points past last non-shared instruction +/// @param TrueBBI - BBInfo to update for the true block. +/// @param FalseBBI - BBInfo to update for the false block. +/// @returns - false if either block cannot be predicated or if both blocks end +/// with a predicate-clobbering instruction. 
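// Editorial sketch (not part of this diff): make_scope_exit (from
// llvm/ADT/ScopeExit.h) runs its callback when the guard is destroyed; the
// code above uses it to convert TIE/FIE back from inclusive to half-open
// iterators on every exit path, including early returns. The bare pattern:
static void scopeExitSketch() {
  bool Converted = false;
  {
    auto Cleanup = make_scope_exit([&] { Converted = true; });
    // ... any early return or break here still runs the callback ...
  } // Cleanup fires here at the latest; Converted is now true.
  (void)Converted;
}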
+bool IfConverter::RescanInstructions( + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB, + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE, + BBInfo &TrueBBI, BBInfo &FalseBBI) const { + bool BranchUnpredicable = true; + TrueBBI.IsUnpredicable = FalseBBI.IsUnpredicable = false; + ScanInstructions(TrueBBI, TIB, TIE, BranchUnpredicable); + if (TrueBBI.IsUnpredicable) + return false; + ScanInstructions(FalseBBI, FIB, FIE, BranchUnpredicable); + if (FalseBBI.IsUnpredicable) + return false; + if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred) + return false; + return true; +} + +#ifndef NDEBUG +static void verifySameBranchInstructions( + MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2) { + MachineBasicBlock::iterator B1 = MBB1->begin(); + MachineBasicBlock::iterator B2 = MBB2->begin(); + MachineBasicBlock::iterator E1 = std::prev(MBB1->end()); + MachineBasicBlock::iterator E2 = std::prev(MBB2->end()); + bool Empty1 = false, Empty2 = false; + while (!Empty1 && !Empty2) { + E1 = skipDebugInstructionsBackward(E1, B1); + E2 = skipDebugInstructionsBackward(E2, B2); + Empty1 = E1 == B1 && E1->isDebugValue(); + Empty2 = E2 == B2 && E2->isDebugValue(); + + if (Empty1 && Empty2) + break; + + if (Empty1) { + assert(!E2->isBranch() && "Branch mis-match, one block is empty."); + break; } - if (FIE->isDebugValue()) { - while (FIE != FIB && FIE->isDebugValue()) - --FIE; - if (FIE == FIB) - break; + if (Empty2) { + assert(!E1->isBranch() && "Branch mis-match, one block is empty."); + break; } - if (!TIE->isIdenticalTo(*FIE)) + + if (E1->isBranch() || E2->isBranch()) + assert(E1->isIdenticalTo(*E2) && + "Branch mis-match, branch instructions don't match."); + else break; - ++Dups2; - --TIE; - --FIE; + shrinkInclusiveRange(B1, E1, Empty1); + shrinkInclusiveRange(B2, E2, Empty2); + } +} +#endif + +/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) form a diamond if a common tail block is +/// extracted. +/// While not strictly a diamond, this pattern would form a diamond if +/// tail-merging had merged the shared tails. +/// EBB +/// _/ \_ +/// | | +/// TBB FBB +/// / \ / \ +/// FalseBB TrueBB FalseBB +/// Currently only handles analyzable branches. +/// Specifically excludes actual diamonds to avoid overlap. +bool IfConverter::ValidForkedDiamond( + BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable) + return false; + // Don't IfConvert blocks that can't be folded into their predecessor. + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // This function is specifically looking for conditional tails, as + // unconditional tails are already handled by the standard diamond case. + if (TrueBBI.BrCond.size() == 0 || + FalseBBI.BrCond.size() == 0) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *TF = TrueBBI.FalseBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + MachineBasicBlock *FF = FalseBBI.FalseBB; + + if (!TT) + TT = getNextBlock(*TrueBBI.BB); + if (!TF) + TF = getNextBlock(*TrueBBI.BB); + if (!FT) + FT = getNextBlock(*FalseBBI.BB); + if (!FF) + FF = getNextBlock(*FalseBBI.BB); + + if (!TT || !TF) + return false; + + // Check successors. If they don't match, bail. 
+ if (!((TT == FT && TF == FF) || (TF == FT && TT == FF))) + return false; + + bool FalseReversed = false; + if (TF == FT && TT == FF) { + // If the branches are opposing, but we can't reverse, don't do it. + if (!FalseBBI.IsBrReversible) + return false; + FalseReversed = true; + reverseBranchCondition(FalseBBI); } + auto UnReverseOnExit = make_scope_exit([&]() { + if (FalseReversed) + reverseBranchCondition(FalseBBI); + }); + + // Count duplicate instructions at the beginning of the true and false blocks. + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TrueBBI.BB, *FalseBBI.BB, + /* SkipUnconditionalBranches */ true)) + return false; + + TrueBBICalc.BB = TrueBBI.BB; + FalseBBICalc.BB = FalseBBI.BB; + if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) + return false; + // The size is used to decide whether to if-convert, and the shared portions + // are subtracted off. Because of the subtraction, we just use the size that + // was calculated by the original ScanInstructions, as it is correct. + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; return true; } -/// ScanInstructions - Scan all the instructions in the block to determine if -/// the block is predicable. In most cases, that means all the instructions -/// in the block are isPredicable(). Also checks if the block contains any -/// instruction which can clobber a predicate (e.g. condition code register). -/// If so, the block is not predicable unless it's the last instruction. -void IfConverter::ScanInstructions(BBInfo &BBI) { +/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid diamond shape for ifcvt. +bool IfConverter::ValidDiamond( + BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2, + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + + if (!TT && blockAlwaysFallThrough(TrueBBI)) + TT = getNextBlock(*TrueBBI.BB); + if (!FT && blockAlwaysFallThrough(FalseBBI)) + FT = getNextBlock(*FalseBBI.BB); + if (TT != FT) + return false; + if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // FIXME: Allow true block to have an early exit? + if (TrueBBI.FalseBB || FalseBBI.FalseBB) + return false; + + // Count duplicate instructions at the beginning and end of the true and + // false blocks. + // Skip unconditional branches only if we are considering an analyzable + // diamond. Otherwise the branches must be the same. 
+ bool SkipUnconditionalBranches = + TrueBBI.IsBrAnalyzable && FalseBBI.IsBrAnalyzable; + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + if(!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TrueBBI.BB, *FalseBBI.BB, + SkipUnconditionalBranches)) + return false; + + TrueBBICalc.BB = TrueBBI.BB; + FalseBBICalc.BB = FalseBBI.BB; + if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) + return false; + // The size is used to decide whether to if-convert, and the shared portions + // are subtracted off. Because of the subtraction, we just use the size that + // was calculated by the original ScanInstructions, as it is correct. + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize; + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize; + return true; +} + +/// AnalyzeBranches - Look at the branches at the end of a block to determine if +/// the block is predicable. +void IfConverter::AnalyzeBranches(BBInfo &BBI) { if (BBI.IsDone) return; - bool AlreadyPredicated = !BBI.Predicate.empty(); - // First analyze the end of BB branches. BBI.TrueBB = BBI.FalseBB = nullptr; BBI.BrCond.clear(); BBI.IsBrAnalyzable = !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + BBI.IsBrReversible = (RevCond.size() == 0) || + !TII->reverseBranchCondition(RevCond); BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; if (BBI.BrCond.size()) { @@ -666,16 +945,29 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (!BBI.FalseBB) { // Malformed bcc? True and false blocks are the same? BBI.IsUnpredicable = true; - return; } } +} + +/// ScanInstructions - Scan all the instructions in the block to determine if +/// the block is predicable. In most cases, that means all the instructions +/// in the block are isPredicable(). Also checks if the block contains any +/// instruction which can clobber a predicate (e.g. condition code register). +/// If so, the block is not predicable unless it's the last instruction. +void IfConverter::ScanInstructions(BBInfo &BBI, + MachineBasicBlock::iterator &Begin, + MachineBasicBlock::iterator &End, + bool BranchUnpredicable) const { + if (BBI.IsDone || BBI.IsUnpredicable) + return; + + bool AlreadyPredicated = !BBI.Predicate.empty(); - // Then scan all the instructions. BBI.NonPredSize = 0; BBI.ExtraCost = 0; BBI.ExtraCost2 = 0; BBI.ClobbersPred = false; - for (auto &MI : *BBI.BB) { + for (MachineInstr &MI : make_range(Begin, End)) { if (MI.isDebugValue()) continue; @@ -715,6 +1007,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isPredicated = TII->isPredicated(MI); bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch(); + if (BranchUnpredicable && MI.isBranch()) { + BBI.IsUnpredicable = true; + return; + } + // A conditional branch is not predicable, but it may be eliminated. if (isCondBr) continue; @@ -756,13 +1053,24 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { } } -/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be -/// predicated by the specified predicate. +/// Determine if the block is a suitable candidate to be predicated by the +/// specified predicate. 
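// Editorial sketch (not part of this diff): IsBrReversible, set in
// AnalyzeBranches above, is computed on a scratch copy of the branch
// condition so the stored BrCond is never mutated when the target cannot
// reverse it. The same pattern in isolation:
static bool isReversibleSketch(const TargetInstrInfo *TII,
                               const SmallVectorImpl<MachineOperand> &BrCond) {
  SmallVector<MachineOperand, 4> RevCond(BrCond.begin(), BrCond.end());
  // An unconditional branch (empty condition) is trivially "reversible".
  return RevCond.empty() || !TII->reverseBranchCondition(RevCond);
}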
+/// @param BBI BBInfo for the block to check +/// @param Pred Predicate array for the branch that leads to BBI +/// @param isTriangle true if the Analysis is for a triangle +/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false +/// case +/// @param hasCommonTail true if BBI shares a tail with a sibling block that +/// contains any instruction that would make the block unpredicable. bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Pred, - bool isTriangle, bool RevBranch) { + bool isTriangle, bool RevBranch, + bool hasCommonTail) { // If the block is dead or unpredicable, then it cannot be predicated. - if (BBI.IsDone || BBI.IsUnpredicable) + // Two blocks may share a common unpredicable tail, but this doesn't prevent + // them from being if-converted. The non-shared portion is assumed to have + // been checked + if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail)) return false; // If it is already predicated but we couldn't analyze its terminator, the @@ -776,7 +1084,7 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) return false; - if (BBI.BrCond.size()) { + if (!hasCommonTail && BBI.BrCond.size()) { if (!isTriangle) return false; @@ -784,10 +1092,10 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end()); SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); if (RevBranch) { - if (TII->ReverseBranchCondition(Cond)) + if (TII->reverseBranchCondition(Cond)) return false; } - if (TII->ReverseBranchCondition(RevPred) || + if (TII->reverseBranchCondition(RevPred) || !TII->SubsumesPredicate(Cond, RevPred)) return false; } @@ -795,13 +1103,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, return true; } -/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from -/// the specified block. Record its successors and whether it looks like an -/// if-conversion candidate. +/// Analyze the structure of the sub-CFG starting from the specified block. +/// Record its successors and whether it looks like an if-conversion candidate. void IfConverter::AnalyzeBlock( - MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { + MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { struct BBState { - BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {} + BBState(MachineBasicBlock &MBB) : MBB(&MBB), SuccsAnalyzed(false) {} MachineBasicBlock *MBB; /// This flag is true if MBB's successors have been analyzed. @@ -825,7 +1132,10 @@ void IfConverter::AnalyzeBlock( BBI.BB = BB; BBI.IsBeingAnalyzed = true; - ScanInstructions(BBI); + AnalyzeBranches(BBI); + MachineBasicBlock::iterator Begin = BBI.BB->begin(); + MachineBasicBlock::iterator End = BBI.BB->end(); + ScanInstructions(BBI, Begin, End); // Unanalyzable or ends with fallthrough or unconditional branch, or if is // not considered for ifcvt anymore. @@ -854,8 +1164,8 @@ void IfConverter::AnalyzeBlock( // Push the False and True blocks to the stack. 
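// Editorial sketch (not part of this diff): AnalyzeBlock above walks the
// sub-CFG with an explicit stack of BBState entries instead of recursion, so
// deep if-then chains cannot overflow the call stack. The shape of that loop,
// with the per-block work elided:
static void analyzeBlockShapeSketch(MachineBasicBlock &Entry) {
  // SmallVector<BBState, 16> BBStack;
  // BBStack.push_back(Entry);
  // while (!BBStack.empty()) {
  //   BBState &State = BBStack.back();
  //   if (!State.SuccsAnalyzed) {
  //     // scan *State.MBB, push its TrueBB/FalseBB, and revisit later
  //   } else {
  //     // both successors analyzed: classify the shape, enqueue a token, pop
  //     // BBStack.pop_back();
  //   }
  // }
  (void)Entry;
}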
State.SuccsAnalyzed = true; - BBStack.push_back(BBI.FalseBB); - BBStack.push_back(BBI.TrueBB); + BBStack.push_back(*BBI.FalseBB); + BBStack.push_back(*BBI.TrueBB); continue; } @@ -871,7 +1181,7 @@ void IfConverter::AnalyzeBlock( SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); - bool CanRevCond = !TII->ReverseBranchCondition(RevCond); + bool CanRevCond = !TII->reverseBranchCondition(RevCond); unsigned Dups = 0; unsigned Dups2 = 0; @@ -881,25 +1191,59 @@ void IfConverter::AnalyzeBlock( BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); - if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && - MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + - TrueBBI.ExtraCost), TrueBBI.ExtraCost2, - *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + - FalseBBI.ExtraCost),FalseBBI.ExtraCost2, - Prediction) && - FeasibilityAnalysis(TrueBBI, BBI.BrCond) && - FeasibilityAnalysis(FalseBBI, RevCond)) { - // Diamond: - // EBB - // / \_ - // | | - // TBB FBB - // \ / - // TailBB - // Note TailBB can be empty. - Tokens.push_back(llvm::make_unique<IfcvtToken>( - BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2)); - Enqueued = true; + if (CanRevCond) { + BBInfo TrueBBICalc, FalseBBICalc; + auto feasibleDiamond = [&]() { + bool MeetsSize = MeetIfcvtSizeLimit( + *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + + TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, + *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + + FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, + Prediction); + bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond, + /* IsTriangle */ false, /* RevCond */ false, + /* hasCommonTail */ true); + bool FalseFeasible = FeasibilityAnalysis(FalseBBI, RevCond, + /* IsTriangle */ false, /* RevCond */ false, + /* hasCommonTail */ true); + return MeetsSize && TrueFeasible && FalseFeasible; + }; + + if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, + TrueBBICalc, FalseBBICalc)) { + if (feasibleDiamond()) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. + Tokens.push_back(llvm::make_unique<IfcvtToken>( + BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2, + (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); + Enqueued = true; + } + } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, + TrueBBICalc, FalseBBICalc)) { + if (feasibleDiamond()) { + // ForkedDiamond: + // if TBB and FBB have a common tail that includes their conditional + // branch instructions, then we can If Convert this pattern. + // EBB + // _/ \_ + // | | + // TBB FBB + // / \ / \ + // FalseBB TrueBB FalseBB + // + Tokens.push_back(llvm::make_unique<IfcvtToken>( + BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2, + (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); + Enqueued = true; + } + } } if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && @@ -985,25 +1329,23 @@ void IfConverter::AnalyzeBlock( } } -/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion -/// candidates. +/// Analyze all blocks and find entries for all if-conversion candidates. void IfConverter::AnalyzeBlocks( MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { - for (auto &BB : MF) - AnalyzeBlock(&BB, Tokens); + for (MachineBasicBlock &MBB : MF) + AnalyzeBlock(MBB, Tokens); // Sort to favor more complex ifcvt scheme. 
std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); } -/// canFallThroughTo - Returns true either if ToBB is the next block after BB or -/// that all the intervening blocks are empty (given BB can fall through to its -/// next block). -static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { - MachineFunction::iterator PI = BB->getIterator(); +/// Returns true either if ToMBB is the next block after MBB or that all the +/// intervening blocks are empty (given MBB can fall through to its next block). +static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) { + MachineFunction::iterator PI = MBB.getIterator(); MachineFunction::iterator I = std::next(PI); - MachineFunction::iterator TI = ToBB->getIterator(); - MachineFunction::iterator E = BB->getParent()->end(); + MachineFunction::iterator TI = ToMBB.getIterator(); + MachineFunction::iterator E = MBB.getParent()->end(); while (I != TI) { // Check isSuccessor to avoid case where the next block is empty, but // it's not a successor. @@ -1014,30 +1356,27 @@ static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { return true; } -/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed -/// to determine if it can be if-converted. If predecessor is already enqueued, -/// dequeue it! -void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { - for (const auto &Predecessor : BB->predecessors()) { +/// Invalidate predecessor BB info so it would be re-analyzed to determine if it +/// can be if-converted. If predecessor is already enqueued, dequeue it! +void IfConverter::InvalidatePreds(MachineBasicBlock &MBB) { + for (const MachineBasicBlock *Predecessor : MBB.predecessors()) { BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()]; - if (PBBI.IsDone || PBBI.BB == BB) + if (PBBI.IsDone || PBBI.BB == &MBB) continue; PBBI.IsAnalyzed = false; PBBI.IsEnqueued = false; } } -/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB. -/// -static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, +/// Inserts an unconditional branch from \p MBB to \p ToMBB. +static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB, const TargetInstrInfo *TII) { DebugLoc dl; // FIXME: this is nowhere SmallVector<MachineOperand, 0> NoCond; - TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl); + TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl); } -/// RemoveExtraEdges - Remove true / false edges if either / both are no longer -/// successors. +/// Remove true / false edges if either / both are no longer successors. void IfConverter::RemoveExtraEdges(BBInfo &BBI) { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; @@ -1046,29 +1385,42 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { } /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all -/// values defined in MI which are not live/used by MI. +/// values defined in MI which are also live/used by MI. static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { + const TargetRegisterInfo *TRI = MI.getParent()->getParent() + ->getSubtarget().getRegisterInfo(); + + // Before stepping forward past MI, remember which regs were live + // before MI. This is needed to set the Undef flag only when reg is + // dead. 
+ SparseSet<unsigned> LiveBeforeMI; + LiveBeforeMI.setUniverse(TRI->getNumRegs()); + for (unsigned Reg : Redefs) + LiveBeforeMI.insert(Reg); + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers; Redefs.stepForward(MI, Clobbers); // Now add the implicit uses for each of the clobbered values. - for (auto Reg : Clobbers) { + for (auto Clobber : Clobbers) { // FIXME: Const cast here is nasty, but better than making StepForward // take a mutable instruction instead of const. - MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second); + unsigned Reg = Clobber.first; + MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second); MachineInstr *OpMI = Op.getParent(); MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI); if (Op.isRegMask()) { // First handle regmasks. They clobber any entries in the mask which // means that we need a def for those registers. - MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); + if (LiveBeforeMI.count(Reg)) + MIB.addReg(Reg, RegState::Implicit); // We also need to add an implicit def of this register for the later // use to read from. // For the register allocator to have allocated a register clobbered // by the call which is used later, it must be the case that // the call doesn't return. - MIB.addReg(Reg.first, RegState::Implicit | RegState::Define); + MIB.addReg(Reg, RegState::Implicit | RegState::Define); continue; } assert(Op.isReg() && "Register operand required"); @@ -1078,13 +1430,23 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { if (Redefs.contains(Op.getReg())) Op.setIsDead(false); } - MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); + if (LiveBeforeMI.count(Reg)) + MIB.addReg(Reg, RegState::Implicit); + else { + bool HasLiveSubReg = false; + for (MCSubRegIterator S(Reg, TRI); S.isValid(); ++S) { + if (!LiveBeforeMI.count(*S)) + continue; + HasLiveSubReg = true; + break; + } + if (HasLiveSubReg) + MIB.addReg(Reg, RegState::Implicit); + } } } -/** - * Remove kill flags from operands with a registers in the @p DontKill set. - */ +/// Remove kill flags from operands with a registers in the \p DontKill set. static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) { for (MIBundleOperands O(MI); O.isValid(); ++O) { if (!O->isReg() || !O->isKill()) @@ -1094,20 +1456,17 @@ static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) { } } -/** - * Walks a range of machine instructions and removes kill flags for registers - * in the @p DontKill set. - */ +/// Walks a range of machine instructions and removes kill flags for registers +/// in the \p DontKill set. static void RemoveKills(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, const LivePhysRegs &DontKill, const MCRegisterInfo &MCRI) { - for ( ; I != E; ++I) - RemoveKills(*I, DontKill); + for (MachineInstr &MI : make_range(I, E)) + RemoveKills(MI, DontKill); } -/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. -/// +/// If convert a simple (split, no rejoin) sub-CFG. 
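// Editorial sketch (not part of this diff): LiveBeforeMI above uses
// llvm::SparseSet (llvm/ADT/SparseSet.h) keyed by register number;
// setUniverse() must size the set before the first insert(), and count() is
// then O(1). The same pattern as a standalone helper:
static void rememberLiveSketch(const LivePhysRegs &Redefs, unsigned NumRegs) {
  SparseSet<unsigned> LiveBefore;
  LiveBefore.setUniverse(NumRegs);     // required before insert()
  for (unsigned Reg : Redefs)
    LiveBefore.insert(Reg);
  // Later: LiveBefore.count(Reg) tells whether Reg was live before the
  // instruction, i.e. whether an implicit use needs RegState::Undef or not.
}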
bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; @@ -1118,54 +1477,58 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICSimpleFalse) std::swap(CvtBBI, NextBBI); + MachineBasicBlock &CvtMBB = *CvtBBI->BB; + MachineBasicBlock &NextMBB = *NextBBI->BB; if (CvtBBI->IsDone || - (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) { // Something has changed. It's no longer safe to predicate this block. BBI.IsAnalyzed = false; CvtBBI->IsAnalyzed = false; return false; } - if (CvtBBI->BB->hasAddressTaken()) + if (CvtMBB.hasAddressTaken()) // Conservatively abort if-conversion if BB's address is taken. return false; if (Kind == ICSimpleFalse) - if (TII->ReverseBranchCondition(Cond)) + if (TII->reverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); - // Initialize liveins to the first BB. These are potentiall redefined by - // predicated instructions. - Redefs.init(TRI); - Redefs.addLiveIns(*CvtBBI->BB); - Redefs.addLiveIns(*NextBBI->BB); - - // Compute a set of registers which must not be killed by instructions in - // BB1: This is everything live-in to BB2. - DontKill.init(TRI); - DontKill.addLiveIns(*NextBBI->BB); + Redefs.init(*TRI); + DontKill.init(*TRI); + + if (MRI->tracksLiveness()) { + // Initialize liveins to the first BB. These are potentiall redefined by + // predicated instructions. + Redefs.addLiveIns(CvtMBB); + Redefs.addLiveIns(NextMBB); + // Compute a set of registers which must not be killed by instructions in + // BB1: This is everything live-in to BB2. + DontKill.addLiveIns(NextMBB); + } - if (CvtBBI->BB->pred_size() > 1) { - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB, true); + BBI.BB->removeSuccessor(&CvtMBB, true); } else { - RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI); + PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Merge converted block into entry block. - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI); } bool IterIfcvt = true; - if (!canFallThroughTo(BBI.BB, NextBBI->BB)) { - InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + if (!canFallThroughTo(*BBI.BB, NextMBB)) { + InsertUncondBranch(*BBI.BB, NextMBB, TII); BBI.HasFallThrough = false; // Now ifcvt'd block will look like this: // BB: @@ -1185,15 +1548,14 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; - InvalidatePreds(BBI.BB); + InvalidatePreds(*BBI.BB); CvtBBI->IsDone = true; // FIXME: Must maintain LiveIns. return true; } -/// IfConvertTriangle - If convert a triangle sub-CFG. -/// +/// If convert a triangle sub-CFG. 
bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; @@ -1205,29 +1567,29 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) std::swap(CvtBBI, NextBBI); + MachineBasicBlock &CvtMBB = *CvtBBI->BB; + MachineBasicBlock &NextMBB = *NextBBI->BB; if (CvtBBI->IsDone || - (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) { // Something has changed. It's no longer safe to predicate this block. BBI.IsAnalyzed = false; CvtBBI->IsAnalyzed = false; return false; } - if (CvtBBI->BB->hasAddressTaken()) + if (CvtMBB.hasAddressTaken()) // Conservatively abort if-conversion if BB's address is taken. return false; if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) - if (TII->ReverseBranchCondition(Cond)) + if (TII->reverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { - if (ReverseBranchCondition(*CvtBBI)) { + if (reverseBranchCondition(*CvtBBI)) { // BB has been changed, modify its predecessors (except for this // one) so they don't get ifcvt'ed based on bad intel. - for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(), - E = CvtBBI->BB->pred_end(); PI != E; ++PI) { - MachineBasicBlock *PBB = *PI; + for (MachineBasicBlock *PBB : CvtMBB.predecessors()) { if (PBB == BBI.BB) continue; BBInfo &PBBI = BBAnalysis[PBB->getNumber()]; @@ -1241,9 +1603,11 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - Redefs.init(TRI); - Redefs.addLiveIns(*CvtBBI->BB); - Redefs.addLiveIns(*NextBBI->BB); + Redefs.init(*TRI); + if (MRI->tracksLiveness()) { + Redefs.addLiveIns(CvtMBB); + Redefs.addLiveIns(NextMBB); + } DontKill.clear(); @@ -1251,29 +1615,29 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BranchProbability CvtNext, CvtFalse, BBNext, BBCvt; if (HasEarlyExit) { - // Get probabilities before modifying CvtBBI->BB and BBI.BB. - CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB); - CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB); - BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB); - BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB); + // Get probabilities before modifying CvtMBB and BBI.BB. + CvtNext = MBPI->getEdgeProbability(&CvtMBB, &NextMBB); + CvtFalse = MBPI->getEdgeProbability(&CvtMBB, CvtBBI->FalseBB); + BBNext = MBPI->getEdgeProbability(BBI.BB, &NextMBB); + BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB); } - if (CvtBBI->BB->pred_size() > 1) { - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + if (CvtMBB.pred_size() > 1) { + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB, true); + BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the 'true' block after removing its branch. 
- CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB); + PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); // Now merge the entry of the triangle with the true block. - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI, false); } @@ -1281,24 +1645,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { if (HasEarlyExit) { SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), CvtBBI->BrCond.end()); - if (TII->ReverseBranchCondition(RevCond)) + if (TII->reverseBranchCondition(RevCond)) llvm_unreachable("Unable to reverse branch condition!"); // Update the edge probability for both CvtBBI->FalseBB and NextBBI. - // NewNext = New_Prob(BBI.BB, NextBBI->BB) = - // Prob(BBI.BB, NextBBI->BB) + - // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB) + // NewNext = New_Prob(BBI.BB, NextMBB) = + // Prob(BBI.BB, NextMBB) + + // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, NextMBB) // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) = - // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB) - auto NewTrueBB = getNextBlock(BBI.BB); + // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, CvtBBI->FalseBB) + auto NewTrueBB = getNextBlock(*BBI.BB); auto NewNext = BBNext + BBCvt * CvtNext; - auto NewTrueBBIter = - std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB); + auto NewTrueBBIter = find(BBI.BB->successors(), NewTrueBB); if (NewTrueBBIter != BBI.BB->succ_end()) BBI.BB->setSuccProbability(NewTrueBBIter, NewNext); auto NewFalse = BBCvt * CvtFalse; - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); + TII->insertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse); } @@ -1306,18 +1669,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // predecessors. Otherwise, add an unconditional branch to 'false'. bool FalseBBDead = false; bool IterIfcvt = true; - bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB); + bool isFallThrough = canFallThroughTo(*BBI.BB, NextMBB); if (!isFallThrough) { // Only merge them if the true block does not fallthrough to the false // block. By not merging them, we make it possible to iteratively // ifcvt the blocks. if (!HasEarlyExit && - NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough && - !NextBBI->BB->hasAddressTaken()) { + NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough && + !NextMBB.hasAddressTaken()) { MergeBlocks(BBI, *NextBBI); FalseBBDead = true; } else { - InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + InsertUncondBranch(*BBI.BB, NextMBB, TII); BBI.HasFallThrough = false; } // Mixed predicated and unpredicated code. This cannot be iteratively @@ -1330,7 +1693,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; - InvalidatePreds(BBI.BB); + InvalidatePreds(*BBI.BB); CvtBBI->IsDone = true; if (FalseBBDead) NextBBI->IsDone = true; @@ -1339,23 +1702,25 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { return true; } -/// IfConvertDiamond - If convert a diamond sub-CFG. 
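// Editorial sketch (not part of this diff): a concrete instance of the edge
// probability update above, with made-up probabilities. Suppose
// Prob(BB,Cvt)=1/2, Prob(Cvt,Next)=1/2, Prob(BB,Next)=1/4 and
// Prob(Cvt,CvtFalse)=1/2. After folding Cvt into BB:
//   NewNext  = 1/4 + 1/2 * 1/2 = 1/2
//   NewFalse = 1/2 * 1/2       = 1/4
// BranchProbability supports this arithmetic directly (up to fixed-point
// rounding):
static void edgeProbSketch() {
  BranchProbability BBNext  = BranchProbability::getBranchProbability(1, 4);
  BranchProbability BBCvt   = BranchProbability::getBranchProbability(1, 2);
  BranchProbability CvtNext = BranchProbability::getBranchProbability(1, 2);
  BranchProbability CvtFalse = BranchProbability::getBranchProbability(1, 2);
  BranchProbability NewNext  = BBNext + BBCvt * CvtNext;  // ~1/2
  BranchProbability NewFalse = BBCvt * CvtFalse;          // ~1/4
  (void)NewNext; (void)NewFalse;
}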
-/// -bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, - unsigned NumDups1, unsigned NumDups2) { - BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; - BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; - MachineBasicBlock *TailBB = TrueBBI.TrueBB; - // True block must fall through or end with an unanalyzable terminator. - if (!TailBB) { - if (blockAlwaysFallThrough(TrueBBI)) - TailBB = FalseBBI.TrueBB; - assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); - } +/// Common code shared between diamond conversions. +/// \p BBI, \p TrueBBI, and \p FalseBBI form the diamond shape. +/// \p NumDups1 - number of shared instructions at the beginning of \p TrueBBI +/// and FalseBBI +/// \p NumDups2 - number of shared instructions at the end of \p TrueBBI +/// and \p FalseBBI +/// \p RemoveBranch - Remove the common branch of the two blocks before +/// predicating. Only false for unanalyzable fallthrough +/// cases. The caller will replace the branch if necessary. +/// \p MergeAddEdges - Add successor edges when merging blocks. Only false for +/// unanalyzable fallthrough +bool IfConverter::IfConvertDiamondCommon( + BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred, + bool RemoveBranch, bool MergeAddEdges) { if (TrueBBI.IsDone || FalseBBI.IsDone || - TrueBBI.BB->pred_size() > 1 || - FalseBBI.BB->pred_size() > 1) { + TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) { // Something has changed. It's no longer safe to predicate these blocks. BBI.IsAnalyzed = false; TrueBBI.IsAnalyzed = false; @@ -1373,36 +1738,47 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBInfo *BBI1 = &TrueBBI; BBInfo *BBI2 = &FalseBBI; SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); - if (TII->ReverseBranchCondition(RevCond)) + if (TII->reverseBranchCondition(RevCond)) llvm_unreachable("Unable to reverse branch condition!"); SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond; SmallVector<MachineOperand, 4> *Cond2 = &RevCond; // Figure out the more profitable ordering. bool DoSwap = false; - if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) + if (TClobbersPred && !FClobbersPred) DoSwap = true; - else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { + else if (!TClobbersPred && !FClobbersPred) { if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) DoSwap = true; - } + } else if (TClobbersPred && FClobbersPred) + llvm_unreachable("Predicate info cannot be clobbered by both sides."); if (DoSwap) { std::swap(BBI1, BBI2); std::swap(Cond1, Cond2); } // Remove the conditional branch from entry to the blocks. - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); - - // Initialize liveins to the first BB. These are potentially redefined by - // predicated instructions. - Redefs.init(TRI); - Redefs.addLiveIns(*BBI1->BB); + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); + + MachineBasicBlock &MBB1 = *BBI1->BB; + MachineBasicBlock &MBB2 = *BBI2->BB; + + // Initialize the Redefs: + // - BB2 live-in regs need implicit uses before being redefined by BB1 + // instructions. + // - BB1 live-out regs need implicit uses before being redefined by BB2 + // instructions. We start with BB1 live-ins so we have the live-out regs + // after tracking the BB1 instructions. + Redefs.init(*TRI); + if (MRI->tracksLiveness()) { + Redefs.addLiveIns(MBB1); + Redefs.addLiveIns(MBB2); + } // Remove the duplicated instructions at the beginnings of both paths. 
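// Editorial sketch (not part of this diff): the DoSwap logic above decides
// which half of the diamond is predicated first. A block that clobbers the
// predicate must be predicated last, and when neither block clobbers it the
// order is chosen by a size heuristic. Reduced to a predicate:
static bool shouldSwapSketch(bool TClobbersPred, bool FClobbersPred,
                             unsigned TrueSize, unsigned FalseSize) {
  assert(!(TClobbersPred && FClobbersPred) &&
         "Predicate info cannot be clobbered by both sides.");
  if (TClobbersPred)
    return true;                       // put the clobbering block second
  if (!FClobbersPred)
    return TrueSize > FalseSize;       // purely a size heuristic
  return false;                        // only the false block clobbers
}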
// Skip dbg_value instructions - MachineBasicBlock::iterator DI1 = BBI1->BB->getFirstNonDebugInstr(); - MachineBasicBlock::iterator DI2 = BBI2->BB->getFirstNonDebugInstr(); + MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr(); + MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr(); BBI1->NonPredSize -= NumDups1; BBI2->NonPredSize -= NumDups1; @@ -1421,52 +1797,60 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Compute a set of registers which must not be killed by instructions in BB1: // This is everything used+live in BB2 after the duplicated instructions. We // can compute this set by simulating liveness backwards from the end of BB2. - DontKill.init(TRI); - for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), - E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { - DontKill.stepBackward(*I); + DontKill.init(*TRI); + if (MRI->tracksLiveness()) { + for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse())) + DontKill.stepBackward(MI); + + for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) { + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy; + Redefs.stepForward(MI, Dummy); + } } + BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1); + MBB2.erase(MBB2.begin(), DI2); - for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; - ++I) { - SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers; - Redefs.stepForward(*I, IgnoredClobbers); - } - BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); - BBI2->BB->erase(BBI2->BB->begin(), DI2); - - // Remove branch from the 'true' block, unless it was not analyzable. - // Non-analyzable branches need to be preserved, since in such cases, - // the CFG structure is not an actual diamond (the join block may not - // be present). - if (BBI1->IsBrAnalyzable) - BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + // The branches have been checked to match, so it is safe to remove the branch + // in BB1 and rely on the copy in BB2 +#ifndef NDEBUG + // Unanalyzable branches must match exactly. Check that now. + if (!BBI1->IsBrAnalyzable) + verifySameBranchInstructions(&MBB1, &MBB2); +#endif + BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB); // Remove duplicated instructions. - DI1 = BBI1->BB->end(); + DI1 = MBB1.end(); for (unsigned i = 0; i != NumDups2; ) { // NumDups2 only counted non-dbg_value instructions, so this won't // run off the head of the list. - assert (DI1 != BBI1->BB->begin()); + assert(DI1 != MBB1.begin()); --DI1; // skip dbg_value instructions if (!DI1->isDebugValue()) ++i; } - BBI1->BB->erase(DI1, BBI1->BB->end()); + MBB1.erase(DI1, MBB1.end()); // Kill flags in the true block for registers living into the false block // must be removed. - RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); + RemoveKills(MBB1.begin(), MBB1.end(), DontKill, *TRI); - // Remove 'false' block branch (unless it was not analyzable), and find - // the last instruction to predicate. - if (BBI2->IsBrAnalyzable) - BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); + // The branches have been checked to match. Skip over the branch in the false + // block so that we don't try to predicate it. 
+ if (RemoveBranch) + BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB); + else { + do { + assert(DI2 != MBB2.begin()); + DI2--; + } while (DI2->isBranch() || DI2->isDebugValue()); + DI2++; + } while (NumDups2 != 0) { // NumDups2 only counted non-dbg_value instructions, so this won't // run off the head of the list. - assert (DI2 != BBI2->BB->begin()); + assert(DI2 != MBB2.begin()); --DI2; // skip dbg_value instructions if (!DI2->isDebugValue()) @@ -1483,13 +1867,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // addne r0, r1, #1 SmallSet<unsigned, 4> RedefsByFalse; SmallSet<unsigned, 4> ExtUses; - if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) { - for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) { - if (FI->isDebugValue()) + if (TII->isProfitableToUnpredicate(MBB1, MBB2)) { + for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) { + if (FI.isDebugValue()) continue; SmallVector<unsigned, 4> Defs; - for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = FI->getOperand(i); + for (const MachineOperand &MO : FI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -1506,8 +1889,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } } - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Reg = Defs[i]; + for (unsigned Reg : Defs) { if (!ExtUses.count(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) @@ -1518,17 +1900,17 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } // Predicate the 'true' block. - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); + PredicateBlock(*BBI1, MBB1.end(), *Cond1, &RedefsByFalse); // After predicating BBI1, if there is a predicated terminator in BBI1 and // a non-predicated in BBI2, then we don't want to predicate the one from // BBI2. The reason is that if we merged these blocks, we would end up with // two predicated terminators in the same block. - if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) { - MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator(); - MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator(); - if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) && - BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T)) + if (!MBB2.empty() && (DI2 == MBB2.end())) { + MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator(); + MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator(); + if (BBI1T != MBB1.end() && TII->isPredicated(*BBI1T) && + BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T)) --DI2; } @@ -1536,8 +1918,72 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. - MergeBlocks(BBI, *BBI1, TailBB == nullptr); - MergeBlocks(BBI, *BBI2, TailBB == nullptr); + MergeBlocks(BBI, *BBI1, MergeAddEdges); + MergeBlocks(BBI, *BBI2, MergeAddEdges); + return true; +} + +/// If convert an almost-diamond sub-CFG where the true +/// and false blocks share a common tail. +bool IfConverter::IfConvertForkedDiamond( + BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + + // Save the debug location for later. 
+ DebugLoc dl; + MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator(); + if (TIE != TrueBBI.BB->end()) + dl = TIE->getDebugLoc(); + // Removing branches from both blocks is safe, because we have already + // determined that both blocks have the same branch instructions. The branch + // will be added back at the end, unpredicated. + if (!IfConvertDiamondCommon( + BBI, TrueBBI, FalseBBI, + NumDups1, NumDups2, + TClobbersPred, FClobbersPred, + /* RemoveBranch */ true, /* MergeAddEdges */ true)) + return false; + + // Add back the branch. + // Debug location saved above when removing the branch from BBI2 + TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB, + TrueBBI.BrCond, dl); + + RemoveExtraEdges(BBI); + + // Update block info. + BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; + InvalidatePreds(*BBI.BB); + + // FIXME: Must maintain LiveIns. + return true; +} + +/// If convert a diamond sub-CFG. +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2, + bool TClobbersPred, bool FClobbersPred) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + MachineBasicBlock *TailBB = TrueBBI.TrueBB; + + // True block must fall through or end with an unanalyzable terminator. + if (!TailBB) { + if (blockAlwaysFallThrough(TrueBBI)) + TailBB = FalseBBI.TrueBB; + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); + } + + if (!IfConvertDiamondCommon( + BBI, TrueBBI, FalseBBI, + NumDups1, NumDups2, + TClobbersPred, FClobbersPred, + /* RemoveBranch */ TrueBBI.IsBrAnalyzable, + /* MergeAddEdges */ TailBB == nullptr)) + return false; // If the if-converted block falls through or unconditionally branches into // the tail block, and the tail block does not have other predecessors, then @@ -1560,7 +2006,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, CanMergeTail = false; else if (NumPreds == 1 && CanMergeTail) { MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); - if (*PI != BBI1->BB && *PI != BBI2->BB) + if (*PI != TrueBBI.BB && *PI != FalseBBI.BB) CanMergeTail = false; } if (CanMergeTail) { @@ -1568,7 +2014,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, TailBBI.IsDone = true; } else { BBI.BB->addSuccessor(TailBB, BranchProbability::getOne()); - InsertUncondBranch(BBI.BB, TailBB, TII); + InsertUncondBranch(*BBI.BB, *TailBB, TII); BBI.HasFallThrough = false; } } @@ -1576,13 +2022,13 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // RemoveExtraEdges won't work if the block has an unanalyzable branch, // which can happen here if TailBB is unanalyzable and is merged, so // explicitly remove BBI1 and BBI2 as successors. - BBI.BB->removeSuccessor(BBI1->BB); - BBI.BB->removeSuccessor(BBI2->BB, true); + BBI.BB->removeSuccessor(TrueBBI.BB); + BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true); RemoveExtraEdges(BBI); // Update block info. BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; - InvalidatePreds(BBI.BB); + InvalidatePreds(*BBI.BB); // FIXME: Must maintain LiveIns. 
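// Editorial note (not part of this diff): the two drivers above differ only
// in how they parameterize IfConvertDiamondCommon:
//   IfConvertForkedDiamond: RemoveBranch = true (the shared conditional
//                           branch is re-inserted afterwards from
//                           TrueBBI.BrCond), MergeAddEdges = true.
//   IfConvertDiamond:       RemoveBranch = TrueBBI.IsBrAnalyzable,
//                           MergeAddEdges = (TailBB == nullptr).
// So a classic diamond with an unanalyzable terminator keeps that terminator
// in place, while a forked diamond always rebuilds its branch explicitly.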
return true; @@ -1594,8 +2040,7 @@ static bool MaySpeculate(const MachineInstr &MI, if (!MI.isSafeToMove(nullptr, SawStore)) return false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -1608,15 +2053,15 @@ static bool MaySpeculate(const MachineInstr &MI, return true; } -/// PredicateBlock - Predicate instructions from the start of the block to the -/// specified end with the specified condition. +/// Predicate instructions from the start of the block to the specified end with +/// the specified condition. void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, SmallSet<unsigned, 4> *LaterRedefs) { bool AnyUnpred = false; bool MaySpec = LaterRedefs != nullptr; - for (MachineInstr &I : llvm::make_range(BBI.BB->begin(), E)) { + for (MachineInstr &I : make_range(BBI.BB->begin(), E)) { if (I.isDebugValue() || TII->isPredicated(I)) continue; // It may be possible not to predicate an instruction if it's the 'true' @@ -1651,14 +2096,15 @@ void IfConverter::PredicateBlock(BBInfo &BBI, ++NumUnpred; } -/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to -/// the destination block. Skip end of block branches if IgnoreBr is true. +/// Copy and predicate instructions from source BB to the destination block. +/// Skip end of block branches if IgnoreBr is true. void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); - for (auto &I : *FromBBI.BB) { + MachineBasicBlock &FromMBB = *FromBBI.BB; + for (MachineInstr &I : FromMBB) { // Do not copy the end of the block branches. if (IgnoreBr && I.isBranch()) break; @@ -1691,13 +2137,12 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, } if (!IgnoreBr) { - std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), - FromBBI.BB->succ_end()); - MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + std::vector<MachineBasicBlock *> Succs(FromMBB.succ_begin(), + FromMBB.succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromMBB); MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; - for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = Succs[i]; + for (MachineBasicBlock *Succ : Succs) { // Fallthrough edge can't be transferred. if (Succ == FallThrough) continue; @@ -1714,25 +2159,25 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, ++NumDupBBs; } -/// MergeBlocks - Move all instructions from FromBB to the end of ToBB. -/// This will leave FromBB as an empty block, so remove all of its -/// successor edges except for the fall-through edge. If AddEdges is true, -/// i.e., when FromBBI's branch is being moved, add those successor edges to -/// ToBBI. +/// Move all instructions from FromBB to the end of ToBB. This will leave +/// FromBB as an empty block, so remove all of its successor edges except for +/// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is +/// being moved, add those successor edges to ToBBI. 
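A worked example of the probability bookkeeping performed by MergeBlocks below (illustrative only; the block names, the numbers, and the helper function are made up, not part of the patch). It shows how the probability transferred from FromMBB to one of its successors is scaled by the probability of reaching FromMBB from ToBBI.BB, exactly as the NewProb *= To2FromProb step in the function body does:

  #include "llvm/Support/BranchProbability.h"
  using llvm::BranchProbability;

  static BranchProbability mergedEdgeProbExample() {
    // Assume P(To -> From) = 1/2 and P(From -> Succ) = 3/4.
    BranchProbability To2From = BranchProbability::getBranchProbability(1, 2);
    BranchProbability From2Succ = BranchProbability::getBranchProbability(3, 4);
    // The new To -> Succ edge receives the portion of From -> Succ that is
    // reachable through To, i.e. 1/2 * 3/4 = 3/8.  If To -> Succ already
    // exists, this amount is added onto its current probability instead.
    BranchProbability NewProb = From2Succ;
    NewProb *= To2From;
    return NewProb; // 3/8
  }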
void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { - assert(!FromBBI.BB->hasAddressTaken() && + MachineBasicBlock &FromMBB = *FromBBI.BB; + assert(!FromMBB.hasAddressTaken() && "Removing a BB whose address is taken!"); - // In case FromBBI.BB contains terminators (e.g. return instruction), + // In case FromMBB contains terminators (e.g. return instruction), // first move the non-terminator instructions, then the terminators. - MachineBasicBlock::iterator FromTI = FromBBI.BB->getFirstTerminator(); + MachineBasicBlock::iterator FromTI = FromMBB.getFirstTerminator(); MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator(); - ToBBI.BB->splice(ToTI, FromBBI.BB, FromBBI.BB->begin(), FromTI); + ToBBI.BB->splice(ToTI, &FromMBB, FromMBB.begin(), FromTI); // If FromBB has non-predicated terminator we should copy it at the end. - if (FromTI != FromBBI.BB->end() && !TII->isPredicated(*FromTI)) + if (FromTI != FromMBB.end() && !TII->isPredicated(*FromTI)) ToTI = ToBBI.BB->end(); - ToBBI.BB->splice(ToTI, FromBBI.BB, FromTI, FromBBI.BB->end()); + ToBBI.BB->splice(ToTI, &FromMBB, FromTI, FromMBB.end()); // Force normalizing the successors' probabilities of ToBBI.BB to convert all // unknown probabilities into known ones. @@ -1740,25 +2185,23 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { // eliminate all unknown probabilities in MBB. ToBBI.BB->normalizeSuccProbs(); - SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(), - FromBBI.BB->succ_end()); - MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(), + FromMBB.succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromMBB); MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; - // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when - // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB. + // The edge probability from ToBBI.BB to FromMBB, which is only needed when + // AddEdges is true and FromMBB is a successor of ToBBI.BB. auto To2FromProb = BranchProbability::getZero(); - if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) { - To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB); - // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the + if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) { + To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB); + // Set the edge probability from ToBBI.BB to FromMBB to zero to avoid the // edge probability being merged to other edges when this edge is removed // later. - ToBBI.BB->setSuccProbability( - std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB), - BranchProbability::getZero()); + ToBBI.BB->setSuccProbability(find(ToBBI.BB->successors(), &FromMBB), + BranchProbability::getZero()); } - for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) { - MachineBasicBlock *Succ = FromSuccs[i]; + for (MachineBasicBlock *Succ : FromSuccs) { // Fallthrough edge can't be transferred. if (Succ == FallThrough) continue; @@ -1766,26 +2209,26 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { auto NewProb = BranchProbability::getZero(); if (AddEdges) { // Calculate the edge probability for the edge from ToBBI.BB to Succ, - // which is a portion of the edge probability from FromBBI.BB to Succ. The - // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if + // which is a portion of the edge probability from FromMBB to Succ. 
The + // portion ratio is the edge probability from ToBBI.BB to FromMBB (if // FromBBI is a successor of ToBBI.BB. See comment below for excepion). - NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ); + NewProb = MBPI->getEdgeProbability(&FromMBB, Succ); - // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This - // only happens when if-converting a diamond CFG and FromBBI.BB is the - // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we - // could just use the probabilities on FromBBI.BB's out-edges when adding + // To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This + // only happens when if-converting a diamond CFG and FromMBB is the + // tail BB. In this case FromMBB post-dominates ToBBI.BB and hence we + // could just use the probabilities on FromMBB's out-edges when adding // new successors. if (!To2FromProb.isZero()) NewProb *= To2FromProb; } - FromBBI.BB->removeSuccessor(Succ); + FromMBB.removeSuccessor(Succ); if (AddEdges) { // If the edge from ToBBI.BB to Succ already exists, update the // probability of this edge by adding NewProb to it. An example is shown - // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we + // below, in which A is ToBBI.BB and B is FromMBB. In this case we // don't have to set C as A's successor as it already is. We only need to // update the edge probability on A->C. Note that B will not be // immediately removed from A's successors. It is possible that B->D is @@ -1807,7 +2250,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { // if (ToBBI.BB->isSuccessor(Succ)) ToBBI.BB->setSuccProbability( - std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ), + find(ToBBI.BB->successors(), Succ), MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb); else ToBBI.BB->addSuccessor(Succ, NewProb); @@ -1815,8 +2258,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { } // Now FromBBI always falls through to the next block! - if (NBB && !FromBBI.BB->isSuccessor(NBB)) - FromBBI.BB->addSuccessor(NBB); + if (NBB && !FromMBB.isSuccessor(NBB)) + FromMBB.addSuccessor(NBB); // Normalize the probabilities of ToBBI.BB's successors with all adjustment // we've done above. @@ -1839,6 +2282,6 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { } FunctionPass * -llvm::createIfConverter(std::function<bool(const Function &)> Ftor) { +llvm::createIfConverter(std::function<bool(const MachineFunction &)> Ftor) { return new IfConverter(std::move(Ftor)); } diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 31d6bd0..9588dfb 100644 --- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -51,6 +51,12 @@ static cl::opt<int> PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096)); +static cl::opt<unsigned> MaxInstsToConsider( + "imp-null-max-insts-to-consider", + cl::desc("The max number of instructions to consider hoisting loads over " + "(the algorithm is quadratic over this number)"), + cl::init(8)); + #define DEBUG_TYPE "implicit-null-checks" STATISTIC(NumImplicitNullChecks, @@ -59,6 +65,44 @@ STATISTIC(NumImplicitNullChecks, namespace { class ImplicitNullChecks : public MachineFunctionPass { + /// Return true if \c computeDependence can process \p MI. + static bool canHandle(const MachineInstr *MI); + + /// Helper function for \c computeDependence. 
Return true if \p A
+  /// and \p B do not have any dependences between them, and can be
+  /// re-ordered without changing program semantics.
+  bool canReorder(const MachineInstr *A, const MachineInstr *B);
+
+  /// A data type for representing the result computed by \c
+  /// computeDependence.  States whether it is okay to reorder the
+  /// instruction passed to \c computeDependence with at most one
+  /// dependency.
+  struct DependenceResult {
+    /// Can we actually re-order \p MI with \p Insts (see \c
+    /// computeDependence).
+    bool CanReorder;
+
+    /// If non-None, then an instruction in \p Insts that also must be
+    /// hoisted.
+    Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
+
+    /*implicit*/ DependenceResult(
+        bool CanReorder,
+        Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence)
+        : CanReorder(CanReorder), PotentialDependence(PotentialDependence) {
+      assert((!PotentialDependence || CanReorder) &&
+             "!CanReorder && PotentialDependence.hasValue() not allowed!");
+    }
+  };
+
+  /// Compute a result for the following question: can \p MI be
+  /// re-ordered from after \p Insts to before it.
+  ///
+  /// \c canHandle should return true for all instructions in \p
+  /// Insts.
+  DependenceResult computeDependence(const MachineInstr *MI,
+                                     ArrayRef<MachineInstr *> Insts);
+
   /// Represents one null check that can be made implicit.
   class NullCheck {
     // The memory operation the null check can be folded into.
@@ -114,6 +158,19 @@ class ImplicitNullChecks : public MachineFunctionPass {
                       MachineBasicBlock *HandlerMBB);
   void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
 
+  /// Is \p MI a memory operation that can be used to implicitly null check the
+  /// value in \p PointerReg? \p PrevInsts is the set of instructions seen since
+  /// the explicit null check on \p PointerReg.
+  bool isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+                          ArrayRef<MachineInstr *> PrevInsts);
+
+  /// Return true if \p FaultingMI can be hoisted from after the
+  /// instructions in \p InstsSeenSoFar to before them.  Set \p Dependence to a
+  /// non-null value if we also need to (and legally can) hoist a dependency.
+  bool canHoistLoadInst(MachineInstr *FaultingMI, unsigned PointerReg,
+                        ArrayRef<MachineInstr *> InstsSeenSoFar,
+                        MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
+
 public:
   static char ID;
 
@@ -129,160 +186,70 @@ public:
 
   MachineFunctionProperties getRequiredProperties() const override {
     return MachineFunctionProperties().set(
-        MachineFunctionProperties::Property::AllVRegsAllocated);
+        MachineFunctionProperties::Property::NoVRegs);
   }
 };
 
-/// \brief Detect re-ordering hazards and dependencies.
-///
-/// This class keeps track of defs and uses, and can be queried if a given
-/// machine instruction can be re-ordered from after the machine instructions
-/// seen so far to before them.
-class HazardDetector {
-  static MachineInstr *getUnknownMI() {
-    return DenseMapInfo<MachineInstr *>::getTombstoneKey();
-  }
-
-  // Maps physical registers to the instruction defining them.  If there has
-  // been more than one def of an specific register, that register is mapped to
-  // getUnknownMI().
-  DenseMap<unsigned, MachineInstr *> RegDefs;
-  DenseSet<unsigned> RegUses;
-  const TargetRegisterInfo &TRI;
-  bool hasSeenClobber;
-  AliasAnalysis &AA;
-
-public:
-  explicit HazardDetector(const TargetRegisterInfo &TRI, AliasAnalysis &AA)
-      : TRI(TRI), hasSeenClobber(false), AA(AA) {}
+}
 
-  /// \brief Make a note of \p MI for later queries to isSafeToHoist.
- /// - /// May clobber this HazardDetector instance. \see isClobbered. - void rememberInstruction(MachineInstr *MI); +bool ImplicitNullChecks::canHandle(const MachineInstr *MI) { + if (MI->isCall() || MI->mayStore() || MI->hasUnmodeledSideEffects()) + return false; + auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); }; + (void)IsRegMask; - /// \brief Return true if it is safe to hoist \p MI from after all the - /// instructions seen so far (via rememberInstruction) to before it. If \p MI - /// has one and only one transitive dependency, set \p Dependency to that - /// instruction. If there are more dependencies, return false. - bool isSafeToHoist(MachineInstr *MI, MachineInstr *&Dependency); + assert(!llvm::any_of(MI->operands(), IsRegMask) && + "Calls were filtered out above!"); - /// \brief Return true if this instance of HazardDetector has been clobbered - /// (i.e. has no more useful information). - /// - /// A HazardDetecter is clobbered when it sees a construct it cannot - /// understand, and it would have to return a conservative answer for all - /// future queries. Having a separate clobbered state lets the client code - /// bail early, without making queries about all of the future instructions - /// (which would have returned the most conservative answer anyway). - /// - /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector - /// is an error. - bool isClobbered() { return hasSeenClobber; } -}; + auto IsUnordered = [](MachineMemOperand *MMO) { return MMO->isUnordered(); }; + return llvm::all_of(MI->memoperands(), IsUnordered); } +ImplicitNullChecks::DependenceResult +ImplicitNullChecks::computeDependence(const MachineInstr *MI, + ArrayRef<MachineInstr *> Block) { + assert(llvm::all_of(Block, canHandle) && "Check this first!"); + assert(!llvm::is_contained(Block, MI) && "Block must be exclusive of MI!"); -void HazardDetector::rememberInstruction(MachineInstr *MI) { - assert(!isClobbered() && - "Don't add instructions to a clobbered hazard detector"); + Optional<ArrayRef<MachineInstr *>::iterator> Dep; - if (MI->mayStore() || MI->hasUnmodeledSideEffects()) { - hasSeenClobber = true; - return; - } + for (auto I = Block.begin(), E = Block.end(); I != E; ++I) { + if (canReorder(*I, MI)) + continue; - for (auto *MMO : MI->memoperands()) { - // Right now we don't want to worry about LLVM's memory model. - if (!MMO->isUnordered()) { - hasSeenClobber = true; - return; + if (Dep == None) { + // Found one possible dependency, keep track of it. + Dep = I; + } else { + // We found two dependencies, so bail out. + return {false, None}; } } - for (auto &MO : MI->operands()) { - if (!MO.isReg() || !MO.getReg()) - continue; - - if (MO.isDef()) { - auto It = RegDefs.find(MO.getReg()); - if (It == RegDefs.end()) - RegDefs.insert({MO.getReg(), MI}); - else { - assert(It->second && "Found null MI?"); - It->second = getUnknownMI(); - } - } else - RegUses.insert(MO.getReg()); - } + return {true, Dep}; } -bool HazardDetector::isSafeToHoist(MachineInstr *MI, - MachineInstr *&Dependency) { - assert(!isClobbered() && "isSafeToHoist cannot do anything useful!"); - Dependency = nullptr; +bool ImplicitNullChecks::canReorder(const MachineInstr *A, + const MachineInstr *B) { + assert(canHandle(A) && canHandle(B) && "Precondition!"); - // Right now we don't want to worry about LLVM's memory model. This can be - // made more precise later. 
- for (auto *MMO : MI->memoperands()) - if (!MMO->isUnordered()) - return false; + // canHandle makes sure that we _can_ correctly analyze the dependencies + // between A and B here -- for instance, we should not be dealing with heap + // load-store dependencies here. - for (auto &MO : MI->operands()) { - if (MO.isReg() && MO.getReg()) { - for (auto &RegDef : RegDefs) { - unsigned Reg = RegDef.first; - MachineInstr *MI = RegDef.second; - if (!TRI.regsOverlap(Reg, MO.getReg())) - continue; + for (auto MOA : A->operands()) { + if (!(MOA.isReg() && MOA.getReg())) + continue; - // We found a write-after-write or read-after-write, see if the - // instruction causing this dependency can be hoisted too. - - if (MI == getUnknownMI()) - // We don't have precise dependency information. - return false; - - if (Dependency) { - if (Dependency == MI) - continue; - // We already have one dependency, and we can track only one. - return false; - } - - // Now check if MI is actually a dependency that can be hoisted. - - // We don't want to track transitive dependencies. We already know that - // MI is the only instruction that defines Reg, but we need to be sure - // that it does not use any registers that have been defined (trivially - // checked below by ensuring that there are no register uses), and that - // it is the only def for every register it defines (otherwise we could - // violate a write after write hazard). - auto IsMIOperandSafe = [&](MachineOperand &MO) { - if (!MO.isReg() || !MO.getReg()) - return true; - if (MO.isUse()) - return false; - assert((!MO.isDef() || RegDefs.count(MO.getReg())) && - "All defs must be tracked in RegDefs by now!"); - return !MO.isDef() || RegDefs.find(MO.getReg())->second == MI; - }; - - if (!all_of(MI->operands(), IsMIOperandSafe)) - return false; - - // Now check for speculation safety: - bool SawStore = true; - if (!MI->isSafeToMove(&AA, SawStore) || MI->mayLoad()) - return false; - - Dependency = MI; - } + unsigned RegA = MOA.getReg(); + for (auto MOB : B->operands()) { + if (!(MOB.isReg() && MOB.getReg())) + continue; - if (MO.isDef()) - for (unsigned Reg : RegUses) - if (TRI.regsOverlap(Reg, MO.getReg())) - return false; // We found a write-after-read + unsigned RegB = MOB.getReg(); + + if (TRI->regsOverlap(RegA, RegB)) + return false; } } @@ -316,6 +283,96 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI, return false; } +bool ImplicitNullChecks::isSuitableMemoryOp( + MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) { + int64_t Offset; + unsigned BaseReg; + + if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) || + BaseReg != PointerReg) + return false; + + // We want the load to be issued at a sane offset from PointerReg, so that + // if PointerReg is null then the load reliably page faults. + if (!(MI.mayLoad() && !MI.isPredicable() && Offset < PageSize)) + return false; + + // Finally, we need to make sure that the load instruction actually is + // loading from PointerReg, and there isn't some re-definition of PointerReg + // between the compare and the load. 
+  for (auto *PrevMI : PrevInsts)
+    for (auto &PrevMO : PrevMI->operands())
+      if (PrevMO.isReg() && PrevMO.getReg() &&
+          TRI->regsOverlap(PrevMO.getReg(), PointerReg))
+        return false;
+
+  return true;
+}
+
+bool ImplicitNullChecks::canHoistLoadInst(
+    MachineInstr *FaultingMI, unsigned PointerReg,
+    ArrayRef<MachineInstr *> InstsSeenSoFar, MachineBasicBlock *NullSucc,
+    MachineInstr *&Dependence) {
+  auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar);
+  if (!DepResult.CanReorder)
+    return false;
+
+  if (!DepResult.PotentialDependence) {
+    Dependence = nullptr;
+    return true;
+  }
+
+  auto DependenceItr = *DepResult.PotentialDependence;
+  auto *DependenceMI = *DependenceItr;
+
+  // We don't want to reason about speculating loads.  Note -- at this point
+  // we should have already filtered out all of the other non-speculatable
+  // things, like calls and stores.
+  assert(canHandle(DependenceMI) && "Should never have reached here!");
+  if (DependenceMI->mayLoad())
+    return false;
+
+  for (auto &DependenceMO : DependenceMI->operands()) {
+    if (!(DependenceMO.isReg() && DependenceMO.getReg()))
+      continue;
+
+    // Make sure that we won't clobber any live ins to the sibling block by
+    // hoisting Dependency.  For instance, we can't hoist INST to before the
+    // null check (even if it is safe and does not violate any dependencies in
+    // the non_null_block) if %rdx is live in to _null_block.
+    //
+    //    test %rcx, %rcx
+    //    je _null_block
+    //  _non_null_block:
+    //    %rdx<def> = INST
+    //    ...
+    //
+    // This restriction does not apply to the faulting load inst because in
+    // case the pointer loaded from is in the null page, the load will not
+    // semantically execute, and will not affect machine state.  That is, if
+    // the load was loading into %rax and it faults, the value of %rax should
+    // stay the same as it would have been had the load not executed and we'd
+    // have branched to NullSucc directly.
+    if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
+      return false;
+
+    // The Dependency can't be re-defining the base register -- then we won't
+    // get the memory operation on the address we want.  This is already
+    // checked in \c isSuitableMemoryOp.
+    assert(!TRI->regsOverlap(DependenceMO.getReg(), PointerReg) &&
+           "Should have been checked before!");
+  }
+
+  auto DepDepResult =
+      computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr});
+
+  if (!DepDepResult.CanReorder || DepDepResult.PotentialDependence)
+    return false;
+
+  Dependence = DependenceMI;
+  return true;
+}
+
 /// Analyze MBB to check if its terminating branch can be turned into an
 /// implicit null check.  If yes, append a description of the said null check to
 /// NullCheckList and return true, else return false.
@@ -415,63 +472,24 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
   // ptr could be some non-null invalid reference that never gets loaded from
   // because some_cond is always true.
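A standalone sketch of the hoisting rule implemented by computeDependence() and canHoistLoadInst() above, which analyzeBlockForNullChecks below relies on (types and names here are simplified assumptions, not the pass itself): the faulting load may move above the instructions seen so far only if it conflicts with at most one of them, and that single dependence must itself conflict with nothing that precedes it. The real pass additionally rejects loads and live-in clobbers, as the comments above describe.

  #include <vector>

  // Conflict(A, B) returns true when the two instructions cannot be reordered.
  template <typename Inst, typename ConflictFn>
  static bool canHoistOver(const Inst &Candidate, const std::vector<Inst> &Seen,
                           ConflictFn Conflict) {
    int Dep = -1;
    for (int I = 0, E = (int)Seen.size(); I != E; ++I) {
      if (!Conflict(Candidate, Seen[I]))
        continue;                  // freely reorderable, keep scanning
      if (Dep != -1)
        return false;              // a second dependence: give up
      Dep = I;                     // remember the single allowed dependence
    }
    if (Dep == -1)
      return true;                 // nothing in the way
    for (int I = 0; I < Dep; ++I)  // the dependence must be hoistable too
      if (Conflict(Seen[Dep], Seen[I]))
        return false;
    return true;
  }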
- unsigned PointerReg = MBP.LHS.getReg(); - - HazardDetector HD(*TRI, *AA); - - for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; - ++MII) { - MachineInstr &MI = *MII; - unsigned BaseReg; - int64_t Offset; - MachineInstr *Dependency = nullptr; - if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) - if (MI.mayLoad() && !MI.isPredicable() && BaseReg == PointerReg && - Offset < PageSize && MI.getDesc().getNumDefs() <= 1 && - HD.isSafeToHoist(&MI, Dependency)) { - - auto DependencyOperandIsOk = [&](MachineOperand &MO) { - assert(!(MO.isReg() && MO.isUse()) && - "No transitive dependendencies please!"); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) - return true; - - // Make sure that we won't clobber any live ins to the sibling block - // by hoisting Dependency. For instance, we can't hoist INST to - // before the null check (even if it safe, and does not violate any - // dependencies in the non_null_block) if %rdx is live in to - // _null_block. - // - // test %rcx, %rcx - // je _null_block - // _non_null_block: - // %rdx<def> = INST - // ... - if (AnyAliasLiveIn(TRI, NullSucc, MO.getReg())) - return false; - - // Make sure Dependency isn't re-defining the base register. Then we - // won't get the memory operation on the address we want. - if (TRI->regsOverlap(MO.getReg(), BaseReg)) - return false; - - return true; - }; - - bool DependencyOperandsAreOk = - !Dependency || - all_of(Dependency->operands(), DependencyOperandIsOk); - - if (DependencyOperandsAreOk) { - NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc, - NullSucc, Dependency); - return true; - } - } + const unsigned PointerReg = MBP.LHS.getReg(); - HD.rememberInstruction(&MI); - if (HD.isClobbered()) + SmallVector<MachineInstr *, 8> InstsSeenSoFar; + + for (auto &MI : *NotNullSucc) { + if (!canHandle(&MI) || InstsSeenSoFar.size() >= MaxInstsToConsider) return false; + + MachineInstr *Dependence; + if (isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar) && + canHoistLoadInst(&MI, PointerReg, InstsSeenSoFar, NullSucc, + Dependence)) { + NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc, + NullSucc, Dependence); + return true; + } + + InstsSeenSoFar.push_back(&MI); } return false; @@ -518,7 +536,7 @@ void ImplicitNullChecks::rewriteNullChecks( for (auto &NC : NullCheckList) { // Remove the conditional branch dependent on the null check. - unsigned BranchesRemoved = TII->RemoveBranch(*NC.getCheckBlock()); + unsigned BranchesRemoved = TII->removeBranch(*NC.getCheckBlock()); (void)BranchesRemoved; assert(BranchesRemoved > 0 && "expected at least one branch!"); @@ -560,13 +578,14 @@ void ImplicitNullChecks::rewriteNullChecks( NC.getCheckOperation()->eraseFromParent(); // Insert an *unconditional* branch to not-null successor. 
- TII->InsertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr, + TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr, /*Cond=*/None, DL); NumImplicitNullChecks++; } } + char ImplicitNullChecks::ID = 0; char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID; INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks", diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 197db77..3d81184 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -114,7 +114,7 @@ public: AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(pass.getAnalysis<MachineDominatorTree>()), Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), - MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), + MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), @@ -172,7 +172,7 @@ public: AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(pass.getAnalysis<MachineDominatorTree>()), Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), - MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), + MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), @@ -185,10 +185,7 @@ private: bool isSnippet(const LiveInterval &SnipLI); void collectRegsToSpill(); - bool isRegToSpill(unsigned Reg) { - return std::find(RegsToSpill.begin(), - RegsToSpill.end(), Reg) != RegsToSpill.end(); - } + bool isRegToSpill(unsigned Reg) { return is_contained(RegsToSpill, Reg); } bool isSibling(unsigned Reg); bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI); @@ -380,7 +377,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def); MachineBasicBlock::iterator MII; if (SrcVNI->isPHIDef()) - MII = MBB->SkipPHIsAndLabels(MBB->begin()); + MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin()); else { MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def); assert(DefMI && "Defining instruction disappeared"); @@ -553,12 +550,18 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { return true; } - // Alocate a new register for the remat. + // Allocate a new register for the remat. unsigned NewVReg = Edit->createFrom(Original); // Finally we can rematerialize OrigMI before MI. SlotIndex DefIdx = Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI); + + // We take the DebugLoc from MI, since OrigMI may be attributed to a + // different source location. + auto *NewMI = LIS.getInstructionFromIndex(DefIdx); + NewMI->setDebugLoc(MI.getDebugLoc()); + (void)DefIdx; DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); @@ -736,9 +739,12 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; - bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT || - MI->getOpcode() == TargetOpcode::PATCHPOINT || - MI->getOpcode() == TargetOpcode::STACKMAP); + // Spill subregs if the target allows it. + // We always want to spill subregs for stackmap/patchpoint pseudos. 
+ bool SpillSubRegs = TII.isSubregFoldable() || + MI->getOpcode() == TargetOpcode::STATEPOINT || + MI->getOpcode() == TargetOpcode::PATCHPOINT || + MI->getOpcode() == TargetOpcode::STACKMAP; // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. @@ -751,7 +757,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, ImpReg = MO.getReg(); continue; } - // FIXME: Teach targets to deal with subregs. + if (!SpillSubRegs && MO.getSubReg()) return false; // We cannot fold a load instruction into a def. @@ -762,6 +768,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, FoldOps.push_back(Idx); } + // If we only have implicit uses, we won't be able to fold that. + // Moreover, TargetInstrInfo::foldMemoryOperand will assert if we try! + if (FoldOps.empty()) + return false; + MachineInstrSpan MIS(MI); MachineInstr *FoldMI = @@ -1113,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills( // earlier spill with smaller SlotIndex. for (const auto CurrentSpill : Spills) { MachineBasicBlock *Block = CurrentSpill->getParent(); - MachineDomTreeNode *Node = MDT.DT->getNode(Block); + MachineDomTreeNode *Node = MDT.getBase().getNode(Block); MachineInstr *PrevSpill = SpillBBToSpill[Node]; if (PrevSpill) { SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill); @@ -1121,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills( MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill; MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill; SpillsToRm.push_back(SpillToRm); - SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep; + SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep; } else { - SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill; + SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; } } for (const auto SpillToRm : SpillsToRm) @@ -1198,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders( // Sort the nodes in WorkSet in top-down order and save the nodes // in Orders. Orders will be used for hoisting in runHoistSpills. unsigned idx = 0; - Orders.push_back(MDT.DT->getNode(Root)); + Orders.push_back(MDT.getBase().getNode(Root)); do { MachineDomTreeNode *Node = Orders[idx++]; const std::vector<MachineDomTreeNode *> &Children = Node->getChildren(); diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 3f11119..ec35b3f 100644 --- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -29,6 +29,9 @@ // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2 // intrinsic in ARM backend. // +// In X86, this can be further optimized into a set of target +// specific loads followed by an optimized sequence of shuffles. +// // E.g. An interleaved store (Factor = 3): // %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> @@ -37,6 +40,8 @@ // It could be transformed into a st3 intrinsic in AArch64 backend or a vst3 // intrinsic in ARM backend. // +// Similarly, a set of interleaved stores can be transformed into an optimized +// sequence of shuffles followed by a set of target specific stores for X86. 
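To make the shuffle-mask patterns above concrete, here is a small standalone checker (illustrative only; the function name and container type are made up) for the strict RE-interleave form that the pass accepted before this change, i.e. <0, LaneLen, ..., 1, LaneLen+1, ...>. The changes below generalize this to tolerate undef lanes and non-zero start indices.

  #include <vector>

  // Returns true if Mask is exactly the canonical RE-interleave pattern for
  // the given Factor, e.g. Factor = 2, LaneLen = 4 -> <0, 4, 1, 5, 2, 6, 3, 7>.
  static bool isCanonicalReInterleave(const std::vector<int> &Mask,
                                      unsigned Factor) {
    if (Factor < 2 || Mask.empty() || Mask.size() % Factor != 0)
      return false;
    unsigned LaneLen = Mask.size() / Factor;
    for (unsigned I = 0, E = Mask.size(); I != E; ++I)
      if (Mask[I] != int((I % Factor) * LaneLen + I / Factor))
        return false;
    return true;
  }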
//===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" @@ -57,8 +62,6 @@ static cl::opt<bool> LowerInterleavedAccesses( cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden); -static unsigned MaxFactor; // The maximum supported interleave factor. - namespace { class InterleavedAccess : public FunctionPass { @@ -70,7 +73,7 @@ public: initializeInterleavedAccessPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const override { return "Interleaved Access Pass"; } + StringRef getPassName() const override { return "Interleaved Access Pass"; } bool runOnFunction(Function &F) override; @@ -84,6 +87,9 @@ private: const TargetMachine *TM; const TargetLowering *TLI; + /// The maximum supported interleave factor. + unsigned MaxFactor; + /// \brief Transform an interleaved load into target specific intrinsics. bool lowerInterleavedLoad(LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts); @@ -144,7 +150,7 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor, /// <0, 2, 4, 6> (mask of index 0 to extract even elements) /// <1, 3, 5, 7> (mask of index 1 to extract odd elements) static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, - unsigned &Index) { + unsigned &Index, unsigned MaxFactor) { if (Mask.size() < 2) return false; @@ -156,13 +162,19 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, return false; } -/// \brief Check if the mask is RE-interleave mask for an interleaved store. -/// -/// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...> +/// \brief Check if the mask can be used in an interleaved store. +// +/// It checks for a more general pattern than the RE-interleave mask. +/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...> +/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35> +/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19> +/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5> /// -/// E.g. The RE-interleave mask (Factor = 2) could be: -/// <0, 4, 1, 5, 2, 6, 3, 7> -static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) { +/// The particular case of an RE-interleave mask is: +/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...> +/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7> +static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, + unsigned MaxFactor, unsigned OpNumElts) { unsigned NumElts = Mask.size(); if (NumElts < 4) return false; @@ -172,21 +184,75 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) { if (NumElts % Factor) continue; - unsigned NumSubElts = NumElts / Factor; - if (!isPowerOf2_32(NumSubElts)) + unsigned LaneLen = NumElts / Factor; + if (!isPowerOf2_32(LaneLen)) continue; - // Check whether each element matchs the RE-interleaved rule. Ignore undef - // elements. - unsigned i = 0; - for (; i < NumElts; i++) - if (Mask[i] >= 0 && - static_cast<unsigned>(Mask[i]) != - (i % Factor) * NumSubElts + i / Factor) + // Check whether each element matches the general interleaved rule. + // Ignore undef elements, as long as the defined elements match the rule. + // Outer loop processes all factors (x, y, z in the above example) + unsigned I = 0, J; + for (; I < Factor; I++) { + unsigned SavedLaneValue; + unsigned SavedNoUndefs = 0; + + // Inner loop processes consecutive accesses (x, x+1... 
in the example) + for (J = 0; J < LaneLen - 1; J++) { + // Lane computes x's position in the Mask + unsigned Lane = J * Factor + I; + unsigned NextLane = Lane + Factor; + int LaneValue = Mask[Lane]; + int NextLaneValue = Mask[NextLane]; + + // If both are defined, values must be sequential + if (LaneValue >= 0 && NextLaneValue >= 0 && + LaneValue + 1 != NextLaneValue) + break; + + // If the next value is undef, save the current one as reference + if (LaneValue >= 0 && NextLaneValue < 0) { + SavedLaneValue = LaneValue; + SavedNoUndefs = 1; + } + + // Undefs are allowed, but defined elements must still be consecutive: + // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, .... + // Verify this by storing the last non-undef followed by an undef + // Check that following non-undef masks are incremented with the + // corresponding distance. + if (SavedNoUndefs > 0 && LaneValue < 0) { + SavedNoUndefs++; + if (NextLaneValue >= 0 && + SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue) + break; + } + } + + if (J < LaneLen - 1) break; - // Find a RE-interleaved mask of current factor. - if (i == NumElts) + int StartMask = 0; + if (Mask[I] >= 0) { + // Check that the start of the I range (J=0) is greater than 0 + StartMask = Mask[I]; + } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) { + // StartMask defined by the last value in lane + StartMask = Mask[(LaneLen - 1) * Factor + I] - J; + } else if (SavedNoUndefs > 0) { + // StartMask defined by some non-zero value in the j loop + StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs); + } + // else StartMask remains set to 0, i.e. all elements are undefs + + if (StartMask < 0) + break; + // We must stay within the vectors; This case can happen with undefs. + if (StartMask + LaneLen > OpNumElts*2) + break; + } + + // Found an interleaved mask of current factor. + if (I == Factor) return true; } @@ -224,7 +290,8 @@ bool InterleavedAccess::lowerInterleavedLoad( unsigned Factor, Index; // Check if the first shufflevector is DE-interleave shuffle. - if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index)) + if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index, + MaxFactor)) return false; // Holds the corresponding index for each DE-interleave shuffle. @@ -342,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore( // Check if the shufflevector is RE-interleave shuffle. unsigned Factor; - if (!isReInterleaveMask(SVI->getShuffleMask(), Factor)) + unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements(); + if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts)) return false; DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 2962f87..afd2406 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -436,8 +436,14 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { errs() << "WARNING: this target does not support the llvm." << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? 
"return" : "frame") << "address intrinsic.\n"; - CI->replaceAllUsesWith(ConstantPointerNull::get( - cast<PointerType>(CI->getType()))); + CI->replaceAllUsesWith( + ConstantPointerNull::get(cast<PointerType>(CI->getType()))); + break; + case Intrinsic::addressofreturnaddress: + errs() << "WARNING: this target does not support the " + "llvm.addressofreturnaddress intrinsic.\n"; + CI->replaceAllUsesWith( + ConstantPointerNull::get(cast<PointerType>(CI->getType()))); break; case Intrinsic::prefetch: diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 9eb43d2..26794e2 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -102,25 +101,12 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { }); } -MachineModuleInfo & -LLVMTargetMachine::addMachineModuleInfo(PassManagerBase &PM) const { - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), - *getMCRegisterInfo(), - getObjFileLowering()); - PM.add(MMI); - return *MMI; -} - -void LLVMTargetMachine::addMachineFunctionAnalysis(PassManagerBase &PM, - MachineFunctionInitializer *MFInitializer) const { - PM.add(new MachineFunctionAnalysis(*this, MFInitializer)); -} - /// addPassesToX helper drives creation and initialization of TargetPassConfig. static MCContext * addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, bool DisableVerify, AnalysisID StartBefore, - AnalysisID StartAfter, AnalysisID StopAfter, + AnalysisID StartAfter, AnalysisID StopBefore, + AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer = nullptr) { // When in emulated TLS mode, add the LowerEmuTLS pass. @@ -135,7 +121,8 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); - PassConfig->setStartStopPasses(StartBefore, StartAfter, StopAfter); + PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore, + StopAfter); // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); @@ -150,8 +137,9 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, PassConfig->addISelPrepare(); - MachineModuleInfo &MMI = TM->addMachineModuleInfo(PM); - TM->addMachineFunctionAnalysis(PM, MFInitializer); + MachineModuleInfo *MMI = new MachineModuleInfo(TM); + MMI->setMachineFunctionInitializer(MFInitializer); + PM.add(MMI); // Enable FastISel with -fast, but allow that to be overridden. TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); @@ -165,6 +153,11 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, if (PassConfig->addIRTranslator()) return nullptr; + PassConfig->addPreLegalizeMachineIR(); + + if (PassConfig->addLegalizeMachineIR()) + return nullptr; + // Before running the register bank selector, ask the target if it // wants to run some passes. 
PassConfig->addPreRegBankSelect(); @@ -172,6 +165,21 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, if (PassConfig->addRegBankSelect()) return nullptr; + PassConfig->addPreGlobalInstructionSelect(); + + if (PassConfig->addGlobalInstructionSelect()) + return nullptr; + + // Pass to reset the MachineFunction if the ISel failed. + PM.add(createResetMachineFunctionPass( + PassConfig->reportDiagnosticWhenGlobalISelFallback())); + + // Provide a fallback path when we do not want to abort on + // not-yet-supported input. + if (LLVM_UNLIKELY(!PassConfig->isGlobalISelAbortEnabled()) && + PassConfig->addInstSelector()) + return nullptr; + } else if (PassConfig->addInstSelector()) return nullptr; @@ -179,21 +187,22 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, PassConfig->setInitialized(); - return &MMI.getContext(); + return &MMI->getContext(); } bool LLVMTargetMachine::addPassesToEmitFile( PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, - AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) { + AnalysisID StopBefore, AnalysisID StopAfter, + MachineFunctionInitializer *MFInitializer) { // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, - StopAfter, MFInitializer); + StopBefore, StopAfter, MFInitializer); if (!Context) return true; - if (StopAfter) { + if (StopBefore || StopAfter) { PM.add(createPrintMIRPass(Out)); return false; } @@ -219,7 +228,8 @@ bool LLVMTargetMachine::addPassesToEmitFile( MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = - getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU); + getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, + Options.MCOptions); auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( *Context, std::move(FOut), Options.MCOptions.AsmVerbose, @@ -233,7 +243,8 @@ bool LLVMTargetMachine::addPassesToEmitFile( // emission fails. MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = - getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU); + getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, + Options.MCOptions); if (!MCE || !MAB) return true; @@ -261,6 +272,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; PM.add(Printer); + PM.add(createFreeMachineFunctionPass()); return false; } @@ -275,7 +287,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, bool DisableVerify) { // Add common CodeGen passes. Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr, - nullptr); + nullptr, nullptr); if (!Ctx) return true; @@ -288,7 +300,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); MCAsmBackend *MAB = - getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU); + getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, + Options.MCOptions); if (!MCE || !MAB) return true; @@ -306,6 +319,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, return true; PM.add(Printer); + PM.add(createFreeMachineFunctionPass()); return false; // success! 
} diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index 4321849..86ef898 100644 --- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -133,7 +133,7 @@ SUnit *LatencyPriorityQueue::pop() { void LatencyPriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); - std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + std::vector<SUnit *>::iterator I = find(Queue, SU); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index b810176..834ed5f 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -222,17 +222,13 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) { LexicalScope *WS = WorkStack.back(); const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); bool visitedChildren = false; - for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), - SE = Children.end(); - SI != SE; ++SI) { - LexicalScope *ChildScope = *SI; + for (auto &ChildScope : Children) if (!ChildScope->getDFSOut()) { WorkStack.push_back(ChildScope); visitedChildren = true; ChildScope->setDFSIn(++Counter); break; } - } if (!visitedChildren) { WorkStack.pop_back(); WS->setDFSOut(++Counter); @@ -247,10 +243,7 @@ void LexicalScopes::assignInstructionRanges( DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { LexicalScope *PrevLexicalScope = nullptr; - for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(), - RE = MIRanges.end(); - RI != RE; ++RI) { - const InsnRange &R = *RI; + for (const auto &R : MIRanges) { LexicalScope *S = MI2ScopeMap.lookup(R.first); assert(S && "Lost LexicalScope for a machine instruction!"); if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) @@ -281,12 +274,8 @@ void LexicalScopes::getMachineBasicBlocks( } SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); - for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(), - E = InsnRanges.end(); - I != E; ++I) { - InsnRange &R = *I; + for (auto &R : InsnRanges) MBBs.insert(R.first->getParent()); - } } /// dominates - Return true if DebugLoc's lexical scope dominates at least one @@ -301,9 +290,8 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) { return true; bool Result = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - if (const DILocation *IDL = I->getDebugLoc()) + for (auto &I : *MBB) { + if (const DILocation *IDL = I.getDebugLoc()) if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) if (Scope->dominates(IScope)) return true; diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp index 4ff88d5..c945376 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" +#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -60,6 +61,26 @@ class LiveDebugValues : public MachineFunctionPass { private: const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; + LexicalScopes LS; + + /// Keeps track of lexical 
scopes associated with a user value's source + /// location. + class UserValueScopes { + DebugLoc DL; + LexicalScopes &LS; + SmallPtrSet<const MachineBasicBlock *, 4> LBlocks; + + public: + UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {} + + /// Return true if current scope dominates at least one machine + /// instruction in a given machine basic block. + bool dominates(MachineBasicBlock *MBB) { + if (LBlocks.empty()) + LS.getMachineBasicBlocks(DL, LBlocks); + return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB); + } + }; /// Based on std::pair so it can be used as an index into a DenseMap. typedef std::pair<const DILocalVariable *, const DILocation *> @@ -83,7 +104,7 @@ private: struct VarLoc { const DebugVariable Var; const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE. - + mutable UserValueScopes UVS; enum { InvalidKind = 0, RegisterKind } Kind; /// The value location. Stored separately to avoid repeatedly @@ -96,9 +117,9 @@ private: uint64_t Hash; } Loc; - VarLoc(const MachineInstr &MI) + VarLoc(const MachineInstr &MI, LexicalScopes &LS) : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI), - Kind(InvalidKind) { + UVS(MI.getDebugLoc(), LS), Kind(InvalidKind) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); @@ -125,6 +146,10 @@ private: return 0; } + /// Determine whether the lexical scope of this value's debug location + /// dominates MBB. + bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); } + void dump() const { MI.dump(); } bool operator==(const VarLoc &Other) const { @@ -201,7 +226,8 @@ private: VarLocInMBB &OutLocs, VarLocMap &VarLocIDs); bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, - const VarLocMap &VarLocIDs); + const VarLocMap &VarLocIDs, + SmallPtrSet<const MachineBasicBlock *, 16> &Visited); bool ExtendRanges(MachineFunction &MF); @@ -217,7 +243,7 @@ public: MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } /// Print to ostream with a message. @@ -228,6 +254,7 @@ public: /// Calculate the liveness information for the given machine function. bool runOnMachineFunction(MachineFunction &MF) override; }; + } // namespace //===----------------------------------------------------------------------===// @@ -260,6 +287,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, const VarLocMap &VarLocIDs, const char *msg, raw_ostream &Out) const { + Out << '\n' << msg << '\n'; for (const MachineBasicBlock &BB : MF) { const auto &L = V.lookup(&BB); Out << "MBB: " << BB.getName() << ":\n"; @@ -268,7 +296,6 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, Out << " Var: " << VL.Var.getVar()->getName(); Out << " MI: "; VL.dump(); - Out << "\n"; } } Out << "\n"; @@ -294,7 +321,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, // Add the VarLoc to OpenRanges from this DBG_VALUE. // TODO: Currently handles DBG_VALUE which has only reg as location. 
if (isDbgValueDescribedByReg(MI)) { - VarLoc VL(MI); + VarLoc VL(MI, LS); unsigned ID = VarLocIDs.insert(VL); OpenRanges.insert(ID, VL.Var); } @@ -368,7 +395,8 @@ bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges, /// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same /// source variable in all the predecessors of @MBB reside in the same location. bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, - VarLocInMBB &InLocs, const VarLocMap &VarLocIDs) { + VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, + SmallPtrSet<const MachineBasicBlock *, 16> &Visited) { DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); bool Changed = false; @@ -376,21 +404,39 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, // For all predecessors of this MBB, find the set of VarLocs that // can be joined. + int NumVisited = 0; for (auto p : MBB.predecessors()) { + // Ignore unvisited predecessor blocks. As we are processing + // the blocks in reverse post-order any unvisited block can + // be considered to not remove any incoming values. + if (!Visited.count(p)) + continue; auto OL = OutLocs.find(p); // Join is null in case of empty OutLocs from any of the pred. if (OL == OutLocs.end()) return false; - // Just copy over the Out locs to incoming locs for the first predecessor. - if (p == *MBB.pred_begin()) { + // Just copy over the Out locs to incoming locs for the first visited + // predecessor, and for all other predecessors join the Out locs. + if (!NumVisited) InLocsT = OL->second; - continue; - } - // Join with this predecessor. - InLocsT &= OL->second; + else + InLocsT &= OL->second; + NumVisited++; } + // Filter out DBG_VALUES that are out of scope. + VarLocSet KillSet; + for (auto ID : InLocsT) + if (!VarLocIDs[ID].dominates(MBB)) + KillSet.set(ID); + InLocsT.intersectWithComplement(KillSet); + + // As we are processing blocks in reverse post-order we + // should have processed at least one predecessor, unless it + // is the entry block which has no predecessor. + assert((NumVisited || MBB.pred_empty()) && + "Should have processed at least one predecessor"); if (InLocsT.empty()) return false; @@ -463,16 +509,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { // To solve it, we perform join() and transfer() using the two worklist method // until the ranges converge. // Ranges have converged when both worklists are empty. + SmallPtrSet<const MachineBasicBlock *, 16> Visited; while (!Worklist.empty() || !Pending.empty()) { // We track what is on the pending worklist to avoid inserting the same // thing twice. We could avoid this with a custom priority queue, but this // is probably not worth it. SmallPtrSet<MachineBasicBlock *, 16> OnPending; + DEBUG(dbgs() << "Processing Worklist\n"); while (!Worklist.empty()) { MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; Worklist.pop(); - MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs); - + MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited); + Visited.insert(MBB); if (MBBJoined) { MBBJoined = false; Changed = true; @@ -505,12 +553,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { } bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { + if (!MF.getFunction()->getSubprogram()) + // LiveDebugValues will already have removed all DBG_VALUEs. 
+ return false; + TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); + LS.initialize(MF); - bool Changed = false; - - Changed |= ExtendRanges(MF); - + bool Changed = ExtendRanges(MF); return Changed; } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 966b4f1..0934d8c 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -22,7 +22,6 @@ #include "LiveDebugVariables.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -76,27 +75,6 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullpt /// LocMap - Map of where a user value is live, and its location. typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; -namespace { -/// UserValueScopes - Keeps track of lexical scopes associated with a -/// user value's source location. -class UserValueScopes { - DebugLoc DL; - LexicalScopes &LS; - SmallPtrSet<const MachineBasicBlock *, 4> LBlocks; - -public: - UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {} - - /// dominates - Return true if current scope dominates at least one machine - /// instruction in a given machine basic block. - bool dominates(MachineBasicBlock *MBB) { - if (LBlocks.empty()) - LS.getMachineBasicBlocks(DL, LBlocks); - return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB); - } -}; -} // end anonymous namespace - /// UserValue - A user value is a part of a debug info user variable. /// /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register @@ -221,8 +199,8 @@ public: I.setValue(getLocationNo(LocMO)); } - /// extendDef - Extend the current definition as far as possible down the - /// dominator tree. Stop when meeting an existing def or when leaving the live + /// extendDef - Extend the current definition as far as possible down. + /// Stop when meeting an existing def or when leaving the live /// range of VNI. /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. @@ -231,12 +209,10 @@ public: /// @param VNI When LR is not null, this is the value to restrict to. /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. - /// @param MDT Dominator tree. void extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, - LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS); + LiveIntervals &LIS); /// addDefsFromCopies - The value in LI/LocNo may be copies to other /// registers. Determine if any of the copies are available at the kill @@ -254,8 +230,7 @@ public: /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS); + LiveIntervals &LIS); /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. @@ -283,8 +258,6 @@ class LDVImpl { LocMap::Allocator allocator; MachineFunction *MF; LiveIntervals *LIS; - LexicalScopes LS; - MachineDominatorTree *MDT; const TargetRegisterInfo *TRI; /// Whether emitDebugValues is called. 
@@ -342,7 +315,6 @@ public: "Dbg values are not emitted in LDV"); EmitDone = false; ModifiedMF = false; - LS.reset(); } /// mapVirtReg - Map virtual register to an equivalence class. @@ -541,8 +513,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { /// data-flow analysis to propagate them beyond basic block boundaries. void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, - LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + LiveIntervals &LIS) { SlotIndex Start = Idx; MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); SlotIndex Stop = LIS.getMBBEndIdx(MBB); @@ -660,9 +631,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, void UserValue::computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - LiveIntervals &LIS, - MachineDominatorTree &MDT, - UserValueScopes &UVS) { + LiveIntervals &LIS) { SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -677,7 +646,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, const MachineOperand &Loc = locations[LocNo]; if (!Loc.isReg()) { - extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS, MDT, UVS); + extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS); continue; } @@ -690,7 +659,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, VNI = LI->getVNInfoAt(Idx); } SmallVector<SlotIndex, 16> Kills; - extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS); + extendDef(Idx, LocNo, LI, VNI, &Kills, LIS); if (LI) addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); continue; @@ -701,7 +670,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveRange *LR = &LIS.getRegUnit(Unit); const VNInfo *VNI = LR->getVNInfoAt(Idx); // Don't track copies from physregs, it is too expensive. - extendDef(Idx, LocNo, LR, VNI, nullptr, LIS, MDT, UVS); + extendDef(Idx, LocNo, LR, VNI, nullptr, LIS); } // Finally, erase all the undefs. @@ -714,8 +683,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, void LDVImpl::computeIntervals() { for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - UserValueScopes UVS(userValues[i]->getDebugLoc(), LS); - userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS); + userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS); userValues[i]->mapVirtRegs(this); } } @@ -724,9 +692,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { clear(); MF = &mf; LIS = &pass.getAnalysis<LiveIntervals>(); - MDT = &pass.getAnalysis<MachineDominatorTree>(); TRI = mf.getSubtarget().getRegisterInfo(); - LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << mf.getName() << " **********\n"); @@ -951,7 +917,7 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, while (!(MI = LIS.getInstructionFromIndex(Idx))) { // We've reached the beginning of MBB. 
if (Idx == Start) { - MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin()); + MachineBasicBlock::iterator I = MBB->SkipPHIsLabelsAndDebug(MBB->begin()); return I; } Idx = Idx.getPrevIndex(); diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index 93c5ca7..623af49 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -59,18 +59,32 @@ public: typedef LiveRange::Segment Segment; typedef IteratorT iterator; - VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) { + /// A counterpart of LiveRange::createDeadDef: Make sure the range has a + /// value defined at @p Def. + /// If @p ForVNI is null, and there is no value defined at @p Def, a new + /// value will be allocated using @p VNInfoAllocator. + /// If @p ForVNI is null, the return value is the value defined at @p Def, + /// either a pre-existing one, or the one newly created. + /// If @p ForVNI is not null, then @p Def should be the location where + /// @p ForVNI is defined. If the range does not have a value defined at + /// @p Def, the value @p ForVNI will be used instead of allocating a new + /// one. If the range already has a value defined at @p Def, it must be + /// same as @p ForVNI. In either case, @p ForVNI will be the return value. + VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator *VNInfoAllocator, + VNInfo *ForVNI) { assert(!Def.isDead() && "Cannot define a value at the dead slot"); - + assert((!ForVNI || ForVNI->def == Def) && + "If ForVNI is specified, it must match Def"); iterator I = impl().find(Def); if (I == segments().end()) { - VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + VNInfo *VNI = ForVNI ? ForVNI : LR->getNextValue(Def, *VNInfoAllocator); impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } Segment *S = segmentAt(I); if (SlotIndex::isSameInstr(Def, S->start)) { + assert((!ForVNI || ForVNI == S->valno) && "Value number mismatch"); assert(S->valno->def == S->start && "Inconsistent existing value def"); // It is possible to have both normal and early-clobber defs of the same @@ -84,7 +98,7 @@ public: return S->valno; } assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def"); - VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + VNInfo *VNI = ForVNI ? 
ForVNI : LR->getNextValue(Def, *VNInfoAllocator); segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -93,7 +107,7 @@ public: if (segments().empty()) return nullptr; iterator I = - impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr)); + impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr)); if (I == segments().begin()) return nullptr; --I; @@ -104,6 +118,25 @@ public: return I->valno; } + std::pair<VNInfo*,bool> extendInBlock(ArrayRef<SlotIndex> Undefs, + SlotIndex StartIdx, SlotIndex Use) { + if (segments().empty()) + return std::make_pair(nullptr, false); + SlotIndex BeforeUse = Use.getPrevSlot(); + iterator I = impl().findInsertPos(Segment(BeforeUse, Use, nullptr)); + if (I == segments().begin()) + return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse)); + --I; + if (I->end <= StartIdx) + return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse)); + if (I->end < Use) { + if (LR->isUndefIn(Undefs, I->end, BeforeUse)) + return std::make_pair(nullptr, true); + extendSegmentEndTo(I, Use); + } + return std::make_pair(I->valno, false); + } + /// This method is used when we want to extend the segment specified /// by I to end at the specified endpoint. To do this, we should /// merge and eliminate all segments that this will overlap @@ -320,13 +353,20 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) { return I; } -VNInfo *LiveRange::createDeadDef(SlotIndex Def, - VNInfo::Allocator &VNInfoAllocator) { +VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).createDeadDef(Def, &VNIAlloc, nullptr); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).createDeadDef(Def, &VNIAlloc, nullptr); +} + +VNInfo *LiveRange::createDeadDef(VNInfo *VNI) { // Use the segment set, if it is available. if (segmentSet != nullptr) - return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator); + return CalcLiveRangeUtilSet(this).createDeadDef(VNI->def, nullptr, VNI); // Otherwise use the segment vector. - return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator); + return CalcLiveRangeUtilVector(this).createDeadDef(VNI->def, nullptr, VNI); } // overlaps - Return true if the intersection of the two live ranges is @@ -507,9 +547,15 @@ void LiveRange::append(const Segment S) { segments.push_back(S); } -/// extendInBlock - If this range is live before Kill in the basic -/// block that starts at StartIdx, extend it to be live up to Kill and return -/// the value. If there is no live range before Kill, return NULL. +std::pair<VNInfo*,bool> LiveRange::extendInBlock(ArrayRef<SlotIndex> Undefs, + SlotIndex StartIdx, SlotIndex Kill) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).extendInBlock(Undefs, StartIdx, Kill); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).extendInBlock(Undefs, StartIdx, Kill); +} + VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { // Use the segment set, if it is available. if (segmentSet != nullptr) @@ -571,7 +617,7 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, /// Also remove the value# from value# list. 
void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) { + segments.erase(remove_if(*this, [ValNo](const Segment &S) { return S.valno == ValNo; }), end()); // Now that ValNo is dead, remove it. @@ -824,6 +870,30 @@ unsigned LiveInterval::getSize() const { return Sum; } +void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs, + LaneBitmask LaneMask, + const MachineRegisterInfo &MRI, + const SlotIndexes &Indexes) const { + assert(TargetRegisterInfo::isVirtualRegister(reg)); + LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg); + assert((VRegMask & LaneMask).any()); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + for (const MachineOperand &MO : MRI.def_operands(reg)) { + if (!MO.isUndef()) + continue; + unsigned SubReg = MO.getSubReg(); + assert(SubReg != 0 && "Undef should only be set on subreg defs"); + LaneBitmask DefMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask UndefMask = VRegMask & ~DefMask; + if ((UndefMask & LaneMask).any()) { + const MachineInstr &MI = *MO.getParent(); + bool EarlyClobber = MO.isEarlyClobber(); + SlotIndex Pos = Indexes.getInstructionIndex(MI).getRegSlot(EarlyClobber); + Undefs.push_back(Pos); + } + } +} + raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) { return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')'; } @@ -912,15 +982,16 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { super::verify(); // Make sure SubRanges are fine and LaneMasks are disjunct. - LaneBitmask Mask = 0; - LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u; + LaneBitmask Mask; + LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) + : LaneBitmask::getAll(); for (const SubRange &SR : subranges()) { // Subrange lanemask should be disjunct to any previous subrange masks. - assert((Mask & SR.LaneMask) == 0); + assert((Mask & SR.LaneMask).none()); Mask |= SR.LaneMask; // subrange mask should not contained in maximum lane mask for the vreg. - assert((Mask & ~MaxMask) == 0); + assert((Mask & ~MaxMask).none()); // empty subranges must be removed. 
assert(!SR.empty()); diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index 5f3281f..70d3483 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -58,10 +58,6 @@ static cl::opt<bool> EnablePrecomputePhysRegs( static bool EnablePrecomputePhysRegs = false; #endif // NDEBUG -static cl::opt<bool> EnableSubRegLiveness( - "enable-subreg-liveness", cl::Hidden, cl::init(true), - cl::desc("Enable subregister liveness tracking.")); - namespace llvm { cl::opt<bool> UseSegmentSetForPhysRegs( "use-segment-set-for-physregs", cl::Hidden, cl::init(true), @@ -119,9 +115,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); - if (EnableSubRegLiveness && MF->getSubtarget().enableSubRegLiveness()) - MRI->enableSubRegLiveness(true); - if (!LRCalc) LRCalc = new LiveRangeCalc(); @@ -504,8 +497,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, return MayHaveSplitComponents; } -void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) -{ +void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { DEBUG(dbgs() << "Shrink: " << SR << '\n'); assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only shrink virtual registers"); @@ -514,18 +506,19 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) // Visit all instructions reading Reg. SlotIndex LastIdx; - for (MachineOperand &MO : MRI->reg_operands(Reg)) { - MachineInstr *UseMI = MO.getParent(); - if (UseMI->isDebugValue()) + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + // Skip "undef" uses. + if (!MO.readsReg()) continue; // Maybe the operand is for a subregister we don't care about. unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg); - if ((LaneMask & SR.LaneMask) == 0) + if ((LaneMask & SR.LaneMask).none()) continue; } // We only need to visit each instruction once. + MachineInstr *UseMI = MO.getParent(); SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot(); if (Idx == LastIdx) continue; @@ -574,11 +567,12 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) } void LiveIntervals::extendToIndices(LiveRange &LR, - ArrayRef<SlotIndex> Indices) { + ArrayRef<SlotIndex> Indices, + ArrayRef<SlotIndex> Undefs) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); for (unsigned i = 0, e = Indices.size(); i != e; ++i) - LRCalc->extend(LR, Indices[i]); + LRCalc->extend(LR, Indices[i], /*PhysReg=*/0, Undefs); } void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, @@ -605,7 +599,7 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, // Find all blocks that are reachable from KillMBB without leaving VNI's live // range. It is possible that KillMBB itself is reachable, so start a DFS // from each successor. - typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy; + typedef df_iterator_default_set<MachineBasicBlock*,9> VisitedTy; VisitedTy Visited; for (MachineBasicBlock::succ_iterator SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end(); @@ -724,7 +718,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { LaneBitmask DefinedLanesMask; if (!SRs.empty()) { // Compute a mask of lanes that are defined. 
- DefinedLanesMask = 0; + DefinedLanesMask = LaneBitmask::getNone(); for (auto &SRP : SRs) { const LiveInterval::SubRange &SR = *SRP.first; LiveRange::const_iterator &I = SRP.second; @@ -737,7 +731,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { DefinedLanesMask |= SR.LaneMask; } } else - DefinedLanesMask = ~0u; + DefinedLanesMask = LaneBitmask::getAll(); bool IsFullWrite = false; for (const MachineOperand &MO : MI->operands()) { @@ -746,7 +740,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { if (MO.isUse()) { // Reading any undefined lanes? LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); - if ((UseMask & ~DefinedLanesMask) != 0) + if ((UseMask & ~DefinedLanesMask).any()) goto CancelKill; } else if (MO.getSubReg() == 0) { // Writing to the full register? @@ -954,14 +948,15 @@ public: LiveInterval &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { unsigned SubReg = MO.getSubReg(); - LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI.getMaxLaneMaskForVReg(Reg); for (LiveInterval::SubRange &S : LI.subranges()) { - if ((S.LaneMask & LaneMask) == 0) + if ((S.LaneMask & LaneMask).none()) continue; updateRange(S, Reg, S.LaneMask); } } - updateRange(LI, Reg, 0); + updateRange(LI, Reg, LaneBitmask::getNone()); continue; } @@ -969,7 +964,7 @@ public: // precomputed live range. for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) if (LiveRange *LR = getRegUnitLI(*Units)) - updateRange(*LR, *Units, 0); + updateRange(*LR, *Units, LaneBitmask::getNone()); } if (hasRegMask) updateRegMaskSlots(); @@ -985,7 +980,7 @@ private: dbgs() << " "; if (TargetRegisterInfo::isVirtualRegister(Reg)) { dbgs() << PrintReg(Reg); - if (LaneMask != 0) + if (LaneMask.any()) dbgs() << " L" << PrintLaneMask(LaneMask); } else { dbgs() << PrintRegUnit(Reg, &TRI); @@ -1039,6 +1034,8 @@ private: LiveRange::iterator Prev = std::prev(NewIdxIn); Prev->end = NewIdx.getRegSlot(); } + // Extend OldIdxIn. + OldIdxIn->end = Next->start; return; } @@ -1317,8 +1314,8 @@ private: if (MO.isUndef()) continue; unsigned SubReg = MO.getSubReg(); - if (SubReg != 0 && LaneMask != 0 - && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0) + if (SubReg != 0 && LaneMask.any() + && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask).none()) continue; const MachineInstr &MI = *MO.getParent(); @@ -1394,6 +1391,11 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, LaneBitmask LaneMask) { LiveInterval::iterator LII = LR.find(endIdx); SlotIndex lastUseIdx; + if (LII == LR.begin()) { + // This happens when the function is called for a subregister that only + // occurs _after_ the range that is to be repaired. + return; + } if (LII != LR.end() && LII->start < endIdx) lastUseIdx = LII->end; else @@ -1420,7 +1422,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, unsigned SubReg = MO.getSubReg(); LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); - if ((Mask & LaneMask) == 0) + if ((Mask & LaneMask).none()) continue; if (MO.isDef()) { @@ -1538,15 +1540,19 @@ void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) { } void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { + // LI may not have the main range computed yet, but its subranges may + // be present. 
VNInfo *VNI = LI.getVNInfoAt(Pos); - if (VNI == nullptr) - return; - LI.removeValNo(VNI); + if (VNI != nullptr) { + assert(VNI->def.getBaseIndex() == Pos.getBaseIndex()); + LI.removeValNo(VNI); + } - // Also remove the value in subranges. + // Also remove the value defined in subranges. for (LiveInterval::SubRange &S : LI.subranges()) { if (VNInfo *SVNI = S.getVNInfoAt(Pos)) - S.removeValNo(SVNI); + if (SVNI->def.getBaseIndex() == Pos.getBaseIndex()) + S.removeValNo(SVNI); } LI.removeEmptySubRanges(); } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 025d99c..fc2f233 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -102,9 +103,7 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { // Scan the vector of interfering virtual registers in this union. Assume it's // quite small. bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { - SmallVectorImpl<LiveInterval*>::const_iterator I = - std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg); - return I != InterferingVRegs.end(); + return is_contained(InterferingVRegs, VirtReg); } // Collect virtual registers in this union that interfere with this diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp index 4e2528f..dcc41c1 100644 --- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -49,7 +49,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { if (!O->isDef()) continue; unsigned Reg = O->getReg(); - if (Reg == 0) + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; removeReg(Reg); } else if (O->isRegMask()) @@ -61,7 +61,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { if (!O->isReg() || !O->readsReg()) continue; unsigned Reg = O->getReg(); - if (Reg == 0) + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; addReg(Reg); } @@ -77,7 +77,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI, for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { unsigned Reg = O->getReg(); - if (Reg == 0) + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (O->isDef()) { // Note, dead defs are still recorded. The caller should decide how to @@ -141,9 +141,19 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI, } /// Add live-in registers of basic block \p MBB to \p LiveRegs. -static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { - for (const auto &LI : MBB.liveins()) - LiveRegs.addReg(LI.PhysReg); +void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) { + for (const auto &LI : MBB.liveins()) { + MCSubRegIndexIterator S(LI.PhysReg, TRI); + if (LI.LaneMask.all() || (LI.LaneMask.any() && !S.isValid())) { + addReg(LI.PhysReg); + continue; + } + for (; S.isValid(); ++S) { + unsigned SI = S.getSubRegIndex(); + if ((LI.LaneMask & TRI->getSubRegIndexLaneMask(SI)).any()) + addReg(S.getSubReg()); + } + } } /// Add pristine registers to the given \p LiveRegs. 
This function removes @@ -160,12 +170,12 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { // To get the live-outs we simply merge the live-ins of all successors. for (const MachineBasicBlock *Succ : MBB.successors()) - ::addLiveIns(*this, *Succ); + addBlockLiveIns(*Succ); } void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.isCalleeSavedInfoValid()) { if (MBB.isReturnBlock()) { // The return block has no successors whose live-ins we could merge @@ -182,8 +192,31 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.isCalleeSavedInfoValid()) addPristines(*this, MF, MFI, *TRI); - ::addLiveIns(*this, MBB); + addBlockLiveIns(MBB); +} + +void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, + MachineBasicBlock &MBB) { + assert(MBB.livein_empty()); + LiveRegs.init(TRI); + LiveRegs.addLiveOutsNoPristines(MBB); + for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) + LiveRegs.stepBackward(MI); + + for (unsigned Reg : LiveRegs) { + // Skip the register if we are about to add one of its super registers. + bool ContainsSuperReg = false; + for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) { + if (LiveRegs.contains(*SReg)) { + ContainsSuperReg = true; + break; + } + } + if (ContainsSuperReg) + continue; + MBB.addLiveIn(Reg); + } } diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index db91ca1..0128376 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "LiveRangeCalc.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -23,6 +24,7 @@ void LiveRangeCalc::resetLiveOutMap() { unsigned NumBlocks = MF->getNumBlockIDs(); Seen.clear(); Seen.resize(NumBlocks); + EntryInfoMap.clear(); Map.resize(NumBlocks); } @@ -64,9 +66,8 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { unsigned SubReg = MO.getSubReg(); if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) { - LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) - : MRI->getMaxLaneMaskForVReg(Reg); - + LaneBitmask SubMask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); // If this is the first time we see a subregister def, initialize // subranges by creating a copy of the main range. if (!LI.hasSubRanges() && !LI.empty()) { @@ -74,17 +75,19 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { LI.createSubRangeFrom(*Alloc, ClassMask, LI); } + LaneBitmask Mask = SubMask; for (LiveInterval::SubRange &S : LI.subranges()) { // A Mask for subregs common to the existing subrange and current def. LaneBitmask Common = S.LaneMask & Mask; - if (Common == 0) + if (Common.none()) continue; - // A Mask for subregs covered by the subrange but not the current def. 
- LaneBitmask LRest = S.LaneMask & ~Mask; LiveInterval::SubRange *CommonRange; - if (LRest != 0) { - // Split current subrange into Common and LRest ranges. - S.LaneMask = LRest; + // A Mask for subregs covered by the subrange but not the current def. + LaneBitmask RM = S.LaneMask & ~Mask; + if (RM.any()) { + // Split the subrange S into two parts: one covered by the current + // def (CommonRange), and the one not affected by it (updated S). + S.LaneMask = RM; CommonRange = LI.createSubRangeFrom(*Alloc, Common, S); } else { assert(Common == S.LaneMask); @@ -95,7 +98,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { Mask &= ~Common; } // Create a new SubRange for subregs we did not cover yet. - if (Mask != 0) { + if (Mask.any()) { LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask); if (MO.isDef()) createDeadDef(*Indexes, *Alloc, *NewRange, MO); @@ -116,14 +119,15 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { // necessary. if (LI.hasSubRanges()) { for (LiveInterval::SubRange &S : LI.subranges()) { - resetLiveOutMap(); - extendToUses(S, Reg, S.LaneMask); + LiveRangeCalc SubLRC; + SubLRC.reset(MF, Indexes, DomTree, Alloc); + SubLRC.extendToUses(S, Reg, S.LaneMask, &LI); } LI.clear(); constructMainRangeFromSubranges(LI); } else { resetLiveOutMap(); - extendToUses(LI, Reg, ~0u); + extendToUses(LI, Reg, LaneBitmask::getAll()); } } @@ -139,9 +143,8 @@ void LiveRangeCalc::constructMainRangeFromSubranges(LiveInterval &LI) { MainRange.createDeadDef(VNI->def, *Alloc); } } - resetLiveOutMap(); - extendToUses(MainRange, LI.reg); + extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI); } void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { @@ -154,29 +157,34 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { } -void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, - LaneBitmask Mask) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask, + LiveInterval *LI) { + SmallVector<SlotIndex, 4> Undefs; + if (LI != nullptr) + LI->computeSubRangeUndefs(Undefs, Mask, *MRI, *Indexes); + // Visit all operands that read Reg. This may include partial defs. + bool IsSubRange = !Mask.all(); const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { // Clear all kill flags. They will be reinserted after register allocation // by LiveIntervalAnalysis::addKillFlags(). if (MO.isUse()) MO.setIsKill(false); - else { - // We only care about uses, but on the main range (mask ~0u) this includes - // the "virtual" reads happening for subregister defs. - if (Mask != ~0u) - continue; - } - - if (!MO.readsReg()) + // MO::readsReg returns "true" for subregister defs. This is for keeping + // liveness of the entire register (i.e. for the main range of the live + // interval). For subranges, definitions of non-overlapping subregisters + // do not count as uses. + if (!MO.readsReg() || (IsSubRange && MO.isDef())) continue; + unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); - // Ignore uses not covering the current subrange. - if ((SubRegMask & Mask) == 0) + LaneBitmask SLM = TRI.getSubRegIndexLaneMask(SubReg); + if (MO.isDef()) + SLM = ~SLM; + // Ignore uses not reading the current (sub)range. + if ((SLM & Mask).none()) continue; } @@ -205,7 +213,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, // MI is reading Reg. 
We may have visited MI before if it happens to be // reading Reg multiple times. That is OK, extend() is idempotent. - extend(LR, UseIdx, Reg); + extend(LR, UseIdx, Reg, Undefs); } } @@ -235,8 +243,8 @@ void LiveRangeCalc::updateFromLiveIns() { LiveIn.clear(); } - -void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) { +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg, + ArrayRef<SlotIndex> Undefs) { assert(Use.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); @@ -245,14 +253,15 @@ void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) { assert(UseMBB && "No MBB at Use"); // Is there a def in the same MBB we can extend? - if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use)) + auto EP = LR.extendInBlock(Undefs, Indexes->getMBBStartIdx(UseMBB), Use); + if (EP.first != nullptr || EP.second) return; // Find the single reaching def, or determine if Use is jointly dominated by // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - if (findReachingDefs(LR, *UseMBB, Use, PhysReg)) + if (findReachingDefs(LR, *UseMBB, Use, PhysReg, Undefs)) return; // When there were multiple different values, we may need new PHIs. @@ -271,8 +280,72 @@ void LiveRangeCalc::calculateValues() { } +bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, + MachineBasicBlock &MBB, BitVector &DefOnEntry, + BitVector &UndefOnEntry) { + unsigned BN = MBB.getNumber(); + if (DefOnEntry[BN]) + return true; + if (UndefOnEntry[BN]) + return false; + + auto MarkDefined = + [this,BN,&DefOnEntry,&UndefOnEntry] (MachineBasicBlock &B) -> bool { + for (MachineBasicBlock *S : B.successors()) + DefOnEntry[S->getNumber()] = true; + DefOnEntry[BN] = true; + return true; + }; + + SetVector<unsigned> WorkList; + // Checking if the entry of MBB is reached by some def: add all predecessors + // that are potentially defined-on-exit to the work list. + for (MachineBasicBlock *P : MBB.predecessors()) + WorkList.insert(P->getNumber()); + + for (unsigned i = 0; i != WorkList.size(); ++i) { + // Determine if the exit from the block is reached by some def. + unsigned N = WorkList[i]; + MachineBasicBlock &B = *MF->getBlockNumbered(N); + if (Seen[N] && Map[&B].first != nullptr) + return MarkDefined(B); + SlotIndex Begin, End; + std::tie(Begin, End) = Indexes->getMBBRange(&B); + LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(), End); + if (UB != LR.begin()) { + LiveRange::Segment &Seg = *std::prev(UB); + if (Seg.end > Begin) { + // There is a segment that overlaps B. If the range is not explicitly + // undefined between the end of the segment and the end of the block, + // treat the block as defined on exit. If it is, go to the next block + // on the work list. + if (LR.isUndefIn(Undefs, Seg.end, End)) + continue; + return MarkDefined(B); + } + } + + // No segment overlaps with this block. If this block is not defined on + // entry, or it undefines the range, do not process its predecessors. + if (UndefOnEntry[N] || LR.isUndefIn(Undefs, Begin, End)) { + UndefOnEntry[N] = true; + continue; + } + if (DefOnEntry[N]) + return MarkDefined(B); + + // Still don't know: add all predecessors to the work list. 
+ for (MachineBasicBlock *P : B.predecessors()) + WorkList.insert(P->getNumber()); + } + + UndefOnEntry[BN] = true; + return false; +} + bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, - SlotIndex Use, unsigned PhysReg) { + SlotIndex Use, unsigned PhysReg, + ArrayRef<SlotIndex> Undefs) { unsigned UseMBBNum = UseMBB.getNumber(); // Block numbers where LR should be live-in. @@ -282,6 +355,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, bool UniqueVNI = true; VNInfo *TheVNI = nullptr; + bool FoundUndef = false; + // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]); @@ -294,18 +369,20 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, const MachineInstr *MI = Indexes->getInstructionFromIndex(Use); if (MI != nullptr) errs() << Use << " " << *MI; - llvm_unreachable("Use not jointly dominated by defs."); + report_fatal_error("Use not jointly dominated by defs."); } if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) { MBB->getParent()->verify(); - errs() << "The register " << PrintReg(PhysReg) + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + errs() << "The register " << PrintReg(PhysReg, TRI) << " needs to be live in to BB#" << MBB->getNumber() << ", but is missing from the live-in list.\n"; - llvm_unreachable("Invalid global physical register"); + report_fatal_error("Invalid global physical register"); } #endif + FoundUndef |= MBB->pred_empty(); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { @@ -326,18 +403,21 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, // First time we see Pred. Try to determine the live-out value, but set // it as null if Pred is live-through with an unknown value. - VNInfo *VNI = LR.extendInBlock(Start, End); + auto EP = LR.extendInBlock(Undefs, Start, End); + VNInfo *VNI = EP.first; + FoundUndef |= EP.second; setLiveOutValue(Pred, VNI); if (VNI) { if (TheVNI && TheVNI != VNI) UniqueVNI = false; TheVNI = VNI; - continue; } + if (VNI || EP.second) + continue; // No, we need a live-in value for Pred as well if (Pred != &UseMBB) - WorkList.push_back(Pred->getNumber()); + WorkList.push_back(Pred->getNumber()); else // Loopback to UseMBB, so value is really live through. Use = SlotIndex(); @@ -345,6 +425,9 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, } LiveIn.clear(); + FoundUndef |= (TheVNI == nullptr); + if (Undefs.size() > 0 && FoundUndef) + UniqueVNI = false; // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but // neither require it. Skip the sorting overhead for small updates. @@ -353,27 +436,39 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, // If a unique reaching def was found, blit in the live ranges immediately. if (UniqueVNI) { + assert(TheVNI != nullptr); LiveRangeUpdater Updater(&LR); - for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), - E = WorkList.end(); I != E; ++I) { - SlotIndex Start, End; - std::tie(Start, End) = Indexes->getMBBRange(*I); - // Trim the live range in UseMBB. 
- if (*I == UseMBBNum && Use.isValid()) - End = Use; - else - Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr); - Updater.add(Start, End, TheVNI); + for (unsigned BN : WorkList) { + SlotIndex Start, End; + std::tie(Start, End) = Indexes->getMBBRange(BN); + // Trim the live range in UseMBB. + if (BN == UseMBBNum && Use.isValid()) + End = Use; + else + Map[MF->getBlockNumbered(BN)] = LiveOutPair(TheVNI, nullptr); + Updater.add(Start, End, TheVNI); } return true; } + // Prepare the defined/undefined bit vectors. + auto EF = EntryInfoMap.find(&LR); + if (EF == EntryInfoMap.end()) { + unsigned N = MF->getNumBlockIDs(); + EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first; + EF->second.first.resize(N); + EF->second.second.resize(N); + } + BitVector &DefOnEntry = EF->second.first; + BitVector &UndefOnEntry = EF->second.second; + // Multiple values were found, so transfer the work list to the LiveIn array // where UpdateSSA will use it as a work list. LiveIn.reserve(WorkList.size()); - for (SmallVectorImpl<unsigned>::const_iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { - MachineBasicBlock *MBB = MF->getBlockNumbered(*I); + for (unsigned BN : WorkList) { + MachineBasicBlock *MBB = MF->getBlockNumbered(BN); + if (Undefs.size() > 0 && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry)) + continue; addLiveInBlock(LR, DomTree->getNode(MBB)); if (MBB == &UseMBB) LiveIn.back().Kill = Use; @@ -458,10 +553,12 @@ void LiveRangeCalc::updateSSA() { I.DomNode = nullptr; // Add liveness since updateFromLiveIns now skips this node. - if (I.Kill.isValid()) - LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI)); - else { - LR.addSegment(LiveInterval::Segment(Start, End, VNI)); + if (I.Kill.isValid()) { + if (VNI) + LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI)); + } else { + if (VNI) + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index 9de48b7..1a7598f 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -22,6 +22,7 @@ #ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H #define LLVM_LIB_CODEGEN_LIVERANGECALC_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/LiveInterval.h" @@ -53,6 +54,19 @@ class LiveRangeCalc { /// when switching live ranges. BitVector Seen; + /// Map LiveRange to sets of blocks (represented by bit vectors) that + /// in the live range are defined on entry and undefined on entry. + /// A block is defined on entry if there is a path from at least one of + /// the defs in the live range to the entry of the block, and conversely, + /// a block is undefined on entry, if there is no such path (i.e. no + /// definition reaches the entry of the block). A single LiveRangeCalc + /// object is used to track live-out information for multiple registers + /// in live range splitting (which is ok, since the live ranges of these + /// registers do not overlap), but the defined/undefined information must + /// be kept separate for each individual range. + /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }. + std::map<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap; + /// Map each basic block where a live range is live out to the live-out value /// and its defining block. 
/// @@ -101,18 +115,31 @@ class LiveRangeCalc { /// used to add entries directly. SmallVector<LiveInBlock, 16> LiveIn; - /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can - /// reach it. + /// Check if the entry to block @p MBB can be reached by any of the defs + /// in @p LR. Return true if none of the defs reach the entry to @p MBB. + bool isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, + MachineBasicBlock &MBB, BitVector &DefOnEntry, + BitVector &UndefOnEntry); + + /// Find the set of defs that can reach @p Kill. @p Kill must belong to + /// @p UseMBB. /// - /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB - /// are added to @p LR, and the function returns true. + /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill, + /// all paths from the def to @p UseMBB are added to @p LR, and the function + /// returns true. /// /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be /// live in are added to the LiveIn array, and the function returns false. /// + /// The array @p Undef provides the locations where the range @p LR becomes + /// undefined by <def,read-undef> operands on other subranges. If @p Undef + /// is non-empty and @p Kill is jointly dominated only by the entries of + /// @p Undef, the function returns false. + /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, - SlotIndex Kill, unsigned PhysReg); + SlotIndex Kill, unsigned PhysReg, + ArrayRef<SlotIndex> Undefs); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -127,9 +154,16 @@ class LiveRangeCalc { /// Extend the live range of @p LR to reach all uses of Reg. /// - /// All uses must be jointly dominated by existing liveness. PHI-defs are - /// inserted as needed to preserve SSA form. - void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask); + /// If @p LR is a main range, or if @p LI is null, then all uses must be + /// jointly dominated by the definitions from @p LR. If @p LR is a subrange + /// of the live interval @p LI, corresponding to lane mask @p LaneMask, + /// all uses must be jointly dominated by the definitions from @p LR + /// together with definitions of other lanes where @p LR becomes undefined + /// (via <def,read-undef> operands). + /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e. + /// LaneBitmask::getAll(). + void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask, + LiveInterval *LI = nullptr); /// Reset Map and Seen fields. void resetLiveOutMap(); @@ -169,7 +203,8 @@ public: /// inserted as required to preserve SSA form. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg, + ArrayRef<SlotIndex> Undefs); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding @@ -181,7 +216,7 @@ public: /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. void extendToUses(LiveRange &LR, unsigned PhysReg) { - extendToUses(LR, PhysReg, ~0u); + extendToUses(LR, PhysReg, LaneBitmask::getAll()); } /// Calculates liveness for the register specified in live interval @p LI. 
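To show how the pieces added in the LiveInterval, LiveIntervalAnalysis and LiveRangeCalc hunks above compose, here is a hedged sketch of a caller extending a subrange while honoring <def,read-undef> points on other lanes. computeSubRangeUndefs and the Undefs-aware extendToIndices are the entry points introduced above; the wrapper function itself, its name, and the include list are assumptions made for illustration only:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
using namespace llvm;

// Extend subrange S of LI so it is live at every index in UseIdx.  The slots
// where other subregisters are written with <def,read-undef> are collected
// first, so liveness is not propagated across points where S's lanes become
// undefined.
static void extendSubRangeToUses(LiveIntervals &LIS,
                                 const MachineRegisterInfo &MRI,
                                 LiveInterval &LI, LiveInterval::SubRange &S,
                                 ArrayRef<SlotIndex> UseIdx) {
  SmallVector<SlotIndex, 4> Undefs;
  LI.computeSubRangeUndefs(Undefs, S.LaneMask, MRI, *LIS.getSlotIndexes());
  LIS.extendToIndices(S, UseIdx, Undefs);
}

Internally, extendToIndices forwards Undefs to LiveRangeCalc::extend, which in turn consults the per-range defined/undefined-on-entry bit vectors (EntryInfoMap) when deciding whether a use is jointly dominated by real definitions.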
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index b35c0ad..7f1c69c 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -37,6 +37,13 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } LiveInterval &LI = LIS.createEmptyInterval(VReg); + // Create empty subranges if the OldReg's interval has them. Do not create + // the main range here---it will be constructed later after the subranges + // have been finalized. + LiveInterval &OldLI = LIS.getInterval(OldReg); + VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator(); + for (LiveInterval::SubRange &S : OldLI.subranges()) + LI.createSubRange(Alloc, S.LaneMask); return LI; } @@ -66,6 +73,8 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { unsigned Original = VRM->getOriginal(getReg()); LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + if (!OrigVNI) + continue; MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); if (!DefMI) continue; @@ -94,7 +103,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, // We can't remat physreg uses, unless it is a constant. if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent())) + if (MRI.isConstantPhysReg(MO.getReg())) continue; return false; } @@ -227,7 +236,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI, unsigned SubReg = MO.getSubReg(); LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); for (const LiveInterval::SubRange &S : LI.subranges()) { - if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill()) + if ((S.LaneMask & LaneMask).any() && S.Query(Idx).isKill()) return true; } return false; @@ -263,7 +272,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, bool ReadsPhysRegs = false; bool isOrigDef = false; unsigned Dest; - if (VRM && MI->getOperand(0).isReg()) { + // Only optimize rematerialize case when the instruction has one def, since + // otherwise we could leave some dead defs in the code. This case is + // extremely rare. + if (VRM && MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + MI->getDesc().getNumDefs() == 1) { Dest = MI->getOperand(0).getReg(); unsigned Original = VRM->getOriginal(Dest); LiveInterval &OrigLI = LIS.getInterval(Original); @@ -335,6 +348,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // allocations of the func are done. 
if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) { LiveInterval &NewLI = createEmptyIntervalFrom(Dest); + NewLI.removeEmptySubRanges(); VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); pop_back(); @@ -428,6 +442,9 @@ LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) if (VRM) VRM->grow(); + if (Parent && !Parent->isSpillable()) + LIS.getInterval(VReg).markNotSpillable(); + NewRegs.push_back(VReg); } diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index 7ee87c1..7a51386 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -70,15 +70,16 @@ void LiveRegMatrix::releaseMemory() { } } -template<typename Callable> -bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval, - unsigned PhysReg, Callable Func) { +template <typename Callable> +static bool foreachUnit(const TargetRegisterInfo *TRI, + LiveInterval &VRegInterval, unsigned PhysReg, + Callable Func) { if (VRegInterval.hasSubRanges()) { for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = (*Units).first; LaneBitmask Mask = (*Units).second; for (LiveInterval::SubRange &S : VRegInterval.subranges()) { - if (S.LaneMask & Mask) { + if ((S.LaneMask & Mask).any()) { if (Func(Unit, S)) return true; break; diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index dd87216..269b990a31 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -643,7 +643,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // register before its uses due to dominance properties of SSA (except for PHI // nodes, which are treated as a special case). MachineBasicBlock *Entry = &MF->front(); - SmallPtrSet<MachineBasicBlock*,16> Visited; + df_iterator_default_set<MachineBasicBlock*,16> Visited; for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) { runOnBlock(MBB, NumRegs); diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index af7392f..e189fb0 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -51,12 +51,21 @@ namespace { MachineBasicBlock::iterator MI; // Instr referencing the frame int64_t LocalOffset; // Local offset of the frame idx referenced int FrameIdx; // The frame index + + // Order reference instruction appears in program. Used to ensure + // deterministic order when multiple instructions may reference the same + // location. 
+ unsigned Order; + public: - FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) : - MI(I), LocalOffset(Offset), FrameIdx(Idx) {} + FrameRef(MachineInstr *I, int64_t Offset, int Idx, unsigned Ord) : + MI(I), LocalOffset(Offset), FrameIdx(Idx), Order(Ord) {} + bool operator<(const FrameRef &RHS) const { - return LocalOffset < RHS.LocalOffset; + return std::tie(LocalOffset, FrameIdx, Order) < + std::tie(RHS.LocalOffset, RHS.FrameIdx, RHS.Order); } + MachineBasicBlock::iterator getMachineInstr() const { return MI; } int64_t getLocalOffset() const { return LocalOffset; } int getFrameIndex() const { return FrameIdx; } @@ -67,17 +76,17 @@ namespace { /// StackObjSet - A set of stack object indexes typedef SmallSetVector<int, 8> StackObjSet; - void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset, + void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, unsigned &MaxAlign); void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, - MachineFrameInfo *MFI, bool StackGrowsDown, + MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign); void calculateFrameObjectOffsets(MachineFunction &Fn); bool insertFrameReferenceRegisters(MachineFunction &Fn); public: static char ID; // Pass identification, replacement for typeid - explicit LocalStackSlotPass() : MachineFunctionPass(ID) { + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -102,9 +111,9 @@ INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - unsigned LocalObjectCount = MFI->getObjectIndexEnd(); + unsigned LocalObjectCount = MFI.getObjectIndexEnd(); // If the target doesn't want/need this pass, or if there are no locals // to consider, early exit. @@ -112,7 +121,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { return true; // Make sure we have enough space to store the local offsets. - LocalOffsets.resize(MFI->getObjectIndexEnd()); + LocalOffsets.resize(MFI.getObjectIndexEnd()); // Lay out the local blob. calculateFrameObjectOffsets(MF); @@ -125,21 +134,21 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { // Otherwise, PEI can do a bit better job of getting the alignment right // without a hole at the start since it knows the alignment of the stack // at the start of local allocation, and this pass doesn't. - MFI->setUseLocalStackAllocationBlock(UsedBaseRegs); + MFI.setUseLocalStackAllocationBlock(UsedBaseRegs); return true; } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. -void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, +void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, unsigned &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. 
if (StackGrowsDown) - Offset += MFI->getObjectSize(FrameIdx); + Offset += MFI.getObjectSize(FrameIdx); - unsigned Align = MFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI.getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -154,10 +163,10 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, // Keep the offset available for base register allocation LocalOffsets[FrameIdx] = LocalOffset; // And tell MFI about it for PEI to use later - MFI->mapLocalFrameObject(FrameIdx, LocalOffset); + MFI.mapLocalFrameObject(FrameIdx, LocalOffset); if (!StackGrowsDown) - Offset += MFI->getObjectSize(FrameIdx); + Offset += MFI.getObjectSize(FrameIdx); ++NumAllocations; } @@ -166,7 +175,7 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, /// those required to be close to the Stack Protector) to stack offsets. void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, - MachineFrameInfo *MFI, + MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { @@ -183,7 +192,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs /// void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; @@ -194,22 +203,22 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> ProtectedObjs; - if (MFI->getStackProtectorIndex() >= 0) { + if (MFI.getStackProtectorIndex() >= 0) { StackObjSet LargeArrayObjs; StackObjSet SmallArrayObjs; StackObjSet AddrOfObjs; - AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, + AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), Offset, StackGrowsDown, MaxAlign); // Assign large stack objects first. - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) + for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { + if (MFI.isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i) + if (MFI.getStackProtectorIndex() == (int)i) continue; - switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) { case StackProtector::SSPLK_None: continue; case StackProtector::SSPLK_SmallArray: @@ -235,10 +244,10 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. 
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) + for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { + if (MFI.isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i) + if (MFI.getStackProtectorIndex() == (int)i) continue; if (ProtectedObjs.count(i)) continue; @@ -247,8 +256,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Remember how big this blob of stack space is - MFI->setLocalFrameSize(Offset); - MFI->setLocalFrameMaxAlign(MaxAlign); + MFI.setLocalFrameSize(Offset); + MFI.setLocalFrameMaxAlign(MaxAlign); } static inline bool @@ -273,7 +282,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // and ask the target to create a defining instruction for it. bool UsedBaseReg = false; - MachineFrameInfo *MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); bool StackGrowsDown = @@ -285,6 +294,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // choose the first one). SmallVector<FrameRef, 64> FrameReferenceInsns; + unsigned Order = 0; + for (MachineBasicBlock &BB : Fn) { for (MachineInstr &MI : BB) { // Debug value, stackmap and patchpoint instructions can't be out of @@ -305,21 +316,22 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // an object allocated in the local block. if (MI.getOperand(i).isFI()) { // Don't try this with values not in the local block. - if (!MFI->isObjectPreAllocated(MI.getOperand(i).getIndex())) + if (!MFI.isObjectPreAllocated(MI.getOperand(i).getIndex())) break; int Idx = MI.getOperand(i).getIndex(); int64_t LocalOffset = LocalOffsets[Idx]; if (!TRI->needsFrameBaseReg(&MI, LocalOffset)) break; - FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx)); + FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx, Order++)); break; } } } } - // Sort the frame references by local offset - array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + // Sort the frame references by local offset. + // Use frame index as a tie-breaker in case MI's have the same offset. + std::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); MachineBasicBlock *Entry = &Fn.front(); @@ -332,7 +344,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { MachineInstr &MI = *FR.getMachineInstr(); int64_t LocalOffset = FR.getLocalOffset(); int FrameIdx = FR.getFrameIndex(); - assert(MFI->isObjectPreAllocated(FrameIdx) && + assert(MFI.isObjectPreAllocated(FrameIdx) && "Only pre-allocated locals expected!"); DEBUG(dbgs() << "Considering: " << MI); @@ -349,7 +361,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { assert(idx < MI.getNumOperands() && "Cannot find FI operand"); int64_t Offset = 0; - int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0; + int64_t FrameSizeAdjust = StackGrowsDown ? 
MFI.getLocalFrameSize() : 0; DEBUG(dbgs() << " Replacing FI in: " << MI); diff --git a/contrib/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm/lib/CodeGen/LowLevelType.cpp new file mode 100644 index 0000000..d74b730 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LowLevelType.cpp @@ -0,0 +1,71 @@ +//===-- llvm/CodeGen/GlobalISel/LowLevelType.cpp --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements the more header-heavy bits of the LLT class to +/// avoid polluting users' namespaces. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +LLT::LLT(Type &Ty, const DataLayout &DL) { + if (auto VTy = dyn_cast<VectorType>(&Ty)) { + SizeInBits = VTy->getElementType()->getPrimitiveSizeInBits(); + ElementsOrAddrSpace = VTy->getNumElements(); + Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector; + } else if (auto PTy = dyn_cast<PointerType>(&Ty)) { + Kind = Pointer; + SizeInBits = DL.getTypeSizeInBits(&Ty); + ElementsOrAddrSpace = PTy->getAddressSpace(); + } else if (Ty.isSized()) { + // Aggregates are no different from real scalars as far as GlobalISel is + // concerned. + Kind = Scalar; + SizeInBits = DL.getTypeSizeInBits(&Ty); + ElementsOrAddrSpace = 1; + assert(SizeInBits != 0 && "invalid zero-sized type"); + } else { + Kind = Invalid; + SizeInBits = ElementsOrAddrSpace = 0; + } +} + +LLT::LLT(MVT VT) { + if (VT.isVector()) { + SizeInBits = VT.getVectorElementType().getSizeInBits(); + ElementsOrAddrSpace = VT.getVectorNumElements(); + Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector; + } else if (VT.isValid()) { + // Aggregates are no different from real scalars as far as GlobalISel is + // concerned. + Kind = Scalar; + SizeInBits = VT.getSizeInBits(); + ElementsOrAddrSpace = 1; + assert(SizeInBits != 0 && "invalid zero-sized type"); + } else { + Kind = Invalid; + SizeInBits = ElementsOrAddrSpace = 0; + } +} + +void LLT::print(raw_ostream &OS) const { + if (isVector()) + OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">"; + else if (isPointer()) + OS << "p" << getAddressSpace(); + else if (isValid()) { + assert(isScalar() && "unexpected type"); + OS << "s" << getScalarSizeInBits(); + } else + llvm_unreachable("trying to print an invalid type"); +} diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 6e3de52..1f1ce6e 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -173,14 +173,20 @@ static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, return C; } -static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) { - if (C.peek() != 'i' || !isdigit(C.peek(1))) +static Cursor maybeLexIntegerOrScalarType(Cursor C, MIToken &Token) { + if ((C.peek() != 'i' && C.peek() != 's' && C.peek() != 'p') || + !isdigit(C.peek(1))) return None; + char Kind = C.peek(); auto Range = C; - C.advance(); // Skip 'i' + C.advance(); // Skip 'i', 's', or 'p' while (isdigit(C.peek())) C.advance(); - Token.reset(MIToken::IntegerType, Range.upto(C)); + + Token.reset(Kind == 'i' + ? 
MIToken::IntegerType + : (Kind == 's' ? MIToken::ScalarType : MIToken::PointerType), + Range.upto(C)); return C; } @@ -199,12 +205,13 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("tied-def", MIToken::kw_tied_def) .Case("frame-setup", MIToken::kw_frame_setup) .Case("debug-location", MIToken::kw_debug_location) - .Case(".cfi_same_value", MIToken::kw_cfi_same_value) - .Case(".cfi_offset", MIToken::kw_cfi_offset) - .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) - .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) - .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) + .Case("same_value", MIToken::kw_cfi_same_value) + .Case("offset", MIToken::kw_cfi_offset) + .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register) + .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) + .Case("def_cfa", MIToken::kw_cfi_def_cfa) .Case("blockaddress", MIToken::kw_blockaddress) + .Case("intrinsic", MIToken::kw_intrinsic) .Case("target-index", MIToken::kw_target_index) .Case("half", MIToken::kw_half) .Case("float", MIToken::kw_float) @@ -215,6 +222,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("target-flags", MIToken::kw_target_flags) .Case("volatile", MIToken::kw_volatile) .Case("non-temporal", MIToken::kw_non_temporal) + .Case("dereferenceable", MIToken::kw_dereferenceable) .Case("invariant", MIToken::kw_invariant) .Case("align", MIToken::kw_align) .Case("stack", MIToken::kw_stack) @@ -227,11 +235,13 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("landing-pad", MIToken::kw_landing_pad) .Case("liveins", MIToken::kw_liveins) .Case("successors", MIToken::kw_successors) + .Case("floatpred", MIToken::kw_floatpred) + .Case("intpred", MIToken::kw_intpred) .Default(MIToken::Identifier); } static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { - if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') + if (!isalpha(C.peek()) && C.peek() != '_') return None; auto Range = C; while (isIdentifierChar(C.peek())) @@ -366,6 +376,11 @@ static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { return C; } +/// Returns true for a character allowed in a register name. +static bool isRegisterChar(char C) { + return isIdentifierChar(C) && C != '.'; +} + static Cursor maybeLexRegister(Cursor C, MIToken &Token) { if (C.peek() != '%') return None; @@ -373,7 +388,7 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) { return lexVirtualRegister(C, Token); auto Range = C; C.advance(); // Skip '%' - while (isIdentifierChar(C.peek())) + while (isRegisterChar(C.peek())) C.advance(); Token.reset(MIToken::NamedRegister, Range.upto(C)) .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' @@ -409,19 +424,6 @@ static bool isValidHexFloatingPointPrefix(char C) { return C == 'H' || C == 'K' || C == 'L' || C == 'M'; } -static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) { - if (C.peek() != '0' || C.peek(1) != 'x') - return None; - Cursor Range = C; - C.advance(2); // Skip '0x' - if (isValidHexFloatingPointPrefix(C.peek())) - C.advance(); - while (isxdigit(C.peek())) - C.advance(); - Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); - return C; -} - static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { C.advance(); // Skip over [0-9]*([eE][-+]?[0-9]+)? 
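The new sN (scalar) and pN (pointer) tokens lexed above are the textual form of the LLT values introduced in LowLevelType.cpp earlier in this diff, and they are what the MIParser changes further down turn back into LLTs. As a hedged illustration only, not part of the patch, the LLT factory functions that this patch itself calls (LLT::scalar, LLT::pointer, LLT::vector) would be used roughly like this; printSomeLLTs is a hypothetical helper, and the declarations are assumed to live in llvm/CodeGen/LowLevelType.h:

  #include "llvm/CodeGen/LowLevelType.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static void printSomeLLTs() {
    LLT S32 = LLT::scalar(32);       // LLT::print emits "s32"
    LLT P0 = LLT::pointer(0, 64);    // address space 0, 64 bits wide; emits "p0"
    LLT V4S32 = LLT::vector(4, 32);  // emits "<4 x s32>"
    S32.print(errs());
    P0.print(errs());
    V4S32.print(errs());
  }

These printed forms ("s32", "p0", "<4 x s32>") are exactly the spellings the new lexer and parseLowLevelType routine in this patch accept in MIR input.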
@@ -438,6 +440,28 @@ static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { return C; } +static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { + if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X')) + return None; + Cursor Range = C; + C.advance(2); + unsigned PrefLen = 2; + if (isValidHexFloatingPointPrefix(C.peek())) { + C.advance(); + PrefLen++; + } + while (isxdigit(C.peek())) + C.advance(); + StringRef StrVal = Range.upto(C); + if (StrVal.size() <= PrefLen) + return None; + if (PrefLen == 2) + Token.reset(MIToken::HexLiteral, Range.upto(C)); + else // It must be 3, which means that there was a floating-point prefix. + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) return None; @@ -485,6 +509,8 @@ static MIToken::TokenKind symbolToken(char C) { switch (C) { case ',': return MIToken::comma; + case '.': + return MIToken::dot; case '=': return MIToken::equal; case ':': @@ -566,7 +592,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return C.remaining(); } - if (Cursor R = maybeLexIntegerType(C, Token)) + if (Cursor R = maybeLexIntegerOrScalarType(C, Token)) return R.remaining(); if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) return R.remaining(); @@ -592,7 +618,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) return R.remaining(); - if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token)) + if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) return R.remaining(); if (Cursor R = maybeLexNumericalLiteral(C, Token)) return R.remaining(); diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h index 32fc8ab..edba749 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -38,6 +38,7 @@ struct MIToken { underscore, colon, coloncolon, + dot, exclaim, lparen, rparen, @@ -53,6 +54,7 @@ struct MIToken { kw_implicit_define, kw_def, kw_dead, + kw_dereferenceable, kw_killed, kw_undef, kw_internal, @@ -67,6 +69,7 @@ struct MIToken { kw_cfi_def_cfa_offset, kw_cfi_def_cfa, kw_blockaddress, + kw_intrinsic, kw_target_index, kw_half, kw_float, @@ -89,6 +92,8 @@ struct MIToken { kw_landing_pad, kw_liveins, kw_successors, + kw_floatpred, + kw_intpred, // Named metadata keywords md_tbaa, @@ -102,6 +107,8 @@ struct MIToken { NamedRegister, MachineBasicBlockLabel, MachineBasicBlock, + PointerType, + ScalarType, StackObject, FixedStackObject, NamedGlobalValue, @@ -111,6 +118,7 @@ struct MIToken { // Other tokens IntegerLiteral, FloatingPointLiteral, + HexLiteral, VirtualRegister, ConstantPoolItem, JumpTableIndex, @@ -160,7 +168,7 @@ public: bool isMemoryOperandFlag() const { return Kind == kw_volatile || Kind == kw_non_temporal || - Kind == kw_invariant; + Kind == kw_dereferenceable || Kind == kw_invariant; } bool is(TokenKind K) const { return Kind == K; } diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp index b3fd16f..c8bed08 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -14,6 +14,7 @@ #include "MIParser.h" #include "MILexer.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" #include 
"llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -26,13 +27,16 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <cctype> using namespace llvm; @@ -41,6 +45,17 @@ PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF, : MF(MF), SM(&SM), IRSlots(IRSlots) { } +VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) { + auto I = VRegInfos.insert(std::make_pair(Num, nullptr)); + if (I.second) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + VRegInfo *Info = new (Allocator) VRegInfo; + Info->VReg = MRI.createIncompleteVirtualRegister(); + I.first->second = Info; + } + return *I.first->second; +} + namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source @@ -65,7 +80,7 @@ class MIParser { SMDiagnostic &Error; StringRef Source, CurrentSource; MIToken Token; - const PerFunctionMIParsingState &PFS; + PerFunctionMIParsingState &PFS; /// Maps from instruction names to op codes. StringMap<unsigned> Names2InstrOpCodes; /// Maps from register names to registers. @@ -86,7 +101,7 @@ class MIParser { StringMap<unsigned> Names2BitmaskTargetFlags; public: - MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, StringRef Source); /// \p SkipChar gives the number of characters to skip before looking @@ -109,7 +124,8 @@ public: bool parse(MachineInstr *&MI); bool parseStandaloneMBB(MachineBasicBlock *&MBB); bool parseStandaloneNamedRegister(unsigned &Reg); - bool parseStandaloneVirtualRegister(unsigned &Reg); + bool parseStandaloneVirtualRegister(VRegInfo *&Info); + bool parseStandaloneRegister(unsigned &Reg); bool parseStandaloneStackObject(int &FI); bool parseStandaloneMDNode(MDNode *&Node); @@ -119,21 +135,19 @@ public: bool parseBasicBlockLiveins(MachineBasicBlock &MBB); bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); - bool parseRegister(unsigned &Reg); + bool parseNamedRegister(unsigned &Reg); + bool parseVirtualRegister(VRegInfo *&Info); + bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo); bool parseRegisterFlag(unsigned &Flags); bool parseSubRegisterIndex(unsigned &SubReg); bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx); - bool parseSize(unsigned &Size); bool parseRegisterOperand(MachineOperand &Dest, Optional<unsigned> &TiedDefIdx, bool IsDef = false); bool parseImmediateOperand(MachineOperand &Dest); bool parseIRConstant(StringRef::iterator Loc, StringRef Source, const Constant *&C); bool parseIRConstant(StringRef::iterator Loc, const Constant *&C); - bool parseIRType(StringRef::iterator Loc, StringRef Source, unsigned &Read, - Type *&Ty); - // \p MustBeSized defines whether or not \p Ty must be sized. 
- bool parseIRType(StringRef::iterator Loc, Type *&Ty, bool MustBeSized = true); + bool parseLowLevelType(StringRef::iterator Loc, LLT &Ty); bool parseTypedImmediateOperand(MachineOperand &Dest); bool parseFPImmediateOperand(MachineOperand &Dest); bool parseMBBReference(MachineBasicBlock *&MBB); @@ -155,6 +169,8 @@ public: bool parseCFIOperand(MachineOperand &Dest); bool parseIRBlock(BasicBlock *&BB, const Function &F); bool parseBlockAddressOperand(MachineOperand &Dest); + bool parseIntrinsicOperand(MachineOperand &Dest); + bool parsePredicateOperand(MachineOperand &Dest); bool parseTargetIndexOperand(MachineOperand &Dest); bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); bool parseMachineOperand(MachineOperand &Dest, @@ -181,6 +197,12 @@ private: /// Return true if an error occurred. bool getUint64(uint64_t &Result); + /// Convert the hexadecimal literal in the current token into an unsigned + /// APInt with a minimum bitwidth required to represent the value. + /// + /// Return true if the literal does not represent an integer value. + bool getHexUint(APInt &Result); + /// If the current token is of the given kind, consume it and return false. /// Otherwise report an error and return true. bool expectAndConsume(MIToken::TokenKind TokenKind); @@ -254,7 +276,7 @@ private: } // end anonymous namespace -MIParser::MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error, +MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, StringRef Source) : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS) {} @@ -362,7 +384,7 @@ bool MIParser::parseBasicBlockDefinition( if (!Name.empty()) { BB = dyn_cast_or_null<BasicBlock>( - MF.getFunction()->getValueSymbolTable().lookup(Name)); + MF.getFunction()->getValueSymbolTable()->lookup(Name)); if (!BB) return error(Loc, Twine("basic block '") + Name + "' is not defined in the function '" + @@ -437,10 +459,24 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); unsigned Reg = 0; - if (parseRegister(Reg)) + if (parseNamedRegister(Reg)) return true; - MBB.addLiveIn(Reg); lex(); + LaneBitmask Mask = LaneBitmask::getAll(); + if (consumeIfPresent(MIToken::colon)) { + // Parse lane mask. + if (Token.isNot(MIToken::IntegerLiteral) && + Token.isNot(MIToken::HexLiteral)) + return error("expected a lane mask"); + static_assert(sizeof(LaneBitmask::Type) == sizeof(unsigned), + "Use correct get-function for lane mask"); + LaneBitmask::Type V; + if (getUnsigned(V)) + return error("invalid lane mask value"); + Mask = LaneBitmask(V); + lex(); + } + MBB.addLiveIn(Reg, Mask); } while (consumeIfPresent(MIToken::comma)); return false; } @@ -461,7 +497,8 @@ bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) { lex(); unsigned Weight = 0; if (consumeIfPresent(MIToken::lparen)) { - if (Token.isNot(MIToken::IntegerLiteral)) + if (Token.isNot(MIToken::IntegerLiteral) && + Token.isNot(MIToken::HexLiteral)) return error("expected an integer literal after '('"); if (getUnsigned(Weight)) return true; @@ -597,14 +634,6 @@ bool MIParser::parse(MachineInstr *&MI) { if (Token.isError() || parseInstruction(OpCode, Flags)) return true; - Type *Ty = nullptr; - if (isPreISelGenericOpcode(OpCode)) { - // For generic opcode, a type is mandatory. - auto Loc = Token.location(); - if (parseIRType(Loc, Ty)) - return true; - } - // Parse the remaining machine operands. 
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) && Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { @@ -660,8 +689,6 @@ bool MIParser::parse(MachineInstr *&MI) { // TODO: Check for extraneous machine operands. MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true); MI->setFlags(Flags); - if (Ty) - MI->setType(Ty); for (const auto &Operand : Operands) MI->addOperand(MF, Operand.Operand); if (assignRegisterTies(*MI, Operands)) @@ -692,7 +719,7 @@ bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) { lex(); if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); - if (parseRegister(Reg)) + if (parseNamedRegister(Reg)) return true; lex(); if (Token.isNot(MIToken::Eof)) @@ -700,12 +727,28 @@ bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) { return false; } -bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) { +bool MIParser::parseStandaloneVirtualRegister(VRegInfo *&Info) { lex(); if (Token.isNot(MIToken::VirtualRegister)) return error("expected a virtual register"); - if (parseRegister(Reg)) + if (parseVirtualRegister(Info)) + return true; + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +bool MIParser::parseStandaloneRegister(unsigned &Reg) { + lex(); + if (Token.isNot(MIToken::NamedRegister) && + Token.isNot(MIToken::VirtualRegister)) + return error("expected either a named or virtual register"); + + VRegInfo *Info; + if (parseRegister(Reg, Info)) return true; + lex(); if (Token.isNot(MIToken::Eof)) return error("expected end of string after the register reference"); @@ -800,33 +843,39 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { return false; } -bool MIParser::parseRegister(unsigned &Reg) { +bool MIParser::parseNamedRegister(unsigned &Reg) { + assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token"); + StringRef Name = Token.stringValue(); + if (getRegisterByName(Name, Reg)) + return error(Twine("unknown register name '") + Name + "'"); + return false; +} + +bool MIParser::parseVirtualRegister(VRegInfo *&Info) { + assert(Token.is(MIToken::VirtualRegister) && "Needs VirtualRegister token"); + unsigned ID; + if (getUnsigned(ID)) + return true; + Info = &PFS.getVRegInfo(ID); + return false; +} + +bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) { switch (Token.kind()) { case MIToken::underscore: Reg = 0; - break; - case MIToken::NamedRegister: { - StringRef Name = Token.stringValue(); - if (getRegisterByName(Name, Reg)) - return error(Twine("unknown register name '") + Name + "'"); - break; - } - case MIToken::VirtualRegister: { - unsigned ID; - if (getUnsigned(ID)) + return false; + case MIToken::NamedRegister: + return parseNamedRegister(Reg); + case MIToken::VirtualRegister: + if (parseVirtualRegister(Info)) return true; - const auto RegInfo = PFS.VirtualRegisterSlots.find(ID); - if (RegInfo == PFS.VirtualRegisterSlots.end()) - return error(Twine("use of undefined virtual register '%") + Twine(ID) + - "'"); - Reg = RegInfo->second; - break; - } + Reg = Info->VReg; + return false; // TODO: Parse other register kinds. 
default: llvm_unreachable("The current token should be a register"); } - return false; } bool MIParser::parseRegisterFlag(unsigned &Flags) { @@ -871,10 +920,10 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) { } bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { - assert(Token.is(MIToken::colon)); + assert(Token.is(MIToken::dot)); lex(); if (Token.isNot(MIToken::Identifier)) - return error("expected a subregister index after ':'"); + return error("expected a subregister index after '.'"); auto Name = Token.stringValue(); SubReg = getSubRegIndex(Name); if (!SubReg) @@ -885,7 +934,7 @@ bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) { if (!consumeIfPresent(MIToken::kw_tied_def)) - return error("expected 'tied-def' after '('"); + return true; if (Token.isNot(MIToken::IntegerLiteral)) return error("expected an integer literal after 'tied-def'"); if (getUnsigned(TiedDefIdx)) @@ -896,17 +945,6 @@ bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) { return false; } -bool MIParser::parseSize(unsigned &Size) { - if (Token.isNot(MIToken::IntegerLiteral)) - return error("expected an integer literal for the size"); - if (getUnsigned(Size)) - return true; - lex(); - if (expectAndConsume(MIToken::rparen)) - return true; - return false; -} - bool MIParser::assignRegisterTies(MachineInstr &MI, ArrayRef<ParsedMachineOperand> Operands) { SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs; @@ -947,7 +985,6 @@ bool MIParser::assignRegisterTies(MachineInstr &MI, bool MIParser::parseRegisterOperand(MachineOperand &Dest, Optional<unsigned> &TiedDefIdx, bool IsDef) { - unsigned Reg; unsigned Flags = IsDef ? RegState::Define : 0; while (Token.isRegisterFlag()) { if (parseRegisterFlag(Flags)) @@ -955,38 +992,62 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, } if (!Token.isRegister()) return error("expected a register after register flags"); - if (parseRegister(Reg)) + unsigned Reg; + VRegInfo *RegInfo; + if (parseRegister(Reg, RegInfo)) return true; lex(); unsigned SubReg = 0; - if (Token.is(MIToken::colon)) { + if (Token.is(MIToken::dot)) { if (parseSubRegisterIndex(SubReg)) return true; if (!TargetRegisterInfo::isVirtualRegister(Reg)) return error("subregister index expects a virtual register"); } + MachineRegisterInfo &MRI = MF.getRegInfo(); if ((Flags & RegState::Define) == 0) { if (consumeIfPresent(MIToken::lparen)) { unsigned Idx; - if (parseRegisterTiedDefIndex(Idx)) - return true; - TiedDefIdx = Idx; + if (!parseRegisterTiedDefIndex(Idx)) + TiedDefIdx = Idx; + else { + // Try a redundant low-level type. + LLT Ty; + if (parseLowLevelType(Token.location(), Ty)) + return error("expected tied-def or low-level type after '('"); + + if (expectAndConsume(MIToken::rparen)) + return true; + + if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) + return error("inconsistent type for generic virtual register"); + + MRI.setType(Reg, Ty); + } } } else if (consumeIfPresent(MIToken::lparen)) { - // Virtual registers may have a size with GlobalISel. + // Virtual registers may have a type with GlobalISel. 
if (!TargetRegisterInfo::isVirtualRegister(Reg)) - return error("unexpected size on physical register"); - unsigned Size; - if (parseSize(Size)) + return error("unexpected type on physical register"); + + LLT Ty; + if (parseLowLevelType(Token.location(), Ty)) return true; - MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.setSize(Reg, Size); - } else if (PFS.GenericVRegs.count(Reg)) { - // Generic virtual registers must have a size. - // If we end up here this means the size hasn't been specified and + if (expectAndConsume(MIToken::rparen)) + return true; + + if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) + return error("inconsistent type for generic virtual register"); + + MRI.setType(Reg, Ty); + } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // Generic virtual registers must have a type. + // If we end up here this means the type hasn't been specified and // this is bad! - return error("generic virtual registers must have a size"); + if (RegInfo->Kind == VRegInfo::GENERIC || + RegInfo->Kind == VRegInfo::REGBANK) + return error("generic virtual registers must have a type"); } Dest = MachineOperand::CreateReg( Reg, Flags & RegState::Define, Flags & RegState::Implicit, @@ -1010,7 +1071,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, const Constant *&C) { auto Source = StringValue.str(); // The source has to be null terminated. SMDiagnostic Err; - C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(), + C = parseConstantValue(Source, Err, *MF.getFunction()->getParent(), &PFS.IRSlots); if (!C) return error(Loc + Err.getColumnNo(), Err.getMessage()); @@ -1024,35 +1085,45 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { return false; } -bool MIParser::parseIRType(StringRef::iterator Loc, StringRef StringValue, - unsigned &Read, Type *&Ty) { - auto Source = StringValue.str(); // The source has to be null terminated. - SMDiagnostic Err; - Ty = parseTypeAtBeginning(Source.c_str(), Read, Err, - *MF.getFunction()->getParent(), &PFS.IRSlots); - if (!Ty) - return error(Loc + Err.getColumnNo(), Err.getMessage()); - return false; -} +bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { + if (Token.is(MIToken::ScalarType)) { + Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue()); + lex(); + return false; + } else if (Token.is(MIToken::PointerType)) { + const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + unsigned AS = APSInt(Token.range().drop_front()).getZExtValue(); + Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); + lex(); + return false; + } -bool MIParser::parseIRType(StringRef::iterator Loc, Type *&Ty, - bool MustBeSized) { - // At this point we enter in the IR world, i.e., to get the correct type, - // we need to hand off the whole string, not just the current token. - // E.g., <4 x i64> would give '<' as a token and there is not much - // the IR parser can do with that. - unsigned Read = 0; - if (parseIRType(Loc, StringRef(Loc), Read, Ty)) - return true; - // The type must be sized, otherwise there is not much the backend - // can do with it. - if (MustBeSized && !Ty->isSized()) - return error("expected a sized type"); - // The next token is Read characters from the Loc. - // However, the current location is not Loc, but Loc + the length of Token. - // Therefore, subtract the length of Token (range().end() - Loc) to the - // number of characters to skip before the next token. 
- lex(Read - (Token.range().end() - Loc)); + // Now we're looking for a vector. + if (Token.isNot(MIToken::less)) + return error(Loc, + "expected unsized, pN, sN or <N x sM> for GlobalISel type"); + + lex(); + + if (Token.isNot(MIToken::IntegerLiteral)) + return error(Loc, "expected <N x sM> for vector type"); + uint64_t NumElements = Token.integerValue().getZExtValue(); + lex(); + + if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x") + return error(Loc, "expected '<N x sM>' for vector type"); + lex(); + + if (Token.isNot(MIToken::ScalarType)) + return error(Loc, "expected '<N x sM>' for vector type"); + uint64_t ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); + lex(); + + if (Token.isNot(MIToken::greater)) + return error(Loc, "expected '<N x sM>' for vector type"); + lex(); + + Ty = LLT::vector(NumElements, ScalarSize); return false; } @@ -1072,7 +1143,8 @@ bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) { bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { auto Loc = Token.location(); lex(); - if (Token.isNot(MIToken::FloatingPointLiteral)) + if (Token.isNot(MIToken::FloatingPointLiteral) && + Token.isNot(MIToken::HexLiteral)) return error("expected a floating point literal"); const Constant *C = nullptr; if (parseIRConstant(Loc, C)) @@ -1082,13 +1154,24 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { } bool MIParser::getUnsigned(unsigned &Result) { - assert(Token.hasIntegerValue() && "Expected a token with an integer value"); - const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; - uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); - if (Val64 == Limit) - return error("expected 32-bit integer (too large)"); - Result = Val64; - return false; + if (Token.hasIntegerValue()) { + const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; + uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); + if (Val64 == Limit) + return error("expected 32-bit integer (too large)"); + Result = Val64; + return false; + } + if (Token.is(MIToken::HexLiteral)) { + APInt A; + if (getHexUint(A)) + return true; + if (A.getBitWidth() > 32) + return error("expected 32-bit integer (too large)"); + Result = A.getZExtValue(); + return false; + } + return true; } bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { @@ -1128,7 +1211,7 @@ bool MIParser::parseStackFrameIndex(int &FI) { "'"); StringRef Name; if (const auto *Alloca = - MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second)) + MF.getFrameInfo().getObjectAllocation(ObjectInfo->second)) Name = Alloca->getName(); if (!Token.stringValue().empty() && Token.stringValue() != Name) return error(Twine("the name of the stack object '%stack.") + Twine(ID) + @@ -1293,7 +1376,7 @@ bool MIParser::parseCFIRegister(unsigned &Reg) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a cfi register"); unsigned LLVMReg; - if (parseRegister(LLVMReg)) + if (parseNamedRegister(LLVMReg)) return true; const auto *TRI = MF.getSubtarget().getRegisterInfo(); assert(TRI && "Expected target register info"); @@ -1308,7 +1391,6 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { auto Kind = Token.kind(); lex(); - auto &MMI = MF.getMMI(); int Offset; unsigned Reg; unsigned CFIIndex; @@ -1316,27 +1398,26 @@ case MIToken::kw_cfi_same_value: if (parseCFIRegister(Reg)) return true; - CFIIndex = - 
MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg)); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg)); break; case MIToken::kw_cfi_offset: if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || parseCFIOffset(Offset)) return true; CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset)); + MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset)); break; case MIToken::kw_cfi_def_cfa_register: if (parseCFIRegister(Reg)) return true; CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); break; case MIToken::kw_cfi_def_cfa_offset: if (parseCFIOffset(Offset)) return true; // NB: MCCFIInstruction::createDefCfaOffset negates the offset. - CFIIndex = MMI.addFrameInst( + CFIIndex = MF.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -Offset)); break; case MIToken::kw_cfi_def_cfa: @@ -1345,7 +1426,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { return true; // NB: MCCFIInstruction::createDefCfa negates the offset. CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); + MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); break; default: // TODO: Parse the other CFI operands. @@ -1359,7 +1440,7 @@ bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) { switch (Token.kind()) { case MIToken::NamedIRBlock: { BB = dyn_cast_or_null<BasicBlock>( - F.getValueSymbolTable().lookup(Token.stringValue())); + F.getValueSymbolTable()->lookup(Token.stringValue())); if (!BB) return error(Twine("use of undefined IR block '") + Token.range() + "'"); break; @@ -1411,6 +1492,93 @@ bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) { return false; } +bool MIParser::parseIntrinsicOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_intrinsic)); + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax intrinsic(@llvm.whatever)"); + + if (Token.isNot(MIToken::NamedGlobalValue)) + return error("expected syntax intrinsic(@llvm.whatever)"); + + std::string Name = Token.stringValue(); + lex(); + + if (expectAndConsume(MIToken::rparen)) + return error("expected ')' to terminate intrinsic name"); + + // Find out what intrinsic we're dealing with, first try the global namespace + // and then the target's private intrinsics if that fails. 
+ const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo(); + Intrinsic::ID ID = Function::lookupIntrinsicID(Name); + if (ID == Intrinsic::not_intrinsic && TII) + ID = static_cast<Intrinsic::ID>(TII->lookupName(Name)); + + if (ID == Intrinsic::not_intrinsic) + return error("unknown intrinsic name"); + Dest = MachineOperand::CreateIntrinsicID(ID); + + return false; +} + +bool MIParser::parsePredicateOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_intpred) || Token.is(MIToken::kw_floatpred)); + bool IsFloat = Token.is(MIToken::kw_floatpred); + lex(); + + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax intpred(whatever) or floatpred(whatever)"); + + if (Token.isNot(MIToken::Identifier)) + return error("expected a predicate name"); + + CmpInst::Predicate Pred; + if (IsFloat) { + Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue()) + .Case("false", CmpInst::FCMP_FALSE) + .Case("oeq", CmpInst::FCMP_OEQ) + .Case("ogt", CmpInst::FCMP_OGT) + .Case("oge", CmpInst::FCMP_OGE) + .Case("olt", CmpInst::FCMP_OLT) + .Case("ole", CmpInst::FCMP_OLE) + .Case("one", CmpInst::FCMP_ONE) + .Case("ord", CmpInst::FCMP_ORD) + .Case("uno", CmpInst::FCMP_UNO) + .Case("ueq", CmpInst::FCMP_UEQ) + .Case("ugt", CmpInst::FCMP_UGT) + .Case("uge", CmpInst::FCMP_UGE) + .Case("ult", CmpInst::FCMP_ULT) + .Case("ule", CmpInst::FCMP_ULE) + .Case("une", CmpInst::FCMP_UNE) + .Case("true", CmpInst::FCMP_TRUE) + .Default(CmpInst::BAD_FCMP_PREDICATE); + if (!CmpInst::isFPPredicate(Pred)) + return error("invalid floating-point predicate"); + } else { + Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue()) + .Case("eq", CmpInst::ICMP_EQ) + .Case("ne", CmpInst::ICMP_NE) + .Case("sgt", CmpInst::ICMP_SGT) + .Case("sge", CmpInst::ICMP_SGE) + .Case("slt", CmpInst::ICMP_SLT) + .Case("sle", CmpInst::ICMP_SLE) + .Case("ugt", CmpInst::ICMP_UGT) + .Case("uge", CmpInst::ICMP_UGE) + .Case("ult", CmpInst::ICMP_ULT) + .Case("ule", CmpInst::ICMP_ULE) + .Default(CmpInst::BAD_ICMP_PREDICATE); + if (!CmpInst::isIntPredicate(Pred)) + return error("invalid integer predicate"); + } + + lex(); + Dest = MachineOperand::CreatePredicate(Pred); + if (expectAndConsume(MIToken::rparen)) + return error("predicate should be terminated by ')'."); + + return false; +} + bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { assert(Token.is(MIToken::kw_target_index)); lex(); @@ -1441,8 +1609,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { while (true) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a named register"); - unsigned Reg = 0; - if (parseRegister(Reg)) + unsigned Reg; + if (parseNamedRegister(Reg)) return true; lex(); Mask[Reg / 32] |= 1U << (Reg % 32); @@ -1511,10 +1679,15 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, return parseCFIOperand(Dest); case MIToken::kw_blockaddress: return parseBlockAddressOperand(Dest); + case MIToken::kw_intrinsic: + return parseIntrinsicOperand(Dest); case MIToken::kw_target_index: return parseTargetIndexOperand(Dest); case MIToken::kw_liveout: return parseLiveoutRegisterMaskOperand(Dest); + case MIToken::kw_floatpred: + case MIToken::kw_intpred: + return parsePredicateOperand(Dest); case MIToken::Error: return true; case MIToken::Identifier: @@ -1523,7 +1696,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, lex(); break; } - // fallthrough + LLVM_FALLTHROUGH; default: // FIXME: Parse the MCSymbol machine operand. 
return error("expected a machine operand"); @@ -1613,7 +1786,7 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) { bool MIParser::parseIRValue(const Value *&V) { switch (Token.kind()) { case MIToken::NamedIRValue: { - V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue()); + V = MF.getFunction()->getValueSymbolTable()->lookup(Token.stringValue()); break; } case MIToken::IRValue: { @@ -1647,10 +1820,35 @@ bool MIParser::parseIRValue(const Value *&V) { } bool MIParser::getUint64(uint64_t &Result) { - assert(Token.hasIntegerValue()); - if (Token.integerValue().getActiveBits() > 64) - return error("expected 64-bit integer (too large)"); - Result = Token.integerValue().getZExtValue(); + if (Token.hasIntegerValue()) { + if (Token.integerValue().getActiveBits() > 64) + return error("expected 64-bit integer (too large)"); + Result = Token.integerValue().getZExtValue(); + return false; + } + if (Token.is(MIToken::HexLiteral)) { + APInt A; + if (getHexUint(A)) + return true; + if (A.getBitWidth() > 64) + return error("expected 64-bit integer (too large)"); + Result = A.getZExtValue(); + return false; + } + return true; +} + +bool MIParser::getHexUint(APInt &Result) { + assert(Token.is(MIToken::HexLiteral)); + StringRef S = Token.range(); + assert(S[0] == '0' && tolower(S[1]) == 'x'); + // This could be a floating point literal with a special prefix. + if (!isxdigit(S[2])) + return true; + StringRef V = S.substr(2); + APInt A(V.size()*4, V, 16); + Result = APInt(A.getActiveBits(), + ArrayRef<uint64_t>(A.getRawData(), A.getNumWords())); return false; } @@ -1663,6 +1861,9 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { case MIToken::kw_non_temporal: Flags |= MachineMemOperand::MONonTemporal; break; + case MIToken::kw_dereferenceable: + Flags |= MachineMemOperand::MODereferenceable; + break; case MIToken::kw_invariant: Flags |= MachineMemOperand::MOInvariant; break; @@ -2059,36 +2260,42 @@ bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, return MIParser(PFS, Error, Src).parseBasicBlockDefinitions(PFS.MBBSlots); } -bool llvm::parseMachineInstructions(const PerFunctionMIParsingState &PFS, +bool llvm::parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseBasicBlocks(); } -bool llvm::parseMBBReference(const PerFunctionMIParsingState &PFS, +bool llvm::parseMBBReference(PerFunctionMIParsingState &PFS, MachineBasicBlock *&MBB, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneMBB(MBB); } -bool llvm::parseNamedRegisterReference(const PerFunctionMIParsingState &PFS, +bool llvm::parseRegisterReference(PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, + SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneRegister(Reg); +} + +bool llvm::parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg); } -bool llvm::parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS, - unsigned &Reg, StringRef Src, +bool llvm::parseVirtualRegisterReference(PerFunctionMIParsingState &PFS, + VRegInfo *&Info, StringRef Src, SMDiagnostic &Error) { - return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Reg); + return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Info); } -bool llvm::parseStackObjectReference(const PerFunctionMIParsingState &PFS, +bool 
llvm::parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneStackObject(FI); } -bool llvm::parseMDNode(const PerFunctionMIParsingState &PFS, +bool llvm::parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); } diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h index 18895b9..93a4d84 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h @@ -26,26 +26,42 @@ class MachineFunction; class MachineInstr; class MachineRegisterInfo; class MDNode; +class RegisterBank; struct SlotMapping; class SMDiagnostic; class SourceMgr; +class TargetRegisterClass; + +struct VRegInfo { + enum uint8_t { + UNKNOWN, NORMAL, GENERIC, REGBANK + } Kind = UNKNOWN; + bool Explicit = false; ///< VReg was explicitly specified in the .mir file. + union { + const TargetRegisterClass *RC; + const RegisterBank *RegBank; + } D; + unsigned VReg; + unsigned PreferredReg = 0; +}; struct PerFunctionMIParsingState { + BumpPtrAllocator Allocator; MachineFunction &MF; SourceMgr *SM; const SlotMapping &IRSlots; DenseMap<unsigned, MachineBasicBlock *> MBBSlots; - DenseMap<unsigned, unsigned> VirtualRegisterSlots; + DenseMap<unsigned, VRegInfo*> VRegInfos; DenseMap<unsigned, int> FixedStackObjectSlots; DenseMap<unsigned, int> StackObjectSlots; DenseMap<unsigned, unsigned> ConstantPoolSlots; DenseMap<unsigned, unsigned> JumpTableSlots; - /// Hold the generic virtual registers. - SmallSet<unsigned, 8> GenericVRegs; PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM, const SlotMapping &IRSlots); + + VRegInfo &getVRegInfo(unsigned VReg); }; /// Parse the machine basic block definitions, and skip the machine @@ -73,26 +89,29 @@ bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, /// on the given source string. /// /// Return true if an error occurred. 
-bool parseMachineInstructions(const PerFunctionMIParsingState &PFS, - StringRef Src, SMDiagnostic &Error); +bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src, + SMDiagnostic &Error); -bool parseMBBReference(const PerFunctionMIParsingState &PFS, +bool parseMBBReference(PerFunctionMIParsingState &PFS, MachineBasicBlock *&MBB, StringRef Src, SMDiagnostic &Error); -bool parseNamedRegisterReference(const PerFunctionMIParsingState &PFS, - unsigned &Reg, StringRef Src, - SMDiagnostic &Error); +bool parseRegisterReference(PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, + SMDiagnostic &Error); + +bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg, + StringRef Src, SMDiagnostic &Error); -bool parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS, - unsigned &Reg, StringRef Src, +bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS, + VRegInfo *&Info, StringRef Src, SMDiagnostic &Error); -bool parseStackObjectReference(const PerFunctionMIParsingState &PFS, - int &FI, StringRef Src, SMDiagnostic &Error); +bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI, + StringRef Src, SMDiagnostic &Error); -bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node, - StringRef Src, SMDiagnostic &Error); +bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, + SMDiagnostic &Error); } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 4aa3df6..3dff114 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -102,10 +102,10 @@ public: /// Return true if error occurred. bool initializeMachineFunction(MachineFunction &MF); - bool initializeRegisterInfo(PerFunctionMIParsingState &PFS, - const yaml::MachineFunction &YamlMF); + bool parseRegisterInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF); - void inferRegisterInfo(const PerFunctionMIParsingState &PFS, + bool setupRegisterInfo(const PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF); bool initializeFrameInfo(PerFunctionMIParsingState &PFS, @@ -128,10 +128,10 @@ public: const yaml::MachineJumpTable &YamlJTI); private: - bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node, + bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, const yaml::StringValue &Source); - bool parseMBBReference(const PerFunctionMIParsingState &PFS, + bool parseMBBReference(PerFunctionMIParsingState &PFS, MachineBasicBlock *&MBB, const yaml::StringValue &Source); @@ -160,6 +160,8 @@ private: /// /// Return null if the name isn't a register bank. 
const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name); + + void computeFunctionProperties(MachineFunction &MF); }; } // end namespace llvm @@ -255,7 +257,8 @@ std::unique_ptr<Module> MIRParserImpl::parse() { bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR) { auto MF = llvm::make_unique<yaml::MachineFunction>(); - yaml::yamlize(In, *MF, false); + yaml::EmptyContext Ctx; + yaml::yamlize(In, *MF, false, Ctx); if (In.error()) return true; auto FunctionName = MF->Name; @@ -279,6 +282,43 @@ void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) { new UnreachableInst(Context, BB); } +static bool isSSA(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg)) + return false; + } + return true; +} + +void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) { + MachineFunctionProperties &Properties = MF.getProperties(); + + bool HasPHI = false; + bool HasInlineAsm = false; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (MI.isPHI()) + HasPHI = true; + if (MI.isInlineAsm()) + HasInlineAsm = true; + } + } + if (!HasPHI) + Properties.set(MachineFunctionProperties::Property::NoPHIs); + MF.setHasInlineAsm(HasInlineAsm); + + if (isSSA(MF)) + Properties.set(MachineFunctionProperties::Property::IsSSA); + else + Properties.reset(MachineFunctionProperties::Property::IsSSA); + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.getNumVirtRegs() == 0) + Properties.set(MachineFunctionProperties::Property::NoVRegs); +} + bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { auto It = Functions.find(MF.getName()); if (It == Functions.end()) @@ -289,11 +329,17 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { if (YamlMF.Alignment) MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); - MF.setHasInlineAsm(YamlMF.HasInlineAsm); - if (YamlMF.AllVRegsAllocated) - MF.getProperties().set(MachineFunctionProperties::Property::AllVRegsAllocated); + + if (YamlMF.Legalized) + MF.getProperties().set(MachineFunctionProperties::Property::Legalized); + if (YamlMF.RegBankSelected) + MF.getProperties().set( + MachineFunctionProperties::Property::RegBankSelected); + if (YamlMF.Selected) + MF.getProperties().set(MachineFunctionProperties::Property::Selected); + PerFunctionMIParsingState PFS(MF, SM, IRSlots); - if (initializeRegisterInfo(PFS, YamlMF)) + if (parseRegisterInfo(PFS, YamlMF)) return true; if (!YamlMF.Constants.empty()) { auto *ConstantPool = MF.getConstantPool(); @@ -343,62 +389,60 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { } PFS.SM = &SM; - inferRegisterInfo(PFS, YamlMF); - // FIXME: This is a temporary workaround until the reserved registers can be - // serialized. 
- MF.getRegInfo().freezeReservedRegs(MF); + if (setupRegisterInfo(PFS, YamlMF)) + return true; + + computeFunctionProperties(MF); + MF.verify(); return false; } -bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS, - const yaml::MachineFunction &YamlMF) { +bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF) { MachineFunction &MF = PFS.MF; MachineRegisterInfo &RegInfo = MF.getRegInfo(); - assert(RegInfo.isSSA()); - if (!YamlMF.IsSSA) - RegInfo.leaveSSA(); assert(RegInfo.tracksLiveness()); if (!YamlMF.TracksRegLiveness) RegInfo.invalidateLiveness(); - RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness); SMDiagnostic Error; // Parse the virtual register information. for (const auto &VReg : YamlMF.VirtualRegisters) { - unsigned Reg; + VRegInfo &Info = PFS.getVRegInfo(VReg.ID.Value); + if (Info.Explicit) + return error(VReg.ID.SourceRange.Start, + Twine("redefinition of virtual register '%") + + Twine(VReg.ID.Value) + "'"); + Info.Explicit = true; + if (StringRef(VReg.Class.Value).equals("_")) { - // This is a generic virtual register. - // The size will be set appropriately when we reach the definition. - Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1); - PFS.GenericVRegs.insert(Reg); + Info.Kind = VRegInfo::GENERIC; } else { const auto *RC = getRegClass(MF, VReg.Class.Value); if (RC) { - Reg = RegInfo.createVirtualRegister(RC); + Info.Kind = VRegInfo::NORMAL; + Info.D.RC = RC; } else { - const auto *RegBank = getRegBank(MF, VReg.Class.Value); + const RegisterBank *RegBank = getRegBank(MF, VReg.Class.Value); if (!RegBank) return error( VReg.Class.SourceRange.Start, Twine("use of undefined register class or register bank '") + VReg.Class.Value + "'"); - Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1); - RegInfo.setRegBank(Reg, *RegBank); - PFS.GenericVRegs.insert(Reg); + Info.Kind = VRegInfo::REGBANK; + Info.D.RegBank = RegBank; } } - if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg)) - .second) - return error(VReg.ID.SourceRange.Start, - Twine("redefinition of virtual register '%") + - Twine(VReg.ID.Value) + "'"); + if (!VReg.PreferredRegister.Value.empty()) { - unsigned PreferredReg = 0; - if (parseNamedRegisterReference(PFS, PreferredReg, - VReg.PreferredRegister.Value, Error)) + if (Info.Kind != VRegInfo::NORMAL) + return error(VReg.Class.SourceRange.Start, + Twine("preferred register can only be set for normal vregs")); + + if (parseRegisterReference(PFS, Info.PreferredReg, + VReg.PreferredRegister.Value, Error)) return error(Error, VReg.PreferredRegister.SourceRange); - RegInfo.setSimpleHint(Reg, PreferredReg); } } @@ -409,9 +453,11 @@ bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS, return error(Error, LiveIn.Register.SourceRange); unsigned VReg = 0; if (!LiveIn.VirtualRegister.Value.empty()) { - if (parseVirtualRegisterReference(PFS, VReg, LiveIn.VirtualRegister.Value, + VRegInfo *Info; + if (parseVirtualRegisterReference(PFS, Info, LiveIn.VirtualRegister.Value, Error)) return error(Error, LiveIn.VirtualRegister.SourceRange); + VReg = Info->VReg; } RegInfo.addLiveIn(Reg, VReg); } @@ -430,26 +476,57 @@ bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS, return false; } -void MIRParserImpl::inferRegisterInfo(const PerFunctionMIParsingState &PFS, +bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { - if (YamlMF.CalleeSavedRegisters) - return; - 
MachineRegisterInfo &MRI = PFS.MF.getRegInfo(); - for (const MachineBasicBlock &MBB : PFS.MF) { - for (const MachineInstr &MI : MBB) { - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isRegMask()) - continue; - MRI.addPhysRegsUsedFromRegMask(MO.getRegMask()); + MachineFunction &MF = PFS.MF; + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool Error = false; + // Create VRegs + for (auto P : PFS.VRegInfos) { + const VRegInfo &Info = *P.second; + unsigned Reg = Info.VReg; + switch (Info.Kind) { + case VRegInfo::UNKNOWN: + error(Twine("Cannot determine class/bank of virtual register ") + + Twine(P.first) + " in function '" + MF.getName() + "'"); + Error = true; + break; + case VRegInfo::NORMAL: + MRI.setRegClass(Reg, Info.D.RC); + if (Info.PreferredReg != 0) + MRI.setSimpleHint(Reg, Info.PreferredReg); + break; + case VRegInfo::GENERIC: + break; + case VRegInfo::REGBANK: + MRI.setRegBank(Reg, *Info.D.RegBank); + break; + } + } + + // Compute MachineRegisterInfo::UsedPhysRegMask + if (!YamlMF.CalleeSavedRegisters) { + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + MRI.addPhysRegsUsedFromRegMask(MO.getRegMask()); + } } } } + + // FIXME: This is a temporary workaround until the reserved registers can be + // serialized. + MRI.freezeReservedRegs(MF); + return Error; } bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { MachineFunction &MF = PFS.MF; - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); const Function &F = *MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); @@ -507,7 +584,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, const yaml::StringValue &Name = Object.Name; if (!Name.Value.empty()) { Alloca = dyn_cast_or_null<AllocaInst>( - F.getValueSymbolTable().lookup(Name.Value)); + F.getValueSymbolTable()->lookup(Name.Value)); if (!Alloca) return error(Name.SourceRange.Start, "alloca instruction named '" + Name.Value + @@ -597,11 +674,11 @@ bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) || typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this)) return true; - PFS.MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); + PFS.MF.setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); return false; } -bool MIRParserImpl::parseMDNode(const PerFunctionMIParsingState &PFS, +bool MIRParserImpl::parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, const yaml::StringValue &Source) { if (Source.Value.empty()) return false; @@ -657,7 +734,7 @@ bool MIRParserImpl::initializeJumpTableInfo(PerFunctionMIParsingState &PFS, return false; } -bool MIRParserImpl::parseMBBReference(const PerFunctionMIParsingState &PFS, +bool MIRParserImpl::parseMBBReference(PerFunctionMIParsingState &PFS, MachineBasicBlock *&MBB, const yaml::StringValue &Source) { SMDiagnostic Error; @@ -784,6 +861,14 @@ std::unique_ptr<MIRParser> llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents, LLVMContext &Context) { auto Filename = Contents->getBufferIdentifier(); + if (Context.shouldDiscardValueNames()) { + Context.diagnose(DiagnosticInfoMIRParser( + DS_Error, + SMDiagnostic( + Filename, SourceMgr::DK_Error, + "Can't read MIR with a Context that 
discards named Values"))); + return nullptr; + } return llvm::make_unique<MIRParser>( llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context)); } diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp index 703c99d..db87092 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp @@ -14,6 +14,7 @@ #include "MIRPrinter.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -27,13 +28,16 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -86,10 +90,8 @@ public: const MachineConstantPool &ConstantPool); void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI, const MachineJumpTableInfo &JTI); - void convertStackObjects(yaml::MachineFunction &MF, - const MachineFrameInfo &MFI, MachineModuleInfo &MMI, - ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI); + void convertStackObjects(yaml::MachineFunction &YMF, + const MachineFunction &MF, ModuleSlotTracker &MST); private: void initRegisterMaskIds(const MachineFunction &MF); @@ -121,7 +123,7 @@ public: void printTargetFlags(const MachineOperand &Op); void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, unsigned I, bool ShouldPrintRegisterTies, - const MachineRegisterInfo *MRI = nullptr, bool IsDef = false); + LLT TypeToPrint, bool IsDef = false); void print(const MachineMemOperand &Op); void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); @@ -172,16 +174,19 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Name = MF.getName(); YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); - YamlMF.HasInlineAsm = MF.hasInlineAsm(); - YamlMF.AllVRegsAllocated = MF.getProperties().hasProperty( - MachineFunctionProperties::Property::AllVRegsAllocated); + + YamlMF.Legalized = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Legalized); + YamlMF.RegBankSelected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::RegBankSelected); + YamlMF.Selected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected); convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); ModuleSlotTracker MST(MF.getFunction()->getParent()); MST.incorporateFunction(*MF.getFunction()); - convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo()); - convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST, - MF.getSubtarget().getRegisterInfo()); + convert(MST, YamlMF.FrameInfo, MF.getFrameInfo()); + convertStackObjects(YamlMF, MF, MST); if (const auto *ConstantPool = MF.getConstantPool()) convert(YamlMF, *ConstantPool); if (const auto *JumpTableInfo = MF.getJumpTableInfo()) @@ -203,9 +208,7 @@ void MIRPrinter::print(const MachineFunction &MF) { void MIRPrinter::convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI) { - MF.IsSSA = 
RegInfo.isSSA(); MF.TracksRegLiveness = RegInfo.tracksLiveness(); - MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled(); // Print the virtual register definitions. for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) { @@ -219,7 +222,8 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); else { VReg.Class = std::string("_"); - assert(RegInfo.getSize(Reg) && "Generic registers must have a size"); + assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) && + "Generic registers must have a valid type"); } unsigned PreferredReg = RegInfo.getSimpleHint(Reg); if (PreferredReg) @@ -279,11 +283,11 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, } } -void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, - const MachineFrameInfo &MFI, - MachineModuleInfo &MMI, - ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI) { +void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, + const MachineFunction &MF, + ModuleSlotTracker &MST) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); // Process fixed stack objects. unsigned ID = 0; for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { @@ -300,7 +304,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, YamlObject.Alignment = MFI.getObjectAlignment(I); YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); - MF.FixedStackObjects.push_back(YamlObject); + YMF.FixedStackObjects.push_back(YamlObject); StackObjectOperandMapping.insert( std::make_pair(I, FrameIndexOperand::createFixed(ID++))); } @@ -325,7 +329,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, YamlObject.Size = MFI.getObjectSize(I); YamlObject.Alignment = MFI.getObjectAlignment(I); - MF.StackObjects.push_back(YamlObject); + YMF.StackObjects.push_back(YamlObject); StackObjectOperandMapping.insert(std::make_pair( I, FrameIndexOperand::create(YamlObject.Name.Value, ID++))); } @@ -338,9 +342,9 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, "Invalid stack object index"); const FrameIndexOperand &StackObject = StackObjectInfo->second; if (StackObject.IsFixed) - MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; + YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; else - MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; + YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; } for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) { auto LocalObject = MFI.getLocalFrameObjectMap(I); @@ -349,26 +353,26 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, "Invalid stack object index"); const FrameIndexOperand &StackObject = StackObjectInfo->second; assert(!StackObject.IsFixed && "Expected a locally mapped stack object"); - MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second; + YMF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second; } // Print the stack object references in the frame information class after // converting the stack objects. if (MFI.hasStackProtectorIndex()) { - raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value); + raw_string_ostream StrOS(YMF.FrameInfo.StackProtector.Value); MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) .printStackObjectReference(MFI.getStackProtectorIndex()); } // Print the debug variable information. 
- for (MachineModuleInfo::VariableDbgInfo &DebugVar : - MMI.getVariableDbgInfo()) { + for (const MachineFunction::VariableDbgInfo &DebugVar : + MF.getVariableDbgInfo()) { auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot); assert(StackObjectInfo != StackObjectOperandMapping.end() && "Invalid stack object index"); const FrameIndexOperand &StackObject = StackObjectInfo->second; assert(!StackObject.IsFixed && "Expected a non-fixed stack object"); - auto &Object = MF.StackObjects[StackObject.ID]; + auto &Object = YMF.StackObjects[StackObject.ID]; { raw_string_ostream StrOS(Object.DebugVar.Value); DebugVar.Var->printAsOperand(StrOS, MST); @@ -475,25 +479,27 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { OS << ", "; printMBBReference(**I); if (MBB.hasSuccessorProbabilities()) - OS << '(' << MBB.getSuccProbability(I) << ')'; + OS << '(' + << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator()) + << ')'; } OS << "\n"; HasLineAttributes = true; } // Print the live in registers. - const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); - assert(TRI && "Expected target register info"); - if (!MBB.livein_empty()) { + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + if (MRI.tracksLiveness() && !MBB.livein_empty()) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); OS.indent(2) << "liveins: "; bool First = true; for (const auto &LI : MBB.liveins()) { if (!First) OS << ", "; First = false; - printReg(LI.PhysReg, OS, TRI); - if (LI.LaneMask != ~0u) - OS << ':' << PrintLaneMask(LI.LaneMask); + printReg(LI.PhysReg, OS, &TRI); + if (!LI.LaneMask.all()) + OS << ":0x" << PrintLaneMask(LI.LaneMask); } OS << "\n"; HasLineAttributes = true; @@ -537,6 +543,27 @@ static bool hasComplexRegisterTies(const MachineInstr &MI) { return false; } +static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx, + SmallBitVector &PrintedTypes, + const MachineRegisterInfo &MRI) { + const MachineOperand &Op = MI.getOperand(OpIdx); + if (!Op.isReg()) + return LLT{}; + + if (MI.isVariadic() || OpIdx >= MI.getNumExplicitOperands()) + return MRI.getType(Op.getReg()); + + auto &OpInfo = MI.getDesc().OpInfo[OpIdx]; + if (!OpInfo.isGenericType()) + return MRI.getType(Op.getReg()); + + if (PrintedTypes[OpInfo.getGenericTypeIndex()]) + return LLT{}; + + PrintedTypes.set(OpInfo.getGenericTypeIndex()); + return MRI.getType(Op.getReg()); +} + void MIPrinter::print(const MachineInstr &MI) { const auto *MF = MI.getParent()->getParent(); const auto &MRI = MF->getRegInfo(); @@ -548,6 +575,7 @@ void MIPrinter::print(const MachineInstr &MI) { if (MI.isCFIInstruction()) assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); + SmallBitVector PrintedTypes(8); bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI); unsigned I = 0, E = MI.getNumOperands(); for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() && @@ -555,7 +583,8 @@ void MIPrinter::print(const MachineInstr &MI) { ++I) { if (I) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, &MRI, + print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, + getTypeToPrint(MI, I, PrintedTypes, MRI), /*IsDef=*/true); } @@ -564,11 +593,6 @@ void MIPrinter::print(const MachineInstr &MI) { if (MI.getFlag(MachineInstr::FrameSetup)) OS << "frame-setup "; OS << TII->getName(MI.getOpcode()); - if (isPreISelGenericOpcode(MI.getOpcode())) { - assert(MI.getType() && "Generic instructions must have a type"); - OS << ' '; - MI.getType()->print(OS, /*IsForDebug*/ 
false, /*NoDetails*/ true); - } if (I < E) OS << ' '; @@ -576,7 +600,8 @@ void MIPrinter::print(const MachineInstr &MI) { for (; I < E; ++I) { if (NeedComma) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies); + print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, + getTypeToPrint(MI, I, PrintedTypes, MRI)); NeedComma = true; } @@ -748,8 +773,8 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) { } void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, - const MachineRegisterInfo *MRI, bool IsDef) { + unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint, + bool IsDef) { printTargetFlags(Op); switch (Op.getType()) { case MachineOperand::MO_Register: @@ -773,12 +798,11 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, printReg(Op.getReg(), OS, TRI); // Print the sub register. if (Op.getSubReg() != 0) - OS << ':' << TRI->getSubRegIndexName(Op.getSubReg()); + OS << '.' << TRI->getSubRegIndexName(Op.getSubReg()); if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")"; - assert((!IsDef || MRI) && "for IsDef, MRI must be provided"); - if (IsDef && MRI->getSize(Op.getReg())) - OS << '(' << MRI->getSize(Op.getReg()) << ')'; + if (TypeToPrint.isValid()) + OS << '(' << TypeToPrint << ')'; break; case MachineOperand::MO_Immediate: OS << Op.getImm(); @@ -861,8 +885,25 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << "<mcsymbol " << *Op.getMCSymbol() << ">"; break; case MachineOperand::MO_CFIIndex: { - const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI(); - print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI); + const MachineFunction &MF = *Op.getParent()->getParent()->getParent(); + print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI); + break; + } + case MachineOperand::MO_IntrinsicID: { + Intrinsic::ID ID = Op.getIntrinsicID(); + if (ID < Intrinsic::num_intrinsics) + OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')'; + else { + const MachineFunction &MF = *Op.getParent()->getParent()->getParent(); + const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo(); + OS << "intrinsic(@" << TII->getName(ID) << ')'; + } + break; + } + case MachineOperand::MO_Predicate: { + auto Pred = static_cast<CmpInst::Predicate>(Op.getPredicate()); + OS << (CmpInst::isIntPredicate(Pred) ? 
"int" : "float") << "pred(" + << CmpInst::getPredicateName(Pred) << ')'; break; } } @@ -875,6 +916,8 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "volatile "; if (Op.isNonTemporal()) OS << "non-temporal "; + if (Op.isDereferenceable()) + OS << "dereferenceable "; if (Op.isInvariant()) OS << "invariant "; if (Op.isLoad()) @@ -917,6 +960,9 @@ void MIPrinter::print(const MachineMemOperand &Op) { printLLVMNameWithoutPrefix( OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); break; + case PseudoSourceValue::TargetCustom: + llvm_unreachable("TargetCustom pseudo source values are not supported"); + break; } } printOffset(Op.getOffset()); @@ -956,32 +1002,32 @@ void MIPrinter::print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI) { switch (CFI.getOperation()) { case MCCFIInstruction::OpSameValue: - OS << ".cfi_same_value "; + OS << "same_value "; if (CFI.getLabel()) OS << "<mcsymbol> "; printCFIRegister(CFI.getRegister(), OS, TRI); break; case MCCFIInstruction::OpOffset: - OS << ".cfi_offset "; + OS << "offset "; if (CFI.getLabel()) OS << "<mcsymbol> "; printCFIRegister(CFI.getRegister(), OS, TRI); OS << ", " << CFI.getOffset(); break; case MCCFIInstruction::OpDefCfaRegister: - OS << ".cfi_def_cfa_register "; + OS << "def_cfa_register "; if (CFI.getLabel()) OS << "<mcsymbol> "; printCFIRegister(CFI.getRegister(), OS, TRI); break; case MCCFIInstruction::OpDefCfaOffset: - OS << ".cfi_def_cfa_offset "; + OS << "def_cfa_offset "; if (CFI.getLabel()) OS << "<mcsymbol> "; OS << CFI.getOffset(); break; case MCCFIInstruction::OpDefCfa: - OS << ".cfi_def_cfa "; + OS << "def_cfa "; if (CFI.getLabel()) OS << "<mcsymbol> "; printCFIRegister(CFI.getRegister(), OS, TRI); diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp index 8e7566a..c690bcf 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp @@ -33,7 +33,7 @@ struct MIRPrintingPass : public MachineFunctionPass { MIRPrintingPass() : MachineFunctionPass(ID), OS(dbgs()) {} MIRPrintingPass(raw_ostream &OS) : MachineFunctionPass(ID), OS(OS) {} - const char *getPassName() const override { return "MIR Printing Pass"; } + StringRef getPassName() const override { return "MIR Printing Pass"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 689dd07..3869f97 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -51,7 +51,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); + auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); assert(getNumber() >= 0 && "cannot get label for unreachable MBB"); CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + @@ -74,7 +74,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { /// MBBs start out as #-1. When a MBB is added to a MachineFunction, it /// gets the next available unique MBB number. If it is removed from a /// MachineFunction, it goes back to being #-1. 
-void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { +void ilist_callback_traits<MachineBasicBlock>::addNodeToList( + MachineBasicBlock *N) { MachineFunction &MF = *N->getParent(); N->Number = MF.addToMBBNumbering(N); @@ -85,7 +86,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { I->AddRegOperandsToUseLists(RegInfo); } -void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { +void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList( + MachineBasicBlock *N) { N->getParent()->removeFromMBBNumbering(N->Number); N->Number = -1; } @@ -116,15 +118,13 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { /// When moving a range of instructions from one MBB list to another, we need to /// update the parent pointers and the use/def lists. -void ilist_traits<MachineInstr>:: -transferNodesFromList(ilist_traits<MachineInstr> &FromList, - ilist_iterator<MachineInstr> First, - ilist_iterator<MachineInstr> Last) { +void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList, + instr_iterator First, + instr_iterator Last) { assert(Parent->getParent() == FromList.Parent->getParent() && "MachineInstr parent mismatch!"); - - // Splice within the same MBB -> no change. - if (Parent == FromList.Parent) return; + assert(this != &FromList && "Called without a real transfer..."); + assert(Parent != FromList.Parent && "Two lists have the same parent?"); // If splicing between two blocks within the same function, just update the // parent pointers. @@ -132,7 +132,7 @@ transferNodesFromList(ilist_traits<MachineInstr> &FromList, First->setParent(Parent); } -void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { +void ilist_traits<MachineInstr>::deleteNode(MachineInstr *MI) { assert(!MI->getParent() && "MI is still in a block!"); Parent->getParent()->DeleteMachineInstr(MI); } @@ -149,12 +149,25 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { iterator E = end(); + while (I != E && (I->isPHI() || I->isPosition())) + ++I; + // FIXME: This needs to change if we wish to bundle labels + // inside the bundle. + assert((I == E || !I->isInsideBundle()) && + "First non-phi / non-label instruction is inside a bundle!"); + return I; +} + +MachineBasicBlock::iterator +MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) { + iterator E = end(); while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue())) ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values // inside the bundle. assert((I == E || !I->isInsideBundle()) && - "First non-phi / non-label instruction is inside a bundle!"); + "First non-phi / non-label / non-debug " + "instruction is inside a bundle!"); return I; } @@ -178,10 +191,7 @@ MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() { // Skip over begin-of-block dbg_value instructions. 
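The replacement on the next lines calls skipDebugInstructionsForward instead of open-coding the skip. The helper behaves like the loop being deleted, roughly as in this sketch (not the actual header implementation):

// Advance past dbg_value instructions, stopping at End.
template <typename IterT>
static IterT skipDebugLike(IterT It, IterT End) {
  while (It != End && It->isDebugValue())
    ++It;
  return It;
}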
- iterator I = begin(), E = end(); - while (I != E && I->isDebugValue()) - ++I; - return I; + return skipDebugInstructionsForward(begin(), end()); } MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { @@ -276,9 +286,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (!livein_empty()) { if (Indexes) OS << '\t'; OS << " Live Ins:"; - for (const auto &LI : make_range(livein_begin(), livein_end())) { + for (const auto &LI : LiveIns) { OS << ' ' << PrintReg(LI.PhysReg, TRI); - if (LI.LaneMask != ~0u) + if (!LI.LaneMask.all()) OS << ':' << PrintLaneMask(LI.LaneMask); } OS << '\n'; @@ -323,22 +333,20 @@ void MachineBasicBlock::printAsOperand(raw_ostream &OS, } void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { - LiveInVector::iterator I = std::find_if( - LiveIns.begin(), LiveIns.end(), - [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); + LiveInVector::iterator I = find_if( + LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); if (I == LiveIns.end()) return; I->LaneMask &= ~LaneMask; - if (I->LaneMask == 0) + if (I->LaneMask.none()) LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { - livein_iterator I = std::find_if( - LiveIns.begin(), LiveIns.end(), - [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); - return I != livein_end() && (I->LaneMask & LaneMask) != 0; + livein_iterator I = find_if( + LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); + return I != livein_end() && (I->LaneMask & LaneMask).any(); } void MachineBasicBlock::sortUniqueLiveIns() { @@ -418,7 +426,7 @@ void MachineBasicBlock::updateTerminator() { // The block has an unconditional branch. If its successor is now its // layout successor, delete the branch. if (isLayoutSuccessor(TBB)) - TII->RemoveBranch(*this); + TII->removeBranch(*this); } else { // The block has an unconditional fallthrough. If its successor is not its // layout successor, insert a branch. First we have to locate the only @@ -438,7 +446,7 @@ void MachineBasicBlock::updateTerminator() { // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + TII->insertBranch(*this, TBB, nullptr, Cond, DL); } return; } @@ -448,13 +456,13 @@ void MachineBasicBlock::updateTerminator() { // successors is its layout successor, rewrite it to a fallthrough // conditional branch. if (isLayoutSuccessor(TBB)) { - if (TII->ReverseBranchCondition(Cond)) + if (TII->reverseBranchCondition(Cond)) return; - TII->RemoveBranch(*this); - TII->InsertBranch(*this, FBB, nullptr, Cond, DL); + TII->removeBranch(*this); + TII->insertBranch(*this, FBB, nullptr, Cond, DL); } else if (isLayoutSuccessor(FBB)) { - TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + TII->removeBranch(*this); + TII->insertBranch(*this, TBB, nullptr, Cond, DL); } return; } @@ -476,37 +484,37 @@ void MachineBasicBlock::updateTerminator() { // Remove the conditional jump, leaving unconditional fallthrough. // FIXME: This does not seem like a reasonable pattern to support, but it // has been seen in the wild coming out of degenerate ARM test cases. - TII->RemoveBranch(*this); + TII->removeBranch(*this); // Finally update the unconditional successor to be reached via a branch if // it would not be reached by fallthrough. 
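This hunk systematically renames the branch-editing hooks to their lower-case spellings (analyzeBranch, removeBranch, insertBranch, reverseBranchCondition). For reference, the usual analyze/remove/insert sequence under the new names looks roughly like the following; a sketch only, assuming a valid TII and DL and ignoring error handling.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Re-emit MBB's terminator with the renamed hooks; purely illustrative.
static void reemitTerminator(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                             const DebugLoc &DL) {
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->analyzeBranch(MBB, TBB, FBB, Cond))
    return; // terminator not analyzable, leave it alone
  TII->removeBranch(MBB);
  if (TBB)
    TII->insertBranch(MBB, TBB, FBB, Cond, DL);
}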
if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + TII->insertBranch(*this, TBB, nullptr, Cond, DL); return; } // We enter here iff exactly one successor is TBB which cannot fallthrough // and the rest successors if any are EHPads. In this case, we need to // change the conditional branch into unconditional branch. - TII->RemoveBranch(*this); + TII->removeBranch(*this); Cond.clear(); - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + TII->insertBranch(*this, TBB, nullptr, Cond, DL); return; } // The block has a fallthrough conditional branch. if (isLayoutSuccessor(TBB)) { - if (TII->ReverseBranchCondition(Cond)) { + if (TII->reverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); + TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL); return; } - TII->RemoveBranch(*this); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); + TII->removeBranch(*this); + TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL); } else if (!isLayoutSuccessor(FallthroughBB)) { - TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL); + TII->removeBranch(*this); + TII->insertBranch(*this, TBB, FallthroughBB, Cond, DL); } } @@ -545,7 +553,7 @@ void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) { void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs) { - succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ); + succ_iterator I = find(Successors, Succ); removeSuccessor(I, NormalizeSuccProbs); } @@ -611,7 +619,7 @@ void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) { } void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) { - pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred); + pred_iterator I = find(Predecessors, Pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } @@ -661,11 +669,11 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) { } bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { - return std::find(pred_begin(), pred_end(), MBB) != pred_end(); + return is_contained(predecessors(), MBB); } bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - return std::find(succ_begin(), succ_end(), MBB) != succ_end(); + return is_contained(successors(), MBB); } bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { @@ -775,7 +783,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, continue; unsigned Reg = OI->getReg(); - if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end()) + if (!is_contained(UsedRegs, Reg)) UsedRegs.push_back(Reg); } } @@ -802,9 +810,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(), E = Terminators.end(); I != E; ++I) { - if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) == - NewTerminators.end()) - Indexes->removeMachineInstrFromMaps(**I); + if (!is_contained(NewTerminators, *I)) + Indexes->removeMachineInstrFromMaps(**I); } } @@ -813,7 +820,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (!NMBB->isLayoutSuccessor(Succ)) { SmallVector<MachineOperand, 4> Cond; const TargetInstrInfo *TII = 
getParent()->getSubtarget().getInstrInfo(); - TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL); + TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL); if (Indexes) { for (MachineInstr &MI : NMBB->instrs()) { @@ -1090,16 +1097,16 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, bool Changed = false; - MachineFunction::iterator FallThru = std::next(getIterator()); + MachineBasicBlock *FallThru = getNextNode(); if (!DestA && !DestB) { // Block falls through to successor. - DestA = &*FallThru; - DestB = &*FallThru; + DestA = FallThru; + DestB = FallThru; } else if (DestA && !DestB) { if (IsCond) // Block ends in conditional jump that falls through to successor. - DestB = &*FallThru; + DestB = FallThru; } else { assert(DestA && DestB && IsCond && "CFG in a bad state. Cannot correct CFG edges"); @@ -1130,17 +1137,11 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, /// instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { - DebugLoc DL; - instr_iterator E = instr_end(); - if (MBBI == E) - return DL; - // Skip debug declarations, we don't want a DebugLoc from them. - while (MBBI != E && MBBI->isDebugValue()) - MBBI++; - if (MBBI != E) - DL = MBBI->getDebugLoc(); - return DL; + MBBI = skipDebugInstructionsForward(MBBI, instr_end()); + if (MBBI != instr_end()) + return MBBI->getDebugLoc(); + return {}; } /// Return probability of the edge from this block to MBB. @@ -1287,3 +1288,14 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const { // care what kind of return it is, putting a mask after it is a no-op. return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr; } + +void MachineBasicBlock::clearLiveIns() { + LiveIns.clear(); +} + +MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { + assert(getParent()->getProperties().hasProperty( + MachineFunctionProperties::Property::TracksLiveness) && + "Liveness information is accurate"); + return LiveIns.begin(); +} diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 6c0f99f..7d5124d 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -42,9 +42,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG( "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " - "profile count if available."), - - clEnumValEnd)); + "profile count if available."))); extern cl::opt<std::string> ViewBlockFreqFuncName; extern cl::opt<unsigned> ViewHotFreqPercent; @@ -52,29 +50,26 @@ extern cl::opt<unsigned> ViewHotFreqPercent; namespace llvm { template <> struct GraphTraits<MachineBlockFrequencyInfo *> { - typedef const MachineBasicBlock NodeType; + typedef const MachineBasicBlock *NodeRef; typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; - typedef MachineFunction::const_iterator nodes_iterator; + typedef pointer_iterator<MachineFunction::const_iterator> nodes_iterator; - static inline const NodeType * - getEntryNode(const MachineBlockFrequencyInfo *G) { + static NodeRef getEntryNode(const MachineBlockFrequencyInfo *G) { return &G->getFunction()->front(); } - static ChildIteratorType child_begin(const NodeType *N) { + static ChildIteratorType child_begin(const NodeRef N) { return N->succ_begin(); } - static ChildIteratorType 
child_end(const NodeType *N) { - return N->succ_end(); - } + static ChildIteratorType child_end(const NodeRef N) { return N->succ_end(); } static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) { - return G->getFunction()->begin(); + return nodes_iterator(G->getFunction()->begin()); } static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) { - return G->getFunction()->end(); + return nodes_iterator(G->getFunction()->end()); } }; @@ -175,6 +170,12 @@ Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount( return MBFI ? MBFI->getBlockProfileCount(*F, MBB) : None; } +Optional<uint64_t> +MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { + const Function *F = MBFI->getFunction()->getFunction(); + return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None; +} + const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { return MBFI ? MBFI->getFunction() : nullptr; } diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 03dda8b..40e3840 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TailDuplicator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -78,10 +79,14 @@ static cl::opt<unsigned> ExitBlockBias( "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden); +// Definition: +// - Outlining: placement of a basic block outside the chain or hot path. + static cl::opt<bool> OutlineOptionalBranches( "outline-optional-branches", - cl::desc("Put completely optional branches, i.e. branches with a common " - "post dominator, out of line."), + cl::desc("Outlining optional branches will place blocks that are optional " + "branches, i.e. branches with a common post dominator, outside " + "the hot path or chain"), cl::init(false), cl::Hidden); static cl::opt<unsigned> OutlineOptionalThreshold( @@ -117,6 +122,12 @@ static cl::opt<unsigned> MisfetchCost( static cl::opt<unsigned> JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden); +static cl::opt<bool> +TailDupPlacement("tail-dup-placement", + cl::desc("Perform tail duplication during placement. " + "Creates more fallthrough opportunites in " + "outline branches."), + cl::init(true), cl::Hidden); static cl::opt<bool> BranchFoldPlacement("branch-fold-placement", @@ -124,6 +135,14 @@ BranchFoldPlacement("branch-fold-placement", "Reduces code size."), cl::init(true), cl::Hidden); +// Heuristic for tail duplication. +static cl::opt<unsigned> TailDuplicatePlacementThreshold( + "tail-dup-placement-threshold", + cl::desc("Instruction cutoff for tail duplication during layout. " + "Tail merging during layout is forced to have a threshold " + "that won't conflict."), cl::init(2), + cl::Hidden); + extern cl::opt<unsigned> StaticLikelyProb; extern cl::opt<unsigned> ProfileLikelyProb; @@ -181,6 +200,16 @@ public: /// \brief End of blocks within the chain. iterator end() { return Blocks.end(); } + bool remove(MachineBasicBlock* BB) { + for(iterator i = begin(); i != end(); ++i) { + if (*i == BB) { + Blocks.erase(i); + return true; + } + } + return false; + } + /// \brief Merge a block chain into this one. 
/// /// This routine merges a block chain into this one. It takes care of forming @@ -235,7 +264,7 @@ public: namespace { class MachineBlockPlacement : public MachineFunctionPass { /// \brief A typedef for a block filter set. - typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet; + typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet; /// \brief work lists of blocks that are ready to be laid out SmallVector<MachineBasicBlock *, 16> BlockWorkList; @@ -253,6 +282,11 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the loop info. MachineLoopInfo *MLI; + /// \brief Preferred loop exit. + /// Member variable for convenience. It may be removed by duplication deep + /// in the call stack. + MachineBasicBlock *PreferredLoopExit; + /// \brief A handle to the target's instruction info. const TargetInstrInfo *TII; @@ -262,6 +296,13 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the post dominator tree. MachineDominatorTree *MDT; + /// \brief Duplicator used to duplicate tails during placement. + /// + /// Placement decisions can open up new tail duplication opportunities, but + /// since tail duplication affects placement decisions of later blocks, it + /// must be done inline. + TailDuplicator TailDup; + /// \brief A set of blocks that are unavoidably execute, i.e. they dominate /// all terminators of the MachineFunction. SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks; @@ -283,8 +324,26 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; +#ifndef NDEBUG + /// The set of basic blocks that have terminators that cannot be fully + /// analyzed. These basic blocks cannot be re-ordered safely by + /// MachineBlockPlacement, and we must preserve physical layout of these + /// blocks and their successors through the pass. + SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits; +#endif + + /// Decrease the UnscheduledPredecessors count for all blocks in chain, and + /// if the count goes to 0, add them to the appropriate work list. void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter = nullptr); + + /// Decrease the UnscheduledPredecessors count for a single block, and + /// if the count goes to 0, add them to the appropriate work list. 
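BlockFilterSet above switches from SmallPtrSet to SmallSetVector. One property that buys, and presumably the motivation, is deterministic insertion-ordered iteration on top of set-like membership and removal; a small illustration:

#include "llvm/ADT/SetVector.h"

static int orderedSum() {
  llvm::SmallSetVector<int, 16> S;
  S.insert(3);
  S.insert(1);
  S.insert(3);    // duplicate, ignored
  int Sum = 0;
  for (int V : S) // visits 3 then 1, in insertion order
    Sum += V;
  S.remove(1);    // erase by value, the operation BlockFilter->remove uses later
  return Sum;     // 4
}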
+ void markBlockSuccessors( + BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB, + const BlockFilterSet *BlockFilter = nullptr); + + BranchProbability collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter, @@ -294,6 +353,16 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet *BlockFilter, BranchProbability SuccProb, BranchProbability HotProb); + bool repeatedlyTailDuplicateBlock( + MachineBasicBlock *BB, MachineBasicBlock *&LPred, + MachineBasicBlock *LoopHeaderBB, + BlockChain &Chain, BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt); + bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred, + const BlockChain &Chain, + BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt, + bool &DuplicatedToPred); bool hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain, BranchProbability SuccProb, @@ -319,7 +388,7 @@ class MachineBlockPlacement : public MachineFunctionPass { SmallPtrSetImpl<BlockChain *> &UpdatedPreds, const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter = nullptr); + BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineLoop &L, @@ -384,37 +453,49 @@ static std::string getBlockName(MachineBasicBlock *BB) { /// When a chain is being merged into the "placed" chain, this routine will /// quickly walk the successors of each block in the chain and mark them as /// having one fewer active predecessor. It also adds any successors of this -/// chain which reach the zero-predecessor state to the worklist passed in. +/// chain which reach the zero-predecessor state to the appropriate worklist. void MachineBlockPlacement::markChainSuccessors( BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. for (MachineBasicBlock *MBB : Chain) { - // Add any successors for which this is the only un-placed in-loop - // predecessor to the worklist as a viable candidate for CFG-neutral - // placement. No subsequent placement of this block will violate the CFG - // shape, so we get to use heuristics to choose a favorable placement. - for (MachineBasicBlock *Succ : MBB->successors()) { - if (BlockFilter && !BlockFilter->count(Succ)) - continue; - BlockChain &SuccChain = *BlockToChain[Succ]; - // Disregard edges within a fixed chain, or edges to the loop header. - if (&Chain == &SuccChain || Succ == LoopHeaderBB) - continue; + markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter); + } +} - // This is a cross-chain edge that is within the loop, so decrement the - // loop predecessor count of the destination chain. - if (SuccChain.UnscheduledPredecessors == 0 || - --SuccChain.UnscheduledPredecessors > 0) - continue; +/// \brief Mark a single block's successors as having one fewer preds. +/// +/// Under normal circumstances, this is only called by markChainSuccessors, +/// but if a block that was to be placed is completely tail-duplicated away, +/// and was duplicated into the chain end, we need to redo markBlockSuccessors +/// for just that block. 
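The counting rule described in the comments above (decrement a chain's UnscheduledPredecessors and enqueue its head when the count reaches zero) can be made concrete with a toy rendering; the Toy types here are hypothetical stand-ins for BlockChain and MachineBasicBlock, not the pass's data structures.

#include <vector>

struct ToyChain;
struct ToyNode { ToyChain *Chain = nullptr; std::vector<ToyNode *> Succs; };
struct ToyChain { unsigned UnscheduledPredecessors = 0; ToyNode *Head = nullptr; };

// When BB is placed, each successor chain loses one unscheduled predecessor;
// chains that reach zero become layout candidates.
static void markPlaced(ToyNode *BB, std::vector<ToyNode *> &WorkList) {
  for (ToyNode *Succ : BB->Succs) {
    ToyChain *SC = Succ->Chain;
    if (SC == BB->Chain || SC->UnscheduledPredecessors == 0)
      continue;
    if (--SC->UnscheduledPredecessors == 0)
      WorkList.push_back(SC->Head);
  }
}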
+void MachineBlockPlacement::markBlockSuccessors( + BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB, + const BlockFilterSet *BlockFilter) { + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. + for (MachineBasicBlock *Succ : MBB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) + continue; + BlockChain &SuccChain = *BlockToChain[Succ]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || Succ == LoopHeaderBB) + continue; - auto *MBB = *SuccChain.begin(); - if (MBB->isEHPad()) - EHPadWorkList.push_back(MBB); - else - BlockWorkList.push_back(MBB); - } + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.UnscheduledPredecessors == 0 || + --SuccChain.UnscheduledPredecessors > 0) + continue; + + auto *NewBB = *SuccChain.begin(); + if (NewBB->isEHPad()) + EHPadWorkList.push_back(NewBB); + else + BlockWorkList.push_back(NewBB); } } @@ -627,16 +708,46 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( // BB->Succ. This is equivalent to looking the CFG backward with backward // edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without // profile data). - + // -------------------------------------------------------------------------- + // Case 3: forked diamond + // S + // / \ + // / \ + // BB Pred + // | \ / | + // | \ / | + // | X | + // | / \ | + // | / \ | + // S1 S2 + // + // The current block is BB and edge BB->S1 is now being evaluated. + // As above S->BB was already selected because + // prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2). + // + // topo-order: + // + // S-------| ---S + // | | | | + // ---BB | | BB + // | | | | + // | Pred----| | S1---- + // | | | | + // --(S1 or S2) ---Pred-- + // + // topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2) + // + min(freq(Pred->S1), freq(Pred->S2)) + // Non-topo-order cost: + // In the worst case, S2 will not get laid out after Pred. + // non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2). + // To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2)) + // is 0. Then the non topo layout is better when + // freq(S->Pred) < freq(BB->S1). + // This is exactly what is checked below. + // Note there are other shapes that apply (Pred may not be a single block, + // but they all fit this general pattern.) BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB); - // Forward checking. For case 2, SuccProb will be 1. - if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb - << " (prob) (CFG conflict)\n"); - return true; - } - // Make sure that a hot successor doesn't have a globally more // important predecessor. BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; @@ -647,11 +758,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( (BlockFilter && !BlockFilter->count(Pred)) || BlockToChain[Pred] == &Chain) continue; - // Do backward checking. For case 1, it is actually redundant check. For - // case 2 above, we need a backward checking to filter out edges that are - // not 'strongly' biased. 
With profile data available, the check is mostly - // redundant too (when threshold prob is set at 50%) unless S has more than - // two successors. + // Do backward checking. + // For all cases above, we need a backward checking to filter out edges that + // are not 'strongly' biased. With profile data available, the check is + // mostly redundant for case 2 (when threshold prob is set at 50%) unless S + // has more than two successors. // BB Pred // \ / // Succ @@ -660,6 +771,8 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) * // HotProb // i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb + // Case 1 is covered too, because the first equation reduces to: + // prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle) BlockFrequency PredEdgeFreq = MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ); if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) { @@ -669,7 +782,7 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb + DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " -> " << SuccProb << " (prob) (non-cold CFG conflict)\n"); return true; } @@ -699,7 +812,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, auto AdjustedSumProb = collectViableSuccessors(BB, Chain, BlockFilter, Successors); - DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n"); for (MachineBasicBlock *Succ : Successors) { auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ); BranchProbability SuccProb = @@ -718,15 +831,23 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, continue; DEBUG( - dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb - << " (prob)" + dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: " + << SuccProb << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "") << "\n"); - if (BestSucc && BestProb >= SuccProb) + + if (BestSucc && BestProb >= SuccProb) { + DEBUG(dbgs() << " Not the best candidate, continuing\n"); continue; + } + + DEBUG(dbgs() << " Setting it as best candidate\n"); BestSucc = Succ; BestProb = SuccProb; } + if (BestSucc) + DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc) << "\n"); + return BestSucc; } @@ -746,10 +867,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // worklist of already placed entries. // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. 
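The hunk that follows rewrites the worklist pruning with the range form of remove_if from llvm/ADT/STLExtras.h. The idiom on a plain container, as a sketch:

#include "llvm/ADT/STLExtras.h"
#include <vector>

// Erase-remove with the range helper: drop every element matching the predicate.
static void dropNegatives(std::vector<int> &WorkList) {
  WorkList.erase(llvm::remove_if(WorkList, [](int V) { return V < 0; }),
                 WorkList.end());
}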
- WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), - [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), + WorkList.erase(remove_if(WorkList, + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); if (WorkList.empty()) @@ -858,7 +979,7 @@ void MachineBlockPlacement::fillWorkLists( void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter) { + BlockFilterSet *BlockFilter) { assert(BB && "BB must not be null.\n"); assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n"); MachineFunction::iterator PrevUnplacedBlockIt = F->begin(); @@ -893,6 +1014,17 @@ void MachineBlockPlacement::buildChain( "layout successor until the CFG reduces\n"); } + // Placement may have changed tail duplication opportunities. + // Check for that now. + if (TailDupPlacement && BestSucc) { + // If the chosen successor was duplicated into all its predecessors, + // don't bother laying it out, just go round the loop again with BB as + // the chain end. + if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain, + BlockFilter, PrevUnplacedBlockIt)) + continue; + } + // Place this block, updating the datastructures to reflect its placement. BlockChain &SuccChain = *BlockToChain[BestSucc]; // Zero out UnscheduledPredecessors for the successor we're about to merge in case @@ -922,6 +1054,16 @@ void MachineBlockPlacement::buildChain( MachineBasicBlock * MachineBlockPlacement::findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet) { + // Placing the latch block before the header may introduce an extra branch + // that skips this block the first time the loop is executed, which we want + // to avoid when optimising for size. + // FIXME: in theory there is a case that does not introduce a new branch, + // i.e. when the layout predecessor does not fallthrough to the loop header. + // In practice this never happens though: there always seems to be a preheader + // that can fallthrough and that is also placed before the header. + if (F->getFunction()->optForSize()) + return L.getHeader(); + // Check that the header hasn't been fused with a preheader block due to // crazy branches. If it has, we need to start with the header at the top to // prevent pulling the preheader into the loop body. @@ -937,7 +1079,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { if (!LoopBlockSet.count(Pred)) continue; - DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " + DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has " << Pred->succ_size() << " successors, "; MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) @@ -1066,8 +1208,14 @@ MachineBlockPlacement::findBestLoopExit(MachineLoop &L, } // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop. 
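The exiting block selected here feeds rotateLoop further down, which rotates the chain so that block ends up at the bottom of the layout. The effect is the same as this stand-alone std::rotate sketch on plain ints (an analogue, not the pass's code):

#include <algorithm>
#include <iterator>
#include <vector>

// Everything after the exiting block moves to the front; the exit ends up last.
static void rotateToBottom(std::vector<int> &Chain, int ExitingBlock) {
  auto It = std::find(Chain.begin(), Chain.end(), ExitingBlock);
  if (It == Chain.end())
    return;
  std::rotate(Chain.begin(), std::next(It), Chain.end());
}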
- if (!ExitingBB || L.getNumBlocks() == 1) + if (!ExitingBB) { + DEBUG(dbgs() << " No other candidate exit blocks, using loop header\n"); return nullptr; + } + if (L.getNumBlocks() == 1) { + DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n"); + return nullptr; + } // Also, if we have exit blocks which lead to outer loops but didn't select // one of them as the exiting block we are rotating toward, disable loop @@ -1116,8 +1264,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, } } - BlockChain::iterator ExitIt = - std::find(LoopChain.begin(), LoopChain.end(), ExitingBB); + BlockChain::iterator ExitIt = find(LoopChain, ExitingBB); if (ExitIt == LoopChain.end()) return; @@ -1140,7 +1287,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, void MachineBlockPlacement::rotateLoopWithProfile( BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { auto HeaderBB = L.getHeader(); - auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB); + auto HeaderIter = find(LoopChain, HeaderBB); auto RotationPos = LoopChain.end(); BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); @@ -1340,9 +1487,8 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) { // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. - MachineBasicBlock *ExitingBB = nullptr; if (!RotateLoopWithProfile && LoopTop == L.getHeader()) - ExitingBB = findBestLoopExit(L, LoopBlockSet); + PreferredLoopExit = findBestLoopExit(L, LoopBlockSet); BlockChain &LoopChain = *BlockToChain[LoopTop]; @@ -1361,7 +1507,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) { if (RotateLoopWithProfile) rotateLoopWithProfile(LoopChain, L, LoopBlockSet); else - rotateLoop(LoopChain, ExitingBB, LoopBlockSet); + rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -1374,7 +1520,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) { } for (MachineBasicBlock *ChainBB : LoopChain) { dbgs() << " ... " << getBlockName(ChainBB) << "\n"; - if (!LoopBlockSet.erase(ChainBB)) { + if (!LoopBlockSet.remove(ChainBB)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough // from a loop block to a non-loop block or vice versa. @@ -1451,6 +1597,9 @@ void MachineBlockPlacement::buildCFGChains() { << getBlockName(BB) << " -> " << getBlockName(NextBB) << "\n"); Chain->merge(NextBB, nullptr); +#ifndef NDEBUG + BlocksWithUnanalyzableExits.insert(&*BB); +#endif FI = NextFI; BB = NextBB; } @@ -1460,6 +1609,7 @@ void MachineBlockPlacement::buildCFGChains() { collectMustExecuteBBs(); // Build any loop-based chains. + PreferredLoopExit = nullptr; for (MachineLoop *L : *MLI) buildLoopChains(*L); @@ -1522,6 +1672,19 @@ void MachineBlockPlacement::buildCFGChains() { Cond.clear(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. +#ifndef NDEBUG + if (!BlocksWithUnanalyzableExits.count(PrevBB)) { + // Given the exact block placement we chose, we may actually not _need_ to + // be able to edit PrevBB's terminator sequence, but not being _able_ to + // do that at this point is a bug. 
+ assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) || + !PrevBB->canFallThrough()) && + "Unexpected block with un-analyzable fallthrough!"); + Cond.clear(); + TBB = FBB = nullptr; + } +#endif + // The "PrevBB" is not yet updated to reflect current code layout, so, // o. it may fall-through to a block without explicit "goto" instruction // before layout, and no longer fall-through it after layout; or @@ -1576,15 +1739,15 @@ void MachineBlockPlacement::optimizeBranches() { if (TBB && !Cond.empty() && FBB && MBPI->getEdgeProbability(ChainBB, FBB) > MBPI->getEdgeProbability(ChainBB, TBB) && - !TII->ReverseBranchCondition(Cond)) { + !TII->reverseBranchCondition(Cond)) { DEBUG(dbgs() << "Reverse order of the two branches: " << getBlockName(ChainBB) << "\n"); DEBUG(dbgs() << " Edge probability: " << MBPI->getEdgeProbability(ChainBB, FBB) << " vs " << MBPI->getEdgeProbability(ChainBB, TBB) << "\n"); DebugLoc dl; // FIXME: this is nowhere - TII->RemoveBranch(*ChainBB); - TII->InsertBranch(*ChainBB, FBB, TBB, Cond, dl); + TII->removeBranch(*ChainBB); + TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl); ChainBB->updateTerminator(); } } @@ -1659,6 +1822,175 @@ void MachineBlockPlacement::alignBlocks() { } } +/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if +/// it was duplicated into its chain predecessor and removed. +/// \p BB - Basic block that may be duplicated. +/// +/// \p LPred - Chosen layout predecessor of \p BB. +/// Updated to be the chain end if LPred is removed. +/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong. +/// \p BlockFilter - Set of blocks that belong to the loop being laid out. +/// Used to identify which blocks to update predecessor +/// counts. +/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was +/// chosen in the given order due to unnatural CFG +/// only needed if \p BB is removed and +/// \p PrevUnplacedBlockIt pointed to \p BB. +/// @return true if \p BB was removed. +bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( + MachineBasicBlock *BB, MachineBasicBlock *&LPred, + MachineBasicBlock *LoopHeaderBB, + BlockChain &Chain, BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt) { + bool Removed, DuplicatedToLPred; + bool DuplicatedToOriginalLPred; + Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter, + PrevUnplacedBlockIt, + DuplicatedToLPred); + if (!Removed) + return false; + DuplicatedToOriginalLPred = DuplicatedToLPred; + // Iteratively try to duplicate again. It can happen that a block that is + // duplicated into is still small enough to be duplicated again. + // No need to call markBlockSuccessors in this case, as the blocks being + // duplicated from here on are already scheduled. + // Note that DuplicatedToLPred always implies Removed. + while (DuplicatedToLPred) { + assert (Removed && "Block must have been removed to be duplicated into its " + "layout predecessor."); + MachineBasicBlock *DupBB, *DupPred; + // The removal callback causes Chain.end() to be updated when a block is + // removed. On the first pass through the loop, the chain end should be the + // same as it was on function entry. On subsequent passes, because we are + // duplicating the block at the end of the chain, if it is removed the + // chain will have shrunk by one block. + BlockChain::iterator ChainEnd = Chain.end(); + DupBB = *(--ChainEnd); + // Now try to duplicate again. 
+ if (ChainEnd == Chain.begin()) + break; + DupPred = *std::prev(ChainEnd); + Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter, + PrevUnplacedBlockIt, + DuplicatedToLPred); + } + // If BB was duplicated into LPred, it is now scheduled. But because it was + // removed, markChainSuccessors won't be called for its chain. Instead we + // call markBlockSuccessors for LPred to achieve the same effect. This must go + // at the end because repeating the tail duplication can increase the number + // of unscheduled predecessors. + LPred = *std::prev(Chain.end()); + if (DuplicatedToOriginalLPred) + markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter); + return true; +} + +/// Tail duplicate \p BB into (some) predecessors if profitable. +/// \p BB - Basic block that may be duplicated +/// \p LPred - Chosen layout predecessor of \p BB +/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong. +/// \p BlockFilter - Set of blocks that belong to the loop being laid out. +/// Used to identify which blocks to update predecessor +/// counts. +/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was +/// chosen in the given order due to unnatural CFG +/// only needed if \p BB is removed and +/// \p PrevUnplacedBlockIt pointed to \p BB. +/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will +/// only be true if the block was removed. +/// \return - True if the block was duplicated into all preds and removed. +bool MachineBlockPlacement::maybeTailDuplicateBlock( + MachineBasicBlock *BB, MachineBasicBlock *LPred, + const BlockChain &Chain, BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt, + bool &DuplicatedToLPred) { + + DuplicatedToLPred = false; + DEBUG(dbgs() << "Redoing tail duplication for Succ#" + << BB->getNumber() << "\n"); + bool IsSimple = TailDup.isSimpleBB(BB); + // Blocks with single successors don't create additional fallthrough + // opportunities. Don't duplicate them. TODO: When conditional exits are + // analyzable, allow them to be duplicated. + if (!IsSimple && BB->succ_size() == 1) + return false; + if (!TailDup.shouldTailDuplicate(IsSimple, *BB)) + return false; + // This has to be a callback because none of it can be done after + // BB is deleted. + bool Removed = false; + auto RemovalCallback = + [&](MachineBasicBlock *RemBB) { + // Signal to outer function + Removed = true; + + // Conservative default. + bool InWorkList = true; + // Remove from the Chain and Chain Map + if (BlockToChain.count(RemBB)) { + BlockChain *Chain = BlockToChain[RemBB]; + InWorkList = Chain->UnscheduledPredecessors == 0; + Chain->remove(RemBB); + BlockToChain.erase(RemBB); + } + + // Handle the unplaced block iterator + if (&(*PrevUnplacedBlockIt) == RemBB) { + PrevUnplacedBlockIt++; + } + + // Handle the Work Lists + if (InWorkList) { + SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList; + if (RemBB->isEHPad()) + RemoveList = EHPadWorkList; + RemoveList.erase( + remove_if(RemoveList, + [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}), + RemoveList.end()); + } + + // Handle the filter set + if (BlockFilter) { + BlockFilter->remove(RemBB); + } + + // Remove the block from loop info. 
+ MLI->removeBlock(RemBB); + if (RemBB == PreferredLoopExit) + PreferredLoopExit = nullptr; + + DEBUG(dbgs() << "TailDuplicator deleted block: " + << getBlockName(RemBB) << "\n"); + }; + auto RemovalCallbackRef = + llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback); + + SmallVector<MachineBasicBlock *, 8> DuplicatedPreds; + TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred, + &DuplicatedPreds, &RemovalCallbackRef); + + // Update UnscheduledPredecessors to reflect tail-duplication. + DuplicatedToLPred = false; + for (MachineBasicBlock *Pred : DuplicatedPreds) { + // We're only looking for unscheduled predecessors that match the filter. + BlockChain* PredChain = BlockToChain[Pred]; + if (Pred == LPred) + DuplicatedToLPred = true; + if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) + || PredChain == &Chain) + continue; + for (MachineBasicBlock *NewSucc : Pred->successors()) { + if (BlockFilter && !BlockFilter->count(NewSucc)) + continue; + BlockChain *NewChain = BlockToChain[NewSucc]; + if (NewChain != &Chain && NewChain != PredChain) + NewChain->UnscheduledPredecessors++; + } + } + return Removed; +} + bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -1675,6 +2007,18 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TLI = MF.getSubtarget().getTargetLowering(); MDT = &getAnalysis<MachineDominatorTree>(); + + // Initialize PreferredLoopExit to nullptr here since it may never be set if + // there are no MachineLoops. + PreferredLoopExit = nullptr; + + if (TailDupPlacement) { + unsigned TailDupSize = TailDuplicatePlacementThreshold; + if (MF.getFunction()->optForSize()) + TailDupSize = 1; + TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize); + } + assert(BlockToChain.empty()); buildCFGChains(); @@ -1688,14 +2032,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { BranchFoldPlacement; // No tail merging opportunities if the block number is less than four. if (MF.size() > 3 && EnableTailMerge) { + unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1; BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, - *MBPI); + *MBPI, TailMergeSize); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>(), MLI, /*AfterBlockPlacement=*/true)) { // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); + // Must redo the dominator tree if blocks were changed. + MDT->runOnMachineFunction(MF); ChainAllocator.DestroyAll(); buildCFGChains(); } diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index fe73406..21eff9d 100644 --- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -50,8 +50,7 @@ BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { // This is a linear search. Try to use the const_succ_iterator version when // possible. 
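RemovalCallback in the placement hunk above is handed to the tail duplicator through llvm::function_ref, a non-owning view of a callable that is cheap to pass by value but must not outlive what it refers to. A minimal sketch of the pattern, with hypothetical names:

#include "llvm/ADT/STLExtras.h"

static void notifyRemoved(llvm::function_ref<void(int)> OnRemove, int Id) {
  OnRemove(Id);
}

static int countRemovals() {
  int Removed = 0;
  auto Callback = [&Removed](int) { ++Removed; };
  notifyRemoved(Callback, 42); // function_ref only views Callback, no copy made
  return Removed;              // 1
}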
- return getEdgeProbability(Src, - std::find(Src->succ_begin(), Src->succ_end(), Dst)); + return getEdgeProbability(Src, find(Src->successors(), Dst)); } bool MachineBranchProbabilityInfo::isEdgeHot( diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 1209f73..0766f46 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -177,8 +177,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, unsigned LookAheadLeft = LookAheadLimit; while (LookAheadLeft) { // Skip over dbg_value's. - while (I != E && I->isDebugValue()) - ++I; + I = skipDebugInstructionsForward(I, E); if (I == E) // Reached end of block, register is obviously dead. @@ -227,7 +226,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; // Reading constant physregs is ok. - if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + if (!MRI->isConstantPhysReg(Reg)) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); } @@ -346,7 +345,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. - if (!MI->isInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad(AA)) // FIXME: we should be able to hoist loads with no other side effects if // there are no other instructions which can change memory in this loop. // This is a trivial form of alias analysis. diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index 6b5c6ba..5beed5f 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -56,7 +56,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const override { return "Machine InstCombiner"; } + StringRef getPassName() const override { return "Machine InstCombiner"; } private: bool doSubstitute(unsigned NewSize, unsigned OldSize); @@ -71,6 +71,7 @@ private: improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineCombinerPattern Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, @@ -134,7 +135,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth for (auto *InstrPtr : InsInstrs) { // for each Use unsigned IDepth = 0; - DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";); + DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(TII); dbgs() << "\n";); for (const MachineOperand &MO : InstrPtr->operands()) { // Check for virtual register operand. 
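getPassName() here, as earlier for the MIR printing pass, now returns StringRef rather than const char *. An overriding pass only changes the return type; sketch with a made-up pass name:

#include "llvm/ADT/StringRef.h"

struct ExamplePassLike {
  // The literal has static storage duration, so returning a StringRef to it is safe.
  llvm::StringRef getPassName() const { return "Example Combiner Pass"; }
};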
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) @@ -242,6 +243,7 @@ bool MachineCombiner::improvesCriticalPathLen( MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineCombinerPattern Pattern) { assert(TSchedModel.hasInstrSchedModelOrItineraries() && @@ -269,8 +271,13 @@ bool MachineCombiner::improvesCriticalPathLen( // A more flexible cost calculation for the critical path includes the slack // of the original code sequence. This may allow the transform to proceed // even if the instruction depths (data dependency cycles) become worse. + unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); - unsigned RootLatency = TSchedModel.computeInstrLatency(Root); + unsigned RootLatency = 0; + + for (auto I : DelInstrs) + RootLatency += TSchedModel.computeInstrLatency(I); + unsigned RootSlack = BlockTrace.getInstrSlack(*Root); DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; @@ -421,7 +428,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // resource pressure. if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) || (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, - InstrIdxForVirtReg, P) && + DelInstrs, InstrIdxForVirtReg, P) && preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { for (auto *InstrPtr : InsInstrs) MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 8fdf39d..5de6dec 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -56,11 +56,12 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: void ClobberRegister(unsigned Reg); + void ReadRegister(unsigned Reg); void CopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); @@ -120,6 +121,18 @@ void MachineCopyPropagation::ClobberRegister(unsigned Reg) { } } +void MachineCopyPropagation::ReadRegister(unsigned Reg) { + // If 'Reg' is defined by a copy, the copy is no longer a candidate + // for elimination. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + Reg2MIMap::iterator CI = CopyMap.find(*AI); + if (CI != CopyMap.end()) { + DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); + MaybeDeadCopies.remove(CI->second); + } + } +} + /// Return true if \p PreviousCopy did copy register \p Src to register \p Def. /// This fact may have been obscured by sub register usage or may not be true at /// all even though Src and Def are subregisters of the registers used in @@ -212,12 +225,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If Src is defined by a previous copy, the previous copy cannot be // eliminated. 
- for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { - Reg2MIMap::iterator CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) { - DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump()); - MaybeDeadCopies.remove(CI->second); - } + ReadRegister(Src); + for (const MachineOperand &MO : MI->implicit_operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + ReadRegister(Reg); } DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); @@ -234,6 +249,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // ... // %xmm2<def> = copy %xmm9 ClobberRegister(Def); + for (const MachineOperand &MO : MI->implicit_operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + ClobberRegister(Reg); + } // Remember Def is defined by the copy. for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid(); @@ -245,7 +268,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Remember source that's copied to Def. Once it's clobbered, then // it's no longer available for copy propagation. RegList &DestList = SrcMap[Src]; - if (std::find(DestList.begin(), DestList.end(), Def) == DestList.end()) + if (!is_contained(DestList, Def)) DestList.push_back(Def); continue; @@ -268,17 +291,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { if (MO.isDef()) { Defs.push_back(Reg); - continue; - } - - // If 'Reg' is defined by a copy, the copy is no longer a candidate - // for elimination. - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - Reg2MIMap::iterator CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) { - DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); - MaybeDeadCopies.remove(CI->second); - } + } else { + ReadRegister(Reg); } // Treat undef use like defs for copy propagation but not for // dead copy. We would need to do a liveness check to be sure the copy diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index a7c63ef..c1d5ea9 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -54,28 +54,29 @@ static cl::opt<unsigned> void MachineFunctionInitializer::anchor() {} -void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const { - // Leave this function even in NDEBUG as an out-of-line anchor. -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - for (BitVector::size_type i = 0; i < Properties.size(); ++i) { - bool HasProperty = Properties[i]; - if (OnlySet && !HasProperty) +static const char *getPropertyName(MachineFunctionProperties::Property Prop) { + typedef MachineFunctionProperties::Property P; + switch(Prop) { + case P::FailedISel: return "FailedISel"; + case P::IsSSA: return "IsSSA"; + case P::Legalized: return "Legalized"; + case P::NoPHIs: return "NoPHIs"; + case P::NoVRegs: return "NoVRegs"; + case P::RegBankSelected: return "RegBankSelected"; + case P::Selected: return "Selected"; + case P::TracksLiveness: return "TracksLiveness"; + } + llvm_unreachable("Invalid machine function property"); +} + +void MachineFunctionProperties::print(raw_ostream &OS) const { + const char *Separator = ""; + for (BitVector::size_type I = 0; I < Properties.size(); ++I) { + if (!Properties[I]) continue; - switch(static_cast<Property>(i)) { - case Property::IsSSA: - ROS << (HasProperty ? 
"SSA, " : "Post SSA, "); - break; - case Property::TracksLiveness: - ROS << (HasProperty ? "" : "not ") << "tracking liveness, "; - break; - case Property::AllVRegsAllocated: - ROS << (HasProperty ? "AllVRegsAllocated" : "HasVRegs"); - break; - default: - break; - } + OS << Separator << getPropertyName(static_cast<Property>(I)); + Separator = ", "; } -#endif } //===----------------------------------------------------------------------===// @@ -85,7 +86,7 @@ void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const { // Out-of-line virtual method. MachineFunctionInfo::~MachineFunctionInfo() {} -void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { +void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } @@ -100,6 +101,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), MMI(mmi) { + FunctionNumber = FunctionNum; + init(); +} + +void MachineFunction::init() { // Assume the function starts in SSA form with correct liveness. Properties.set(MachineFunctionProperties::Property::IsSSA); Properties.set(MachineFunctionProperties::Property::TracksLiveness); @@ -112,11 +118,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, // We can realign the stack if the target supports it and the user hasn't // explicitly asked us not to. bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() && - !F->hasFnAttribute("no-realign-stack"); + !Fn->hasFnAttribute("no-realign-stack"); FrameInfo = new (Allocator) MachineFrameInfo( getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP, /*ForceRealign=*/CanRealignSP && - F->hasFnAttribute(Attribute::StackAlignment)); + Fn->hasFnAttribute(Attribute::StackAlignment)); if (Fn->hasFnAttribute(Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); @@ -133,15 +139,14 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, if (AlignAllFunctions) Alignment = AlignAllFunctions; - FunctionNumber = FunctionNum; JumpTableInfo = nullptr; if (isFuncletEHPersonality(classifyEHPersonality( - F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) { + Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr))) { WinEHInfo = new (Allocator) WinEHFuncInfo(); } - assert(TM.isCompatibleDataLayout(getDataLayout()) && + assert(Target.isCompatibleDataLayout(getDataLayout()) && "Can't create a MachineFunction using a Module with a " "Target-incompatible DataLayout attached\n"); @@ -149,6 +154,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, } MachineFunction::~MachineFunction() { + clear(); +} + +void MachineFunction::clear() { + Properties.reset(); // Don't call destructors on MachineInstr and MachineOperand. All of their // memory comes from the BumpPtrAllocator which is about to be purged. 
// @@ -296,9 +306,12 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand *MachineFunction::getMachineMemOperand( MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, - unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) { + unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, + SynchronizationScope SynchScope, AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) { return new (Allocator) - MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges); + MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges, + SynchScope, Ordering, FailureOrdering); } MachineMemOperand * @@ -308,13 +321,15 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getValue(), MMO->getOffset()+Offset), - MMO->getFlags(), Size, - MMO->getBaseAlignment()); + MMO->getFlags(), Size, MMO->getBaseAlignment(), + AAMDNodes(), nullptr, MMO->getSynchScope(), + MMO->getOrdering(), MMO->getFailureOrdering()); return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), - MMO->getFlags(), Size, - MMO->getBaseAlignment()); + MMO->getFlags(), Size, MMO->getBaseAlignment(), + AAMDNodes(), nullptr, MMO->getSynchScope(), + MMO->getOrdering(), MMO->getFailureOrdering()); } MachineInstr::mmo_iterator @@ -345,7 +360,9 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOStore, (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getAAInfo()); + (*I)->getAAInfo(), nullptr, + (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getFailureOrdering()); Result[Index] = JustLoad; } ++Index; @@ -377,7 +394,9 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOLoad, (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getAAInfo()); + (*I)->getAAInfo(), nullptr, + (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getFailureOrdering()); Result[Index] = JustStore; } ++Index; @@ -406,9 +425,8 @@ StringRef MachineFunction::getName() const { void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { OS << "# Machine code for function " << getName() << ": "; - OS << "Properties: <"; getProperties().print(OS); - OS << ">\n"; + OS << '\n'; // Print Frame Information FrameInfo->print(*this, OS); @@ -535,8 +553,8 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); - const char *Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix() - : DL.getPrivateGlobalPrefix(); + StringRef Prefix = isLinkerPrivate ? 
DL.getLinkerPrivateGlobalPrefix() + : DL.getPrivateGlobalPrefix(); SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; @@ -550,6 +568,193 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { Twine(getFunctionNumber()) + "$pb"); } +/// \name Exception Handling +/// \{ + +LandingPadInfo & +MachineFunction::getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad) { + unsigned N = LandingPads.size(); + for (unsigned i = 0; i < N; ++i) { + LandingPadInfo &LP = LandingPads[i]; + if (LP.LandingPadBlock == LandingPad) + return LP; + } + + LandingPads.push_back(LandingPadInfo(LandingPad)); + return LandingPads[N]; +} + +void MachineFunction::addInvoke(MachineBasicBlock *LandingPad, + MCSymbol *BeginLabel, MCSymbol *EndLabel) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.BeginLabels.push_back(BeginLabel); + LP.EndLabels.push_back(EndLabel); +} + +MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) { + MCSymbol *LandingPadLabel = Ctx.createTempSymbol(); + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.LandingPadLabel = LandingPadLabel; + return LandingPadLabel; +} + +void MachineFunction::addCatchTypeInfo(MachineBasicBlock *LandingPad, + ArrayRef<const GlobalValue *> TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + for (unsigned N = TyInfo.size(); N; --N) + LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); +} + +void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad, + ArrayRef<const GlobalValue *> TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + std::vector<unsigned> IdsInFilter(TyInfo.size()); + for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) + IdsInFilter[I] = getTypeIDFor(TyInfo[I]); + LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); +} + +void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { + for (unsigned i = 0; i != LandingPads.size(); ) { + LandingPadInfo &LandingPad = LandingPads[i]; + if (LandingPad.LandingPadLabel && + !LandingPad.LandingPadLabel->isDefined() && + (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) + LandingPad.LandingPadLabel = nullptr; + + // Special case: we *should* emit LPs with null LP MBB. This indicates + // "nounwind" case. + if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { + MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; + MCSymbol *EndLabel = LandingPad.EndLabels[j]; + if ((BeginLabel->isDefined() || + (LPMap && (*LPMap)[BeginLabel] != 0)) && + (EndLabel->isDefined() || + (LPMap && (*LPMap)[EndLabel] != 0))) continue; + + LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); + LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); + --j; + --e; + } + + // Remove landing pads with no try-ranges. + if (LandingPads[i].BeginLabels.empty()) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + // If there is no landing pad, ensure that the list of typeids is empty. + // If the only typeid is a cleanup, this is the same as having no typeids. 
+ if (!LandingPad.LandingPadBlock || + (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0])) + LandingPad.TypeIds.clear(); + ++i; + } +} + +void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.TypeIds.push_back(0); +} + +void MachineFunction::addSEHCatchHandler(MachineBasicBlock *LandingPad, + const Function *Filter, + const BlockAddress *RecoverBA) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Filter; + Handler.RecoverBA = RecoverBA; + LP.SEHHandlers.push_back(Handler); +} + +void MachineFunction::addSEHCleanupHandler(MachineBasicBlock *LandingPad, + const Function *Cleanup) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Cleanup; + Handler.RecoverBA = nullptr; + LP.SEHHandlers.push_back(Handler); +} + +void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym, + ArrayRef<unsigned> Sites) { + LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); +} + +unsigned MachineFunction::getTypeIDFor(const GlobalValue *TI) { + for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) + if (TypeInfos[i] == TI) return i + 1; + + TypeInfos.push_back(TI); + return TypeInfos.size(); +} + +int MachineFunction::getFilterIDFor(std::vector<unsigned> &TyIds) { + // If the new filter coincides with the tail of an existing filter, then + // re-use the existing filter. Folding filters more than this requires + // re-ordering filters and/or their elements - probably not worth it. + for (std::vector<unsigned>::iterator I = FilterEnds.begin(), + E = FilterEnds.end(); I != E; ++I) { + unsigned i = *I, j = TyIds.size(); + + while (i && j) + if (FilterIds[--i] != TyIds[--j]) + goto try_next; + + if (!j) + // The new filter coincides with range [i, end) of the existing filter. + return -(1 + i); + +try_next:; + } + + // Add the new filter. + int FilterID = -(1 + FilterIds.size()); + FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); + FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end()); + FilterEnds.push_back(FilterIds.size()); + FilterIds.push_back(0); // terminator + return FilterID; +} + +void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) { + MachineFunction &MF = *MBB.getParent(); + if (const auto *PF = dyn_cast<Function>( + I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) + MF.getMMI().addPersonality(PF); + + if (I.isCleanup()) + MF.addCleanup(&MBB); + + // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct, + // but we need to do it this way because of how the DWARF EH emitter + // processes the clauses. + for (unsigned i = I.getNumClauses(); i != 0; --i) { + Value *Val = I.getClause(i - 1); + if (I.isCatch(i - 1)) { + MF.addCatchTypeInfo(&MBB, + dyn_cast<GlobalValue>(Val->stripPointerCasts())); + } else { + // Add filters in a list. 
+ Constant *CVal = cast<Constant>(Val); + SmallVector<const GlobalValue *, 4> FilterList; + for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end(); + II != IE; ++II) + FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); + + MF.addFilterTypeInfo(&MBB, FilterList); + } + } +} + +/// \} + //===----------------------------------------------------------------------===// // MachineFrameInfo implementation //===----------------------------------------------------------------------===// @@ -634,11 +839,11 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, /// Create a spill slot at a fixed location on the stack. /// Returns an index with a negative value. int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, - int64_t SPOffset) { + int64_t SPOffset, + bool Immutable) { unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, - /*Immutable*/ true, + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ true, /*Alloca*/ nullptr, /*isAliased*/ false)); @@ -890,13 +1095,20 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { } MachineConstantPool::~MachineConstantPool() { + // A constant may be a member of both Constants and MachineCPVsSharingEntries, + // so keep track of which we've deleted to avoid double deletions. + DenseSet<MachineConstantPoolValue*> Deleted; for (unsigned i = 0, e = Constants.size(); i != e; ++i) - if (Constants[i].isMachineConstantPoolEntry()) + if (Constants[i].isMachineConstantPoolEntry()) { + Deleted.insert(Constants[i].Val.MachineCPVal); delete Constants[i].Val.MachineCPVal; + } for (DenseSet<MachineConstantPoolValue*>::iterator I = MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end(); - I != E; ++I) - delete *I; + I != E; ++I) { + if (Deleted.count(*I) == 0) + delete *I; + } } /// Test whether the given two constants can be allocated the same constant pool diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp deleted file mode 100644 index 338cd1e..0000000 --- a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//===-- MachineFunctionAnalysis.cpp ---------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the definitions of the MachineFunctionAnalysis members. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionInitializer.h" -using namespace llvm; - -char MachineFunctionAnalysis::ID = 0; - -MachineFunctionAnalysis::MachineFunctionAnalysis( - const TargetMachine &tm, MachineFunctionInitializer *MFInitializer) - : FunctionPass(ID), TM(tm), MF(nullptr), MFInitializer(MFInitializer) { - initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); -} - -MachineFunctionAnalysis::~MachineFunctionAnalysis() { - releaseMemory(); - assert(!MF && "MachineFunctionAnalysis left initialized!"); -} - -void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<MachineModuleInfo>(); -} - -bool MachineFunctionAnalysis::doInitialization(Module &M) { - MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - assert(MMI && "MMI not around yet??"); - MMI->setModule(&M); - NextFnNum = 0; - return false; -} - - -bool MachineFunctionAnalysis::runOnFunction(Function &F) { - assert(!MF && "MachineFunctionAnalysis already initialized!"); - MF = new MachineFunction(&F, TM, NextFnNum++, - getAnalysis<MachineModuleInfo>()); - if (MFInitializer) - MFInitializer->initializeMachineFunction(*MF); - return false; -} - -void MachineFunctionAnalysis::releaseMemory() { - delete MF; - MF = nullptr; -} diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp index 228fe17..2265676 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -22,7 +22,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Dominators.h" @@ -41,7 +41,9 @@ bool MachineFunctionPass::runOnFunction(Function &F) { if (F.hasAvailableExternallyLinkage()) return false; - MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF(); + MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); + MachineFunction &MF = MMI.getMachineFunction(F); + MachineFunctionProperties &MFProps = MF.getProperties(); #ifndef NDEBUG @@ -49,7 +51,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) { errs() << "MachineFunctionProperties required by " << getPassName() << " pass are not met by function " << F.getName() << ".\n" << "Required properties: "; - RequiredProperties.print(errs(), /*OnlySet=*/true); + RequiredProperties.print(errs()); errs() << "\nCurrent properties: "; MFProps.print(errs()); errs() << "\n"; @@ -60,13 +62,13 @@ bool MachineFunctionPass::runOnFunction(Function &F) { bool RV = runOnMachineFunction(MF); MFProps.set(SetProperties); - MFProps.clear(ClearedProperties); + MFProps.reset(ClearedProperties); return RV; } void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineFunctionAnalysis>(); - AU.addPreserved<MachineFunctionAnalysis>(); + AU.addRequired<MachineModuleInfo>(); + AU.addPreserved<MachineModuleInfo>(); // MachineFunctionPass preserves all LLVM IR passes, but there's no // high-level way to express this. 
Instead, just list a bunch of
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 4f424ff..0d533c3 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -34,7 +34,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
   MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
       : MachineFunctionPass(ID), OS(os), Banner(banner) {}

-  const char *getPassName() const override { return "MachineFunction Printer"; }
+  StringRef getPassName() const override { return "MachineFunction Printer"; }

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 3cdf8d2..2f2e3b3 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -26,6 +26,7 @@
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -40,6 +41,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
@@ -91,6 +93,8 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
     // Note that getSubReg() may return 0 if the sub-register doesn't exist.
     // That won't happen in legal code.
     setSubReg(0);
+    if (isDef())
+      setIsUndef(false);
   }
   setReg(Reg);
 }
@@ -171,6 +175,16 @@ void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
   Contents.Sym = Sym;
 }

+void MachineOperand::ChangeToFrameIndex(int Idx) {
+  assert((!isReg() || !isTied()) &&
+         "Cannot change a tied operand into a FrameIndex");
+
+  removeRegFromUses();
+
+  OpKind = MO_FrameIndex;
+  setIndex(Idx);
+}
+
 /// ChangeToRegister - Replace this operand with a new register operand of
 /// the specified value. If an operand is known to be a register already,
 /// the setReg method should be used.
@@ -256,6 +270,10 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getCFIIndex() == Other.getCFIIndex(); case MachineOperand::MO_Metadata: return getMetadata() == Other.getMetadata(); + case MachineOperand::MO_IntrinsicID: + return getIntrinsicID() == Other.getIntrinsicID(); + case MachineOperand::MO_Predicate: + return getPredicate() == Other.getPredicate(); } llvm_unreachable("Invalid machine operand type"); } @@ -300,18 +318,23 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); case MachineOperand::MO_CFIIndex: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); + case MachineOperand::MO_IntrinsicID: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); + case MachineOperand::MO_Predicate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); } llvm_unreachable("Invalid machine operand type"); } -void MachineOperand::print(raw_ostream &OS, - const TargetRegisterInfo *TRI) const { +void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const { ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, TRI); + print(OS, DummyMST, TRI, IntrinsicInfo); } void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const { switch (getType()) { case MachineOperand::MO_Register: OS << PrintReg(getReg(), TRI, getSubReg()); @@ -378,7 +401,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, } else if (getFPImm()->getType()->isHalfTy()) { APFloat APF = getFPImm()->getValueAPF(); bool Unused; - APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Unused); + APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused); OS << "half " << APF.convertToFloat(); } else { OS << getFPImm()->getValueAPF().convertToDouble(); @@ -454,12 +477,32 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, case MachineOperand::MO_CFIIndex: OS << "<call frame instruction>"; break; + case MachineOperand::MO_IntrinsicID: { + Intrinsic::ID ID = getIntrinsicID(); + if (ID < Intrinsic::num_intrinsics) + OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>'; + else if (IntrinsicInfo) + OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>'; + else + OS << "<intrinsic:" << ID << '>'; + break; + } + case MachineOperand::MO_Predicate: { + auto Pred = static_cast<CmpInst::Predicate>(getPredicate()); + OS << '<' << (CmpInst::isIntPredicate(Pred) ? 
"intpred" : "floatpred") + << CmpInst::getPredicateName(Pred) << '>'; + } } - if (unsigned TF = getTargetFlags()) OS << "[TF=" << TF << ']'; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void MachineOperand::dump() const { + dbgs() << *this << '\n'; +} +#endif + //===----------------------------------------------------------------------===// // MachineMemOperand Implementation //===----------------------------------------------------------------------===// @@ -500,7 +543,10 @@ MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, uint64_t s, unsigned int a, const AAMDNodes &AAInfo, - const MDNode *Ranges) + const MDNode *Ranges, + SynchronizationScope SynchScope, + AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), AAInfo(AAInfo), Ranges(Ranges) { assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() || @@ -508,6 +554,13 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); + + AtomicInfo.SynchScope = static_cast<unsigned>(SynchScope); + assert(getSynchScope() == SynchScope && "Value truncated"); + AtomicInfo.Ordering = static_cast<unsigned>(Ordering); + assert(getOrdering() == Ordering && "Value truncated"); + AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering); + assert(getFailureOrdering() == FailureOrdering && "Value truncated"); } /// Profile - Gather unique data for the object. @@ -623,10 +676,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { OS << ")"; } - // Print nontemporal info. if (isNonTemporal()) OS << "(nontemporal)"; - + if (isDereferenceable()) + OS << "(dereferenceable)"; if (isInvariant()) OS << "(invariant)"; } @@ -653,12 +706,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, DebugLoc dl, bool NoImp) : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), - debugLoc(std::move(dl)) -#ifdef LLVM_BUILD_GLOBAL_ISEL - , - Ty(nullptr) -#endif -{ + debugLoc(std::move(dl)) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -677,12 +725,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), - MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) -#ifdef LLVM_BUILD_GLOBAL_ISEL - , - Ty(nullptr) -#endif -{ + MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); @@ -705,25 +748,6 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { return nullptr; } -// Implement dummy setter and getter for type when -// global-isel is not built. -// The proper implementation is WIP and is tracked here: -// PR26576. 
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-void MachineInstr::setType(Type *Ty) {}
-
-Type *MachineInstr::getType() const { return nullptr; }
-
-#else
-void MachineInstr::setType(Type *Ty) {
-  assert((!Ty || isPreISelGenericOpcode(getOpcode())) &&
-         "Non generic instructions are not supposed to be typed");
-  this->Ty = Ty;
-}
-
-Type *MachineInstr::getType() const { return Ty; }
-#endif // LLVM_BUILD_GLOBAL_ISEL
-
 /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
 /// this instruction from their respective use lists.  This requires that the
 /// operands already be on their use lists.
@@ -976,16 +1000,24 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
     return false;

   if (isBundle()) {
-    // Both instructions are bundles, compare MIs inside the bundle.
+    // We have passed the test above that both instructions have the same
+    // opcode, so we know that both instructions are bundles here. Let's compare
+    // MIs inside the bundle.
+    assert(Other.isBundle() && "Expected that both instructions are bundles.");
     MachineBasicBlock::const_instr_iterator I1 = getIterator();
-    MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
     MachineBasicBlock::const_instr_iterator I2 = Other.getIterator();
-    MachineBasicBlock::const_instr_iterator E2 = Other.getParent()->instr_end();
-    while (++I1 != E1 && I1->isInsideBundle()) {
+    // Loop until we have analyzed the last instruction inside at least one of
+    // the bundles.
+    while (I1->isBundledWithSucc() && I2->isBundledWithSucc()) {
+      ++I1;
       ++I2;
-      if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(*I2, Check))
+      if (!I1->isIdenticalTo(*I2, Check))
         return false;
     }
+    // If we've reached the end of just one of the two bundles, but not both,
+    // the instructions are not identical.
+    if (I1->isBundledWithSucc() || I2->isBundledWithSucc())
+      return false;
   }

   // Check operands to make sure they match.
@@ -1287,8 +1319,8 @@ bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
 /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
 /// the specific register or -1 if it is not found. It further tightens
 /// the search criteria to a use that kills the register if isKill is true.
-int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
-                                      const TargetRegisterInfo *TRI) const {
+int MachineInstr::findRegisterUseOperandIdx(
+    unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const {
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = getOperand(i);
     if (!MO.isReg() || !MO.isUse())
@@ -1296,11 +1328,9 @@ int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
     unsigned MOReg = MO.getReg();
     if (!MOReg)
       continue;
-    if (MOReg == Reg ||
-        (TRI &&
-         TargetRegisterInfo::isPhysicalRegister(MOReg) &&
-         TargetRegisterInfo::isPhysicalRegister(Reg) &&
-         TRI->isSubRegister(MOReg, Reg)))
+    if (MOReg == Reg || (TRI && TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+                         TargetRegisterInfo::isPhysicalRegister(Reg) &&
+                         TRI->isSubRegister(MOReg, Reg)))
       if (!isKill || MO.isKill())
         return i;
   }
@@ -1533,7 +1563,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
   // destination. The check for isInvariantLoad gives the target the chance to
   // classify the load as always returning a constant, e.g. a constant pool
   // load.
-  if (mayLoad() && !isInvariantLoad(AA))
+  if (mayLoad() && !isDereferenceableInvariantLoad(AA))
    // Otherwise, this is a real load. 
If there is a store between the load and // end of block, we can't move it. return !SawStore; @@ -1564,12 +1594,10 @@ bool MachineInstr::hasOrderedMemoryRef() const { }); } -/// isInvariantLoad - Return true if this instruction is loading from a -/// location whose value is invariant across the function. For example, -/// loading a value from the constant pool or from the argument area -/// of a function if it does not change. This should only return true of -/// *all* loads the instruction does are invariant (if it does multiple loads). -bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { +/// isDereferenceableInvariantLoad - Return true if this instruction will never +/// trap and is loading from a location whose value is invariant across a run of +/// this function. +bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. if (!mayLoad()) return false; @@ -1579,16 +1607,17 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { if (memoperands_empty()) return false; - const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo(); + const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo(); for (MachineMemOperand *MMO : memoperands()) { if (MMO->isVolatile()) return false; if (MMO->isStore()) return false; - if (MMO->isInvariant()) continue; + if (MMO->isInvariant() && MMO->isDereferenceable()) + continue; // A load from a constant PseudoSourceValue is invariant. if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) - if (PSV->isConstant(MFI)) + if (PSV->isConstant(&MFI)) continue; if (const Value *V = MMO->getValue()) { @@ -1663,35 +1692,40 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF, } } -LLVM_DUMP_METHOD void MachineInstr::dump() const { +LLVM_DUMP_METHOD void MachineInstr::dump(const TargetInstrInfo *TII) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - dbgs() << " " << *this; + dbgs() << " "; + print(dbgs(), false /* SkipOpers */, TII); #endif } -void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { +void MachineInstr::print(raw_ostream &OS, bool SkipOpers, + const TargetInstrInfo *TII) const { const Module *M = nullptr; if (const MachineBasicBlock *MBB = getParent()) if (const MachineFunction *MF = MBB->getParent()) M = MF->getFunction()->getParent(); ModuleSlotTracker MST(M); - print(OS, MST, SkipOpers); + print(OS, MST, SkipOpers, TII); } void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, - bool SkipOpers) const { + bool SkipOpers, const TargetInstrInfo *TII) const { // We can be a bit tidier if we know the MachineFunction. 
const MachineFunction *MF = nullptr; const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; - const TargetInstrInfo *TII = nullptr; + const TargetIntrinsicInfo *IntrinsicInfo = nullptr; + if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); if (MF) { MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); - TII = MF->getSubtarget().getInstrInfo(); + if (!TII) + TII = MF->getSubtarget().getInstrInfo(); + IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); } } @@ -1705,13 +1739,13 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, !getOperand(StartOp).isImplicit(); ++StartOp) { if (StartOp != 0) OS << ", "; - getOperand(StartOp).print(OS, MST, TRI); + getOperand(StartOp).print(OS, MST, TRI, IntrinsicInfo); unsigned Reg = getOperand(StartOp).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { VirtRegs.push_back(Reg); - unsigned Size; - if (MRI && (Size = MRI->getSize(Reg))) - OS << '(' << Size << ')'; + LLT Ty = MRI ? MRI->getType(Reg) : LLT{}; + if (Ty.isValid()) + OS << '(' << Ty << ')'; } } @@ -1724,12 +1758,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else OS << "UNKNOWN"; - if (getType()) { - OS << ' '; - getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true); - OS << ' '; - } - if (SkipOpers) return; @@ -1812,7 +1840,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "!\"" << DIV->getName() << '\"'; else MO.print(OS, MST, TRI); - } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + } else if (TRI && (isInsertSubreg() || isRegSequence() || + (isSubregToReg() && i == 3)) && MO.isImm()) { OS << TRI->getSubRegIndexName(MO.getImm()); } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. @@ -2145,8 +2174,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // If there are no uses, including partial uses, the def is dead. 
- if (std::none_of(UsedRegs.begin(), UsedRegs.end(), - [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + if (none_of(UsedRegs, + [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) MO.setIsDead(); } diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp index e4686b3..b5621a0 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -24,7 +24,8 @@ namespace { class UnpackMachineBundles : public MachineFunctionPass { public: static char ID; // Pass identification - UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr) + UnpackMachineBundles( + std::function<bool(const MachineFunction &)> Ftor = nullptr) : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); } @@ -32,7 +33,7 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; private: - std::function<bool(const Function &)> PredicateFtor; + std::function<bool(const MachineFunction &)> PredicateFtor; }; } // end anonymous namespace @@ -42,7 +43,7 @@ INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles", "Unpack machine instruction bundles", false, false) bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { - if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + if (PredicateFtor && !PredicateFtor(MF)) return false; bool Changed = false; @@ -78,7 +79,8 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { } FunctionPass * -llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) { +llvm::createUnpackMachineBundles( + std::function<bool(const MachineFunction &)> Ftor) { return new UnpackMachineBundles(std::move(Ftor)); } diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index 119751b..b3d1843 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -92,8 +92,7 @@ namespace { SmallVector<MachineBasicBlock*, 8> ExitBlocks; bool isExitBlock(const MachineBasicBlock *MBB) const { - return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != - ExitBlocks.end(); + return is_contained(ExitBlocks, MBB); } // Track 'estimated' register pressure. @@ -268,7 +267,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { TII = ST.getInstrInfo(); TLI = ST.getTargetLowering(); TRI = ST.getRegisterInfo(); - MFI = MF.getFrameInfo(); + MFI = &MF.getFrameInfo(); MRI = &MF.getRegInfo(); SchedModel.init(ST.getSchedModel(), &ST, TII); @@ -896,7 +895,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) + if (!MRI->isConstantPhysReg(Reg)) return false; // Otherwise it's safe to move. continue; @@ -1139,7 +1138,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { // High register pressure situation, only hoist if the instruction is going // to be remat'ed. 
- if (!TII->isTriviallyReMaterializable(MI, AA) && !MI.isInvariantLoad(AA)) { + if (!TII->isTriviallyReMaterializable(MI, AA) && + !MI.isDereferenceableInvariantLoad(AA)) { DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); return false; } @@ -1158,7 +1158,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. - if (!MI->isInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad(AA)) return nullptr; // Next determine the register class for a temporary register. @@ -1336,6 +1336,11 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Otherwise, splice the instruction to the preheader. Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); + // Since we are moving the instruction out of its basic block, we do not + // retain its debug location. Doing so would degrade the debugging + // experience and adversely affect the accuracy of profiling information. + MI->setDebugLoc(DebugLoc()); + // Update register pressure for BBs from header to this block. UpdateBackTraceRegPressure(MI); diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index 376f78f..fdeaf7b 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -77,6 +77,51 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { return BotMBB; } +MachineBasicBlock *MachineLoop::findLoopControlBlock() { + if (MachineBasicBlock *Latch = getLoopLatch()) { + if (isLoopExiting(Latch)) + return Latch; + else + return getExitingBlock(); + } + return nullptr; +} + +MachineBasicBlock * +MachineLoopInfo::findLoopPreheader(MachineLoop *L, + bool SpeculativePreheader) const { + if (MachineBasicBlock *PB = L->getLoopPreheader()) + return PB; + + if (!SpeculativePreheader) + return nullptr; + + MachineBasicBlock *HB = L->getHeader(), *LB = L->getLoopLatch(); + if (HB->pred_size() != 2 || HB->hasAddressTaken()) + return nullptr; + // Find the predecessor of the header that is not the latch block. + MachineBasicBlock *Preheader = nullptr; + for (MachineBasicBlock *P : HB->predecessors()) { + if (P == LB) + continue; + // Sanity. + if (Preheader) + return nullptr; + Preheader = P; + } + + // Check if the preheader candidate is a successor of any other loop + // headers. We want to avoid having two loop setups in the same block. 
+ for (MachineBasicBlock *S : Preheader->successors()) { + if (S == HB) + continue; + MachineLoop *T = getLoopFor(S); + if (T && T->getHeader() == S) + return nullptr; + } + return Preheader; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineLoop::dump() const { print(dbgs()); diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 244e3fb..6618857 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -9,26 +9,31 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionInitializer.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; using namespace llvm::dwarf; // Handle the Pass registration stuff necessary to use DataLayout's. -INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", - "Machine Module Information", false, false) +INITIALIZE_TM_PASS(MachineModuleInfo, "machinemoduleinfo", + "Machine Module Information", false, false) char MachineModuleInfo::ID = 0; // Out of line virtual method. @@ -54,7 +59,7 @@ public: class MMIAddrLabelMap { MCContext &Context; struct AddrLabelSymEntry { - /// Symbols - The symbols for the label. + /// The symbols for the label. TinyPtrVector<MCSymbol *> Symbols; Function *Fn; // The containing function of the BasicBlock. @@ -63,14 +68,13 @@ class MMIAddrLabelMap { DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols; - /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We - /// use this so we get notified if a block is deleted or RAUWd. + /// Callbacks for the BasicBlock's that we have entries for. We use this so + /// we get notified if a block is deleted or RAUWd. std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; - /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols - /// whose corresponding BasicBlock got deleted. These symbols need to be - /// emitted at some point in the file, so AsmPrinter emits them after the - /// function body. + /// This is a per-function list of symbols whose corresponding BasicBlock got + /// deleted. These symbols need to be emitted at some point in the file, so + /// AsmPrinter emits them after the function body. DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> > DeletedAddrLabelsNeedingEmission; public: @@ -112,8 +116,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { return Entry.Symbols; } -/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return -/// them. +/// If we have any deleted symbols for F, return them. 
void MMIAddrLabelMap:: takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I = @@ -186,20 +189,13 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { //===----------------------------------------------------------------------===// -MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, - const MCRegisterInfo &MRI, - const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, nullptr, false) { +MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM) + : ImmutablePass(ID), TM(*TM), + Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), + TM->getObjFileLowering(), nullptr, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } -MachineModuleInfo::MachineModuleInfo() - : ImmutablePass(ID), Context(nullptr, nullptr, nullptr) { - llvm_unreachable("This MachineModuleInfo constructor should never be called, " - "MMI should always be explicitly constructed by " - "LLVMTargetMachine"); -} - MachineModuleInfo::~MachineModuleInfo() { } @@ -207,13 +203,9 @@ bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = nullptr; CurCallSite = 0; - CallsEHReturn = false; - CallsUnwindInit = false; - HasEHFunclets = false; DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; - PersonalityTypeCache = EHPersonality::Unknown; AddrLabelSymbols = nullptr; - TheModule = nullptr; + TheModule = &M; return false; } @@ -233,30 +225,8 @@ bool MachineModuleInfo::doFinalization(Module &M) { return false; } -/// EndFunction - Discard function meta information. -/// -void MachineModuleInfo::EndFunction() { - // Clean up frame info. - FrameInstructions.clear(); - - // Clean up exception info. - LandingPads.clear(); - PersonalityTypeCache = EHPersonality::Unknown; - CallSiteMap.clear(); - TypeInfos.clear(); - FilterIds.clear(); - FilterEnds.clear(); - CallsEHReturn = false; - CallsUnwindInit = false; - HasEHFunclets = false; - VariableDbgInfos.clear(); -} - //===- Address of Block Management ----------------------------------------===// -/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified -/// basic block when its address is taken. If other blocks were RAUW'd to -/// this one, we may have to emit them as well, return the whole set. ArrayRef<MCSymbol *> MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { // Lazily create AddrLabelSymbols. @@ -265,11 +235,6 @@ MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB)); } - -/// takeDeletedSymbolsForFunction - If the specified function has had any -/// references to address-taken blocks generated, but the block got deleted, -/// return the symbol now so we can emit it. This prevents emitting a -/// reference to a symbol that has no definition. void MachineModuleInfo:: takeDeletedSymbolsForFunction(const Function *F, std::vector<MCSymbol*> &Result) { @@ -279,40 +244,8 @@ takeDeletedSymbolsForFunction(const Function *F, takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result); } -//===- EH -----------------------------------------------------------------===// - -/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the -/// specified MachineBasicBlock. 
-LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo - (MachineBasicBlock *LandingPad) { - unsigned N = LandingPads.size(); - for (unsigned i = 0; i < N; ++i) { - LandingPadInfo &LP = LandingPads[i]; - if (LP.LandingPadBlock == LandingPad) - return LP; - } - - LandingPads.push_back(LandingPadInfo(LandingPad)); - return LandingPads[N]; -} - -/// addInvoke - Provide the begin and end labels of an invoke style call and -/// associate it with a try landing pad block. -void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, - MCSymbol *BeginLabel, MCSymbol *EndLabel) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.BeginLabels.push_back(BeginLabel); - LP.EndLabels.push_back(EndLabel); -} - -/// addLandingPad - Provide the label of a try LandingPad block. -/// -MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { - MCSymbol *LandingPadLabel = Context.createTempSymbol(); - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.LandingPadLabel = LandingPadLabel; - return LandingPadLabel; -} +/// \name Exception Handling +/// \{ void MachineModuleInfo::addPersonality(const Function *Personality) { for (unsigned i = 0; i < Personalities.size(); ++i) @@ -321,143 +254,83 @@ void MachineModuleInfo::addPersonality(const Function *Personality) { Personalities.push_back(Personality); } -/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. -/// -void MachineModuleInfo:: -addCatchTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalValue *> TyInfo) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - for (unsigned N = TyInfo.size(); N; --N) - LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); -} - -/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. -/// -void MachineModuleInfo:: -addFilterTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalValue *> TyInfo) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - std::vector<unsigned> IdsInFilter(TyInfo.size()); - for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) - IdsInFilter[I] = getTypeIDFor(TyInfo[I]); - LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); -} - -/// addCleanup - Add a cleanup action for a landing pad. -/// -void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.TypeIds.push_back(0); -} +/// \} + +MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) { + // Shortcut for the common case where a sequence of MachineFunctionPasses + // all query for the same Function. + if (LastRequest == &F) + return *LastResult; + + auto I = MachineFunctions.insert( + std::make_pair(&F, std::unique_ptr<MachineFunction>())); + MachineFunction *MF; + if (I.second) { + // No pre-existing machine function, create a new one. + MF = new MachineFunction(&F, TM, NextFnNum++, *this); + // Update the set entry. 
+ I.first->second.reset(MF); + + if (MFInitializer) + if (MFInitializer->initializeMachineFunction(*MF)) + report_fatal_error("Unable to initialize machine function"); + } else { + MF = I.first->second.get(); + } -void MachineModuleInfo::addSEHCatchHandler(MachineBasicBlock *LandingPad, - const Function *Filter, - const BlockAddress *RecoverBA) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - SEHHandler Handler; - Handler.FilterOrFinally = Filter; - Handler.RecoverBA = RecoverBA; - LP.SEHHandlers.push_back(Handler); + LastRequest = &F; + LastResult = MF; + return *MF; } -void MachineModuleInfo::addSEHCleanupHandler(MachineBasicBlock *LandingPad, - const Function *Cleanup) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - SEHHandler Handler; - Handler.FilterOrFinally = Cleanup; - Handler.RecoverBA = nullptr; - LP.SEHHandlers.push_back(Handler); +void MachineModuleInfo::deleteMachineFunctionFor(Function &F) { + MachineFunctions.erase(&F); + LastRequest = nullptr; + LastResult = nullptr; } -/// TidyLandingPads - Remap landing pad labels and remove any deleted landing -/// pads. -void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { - for (unsigned i = 0; i != LandingPads.size(); ) { - LandingPadInfo &LandingPad = LandingPads[i]; - if (LandingPad.LandingPadLabel && - !LandingPad.LandingPadLabel->isDefined() && - (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) - LandingPad.LandingPadLabel = nullptr; - - // Special case: we *should* emit LPs with null LP MBB. This indicates - // "nounwind" case. - if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) { - LandingPads.erase(LandingPads.begin() + i); - continue; - } - - for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { - MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; - MCSymbol *EndLabel = LandingPad.EndLabels[j]; - if ((BeginLabel->isDefined() || - (LPMap && (*LPMap)[BeginLabel] != 0)) && - (EndLabel->isDefined() || - (LPMap && (*LPMap)[EndLabel] != 0))) continue; - - LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); - LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); - --j; - --e; - } - - // Remove landing pads with no try-ranges. - if (LandingPads[i].BeginLabels.empty()) { - LandingPads.erase(LandingPads.begin() + i); - continue; - } +namespace { +/// This pass frees the MachineFunction object associated with a Function. +class FreeMachineFunction : public FunctionPass { +public: + static char ID; + FreeMachineFunction() : FunctionPass(ID) {} - // If there is no landing pad, ensure that the list of typeids is empty. - // If the only typeid is a cleanup, this is the same as having no typeids. - if (!LandingPad.LandingPadBlock || - (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0])) - LandingPad.TypeIds.clear(); - ++i; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineModuleInfo>(); + AU.addPreserved<MachineModuleInfo>(); } -} -/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site -/// indexes. -void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym, - ArrayRef<unsigned> Sites) { - LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); -} - -/// getTypeIDFor - Return the type id for the specified typeinfo. This is -/// function wide. 
-unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) { - for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) - if (TypeInfos[i] == TI) return i + 1; + bool runOnFunction(Function &F) override { + MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>(); + MMI.deleteMachineFunctionFor(F); + return true; + } +}; +char FreeMachineFunction::ID; +} // end anonymous namespace - TypeInfos.push_back(TI); - return TypeInfos.size(); +namespace llvm { +FunctionPass *createFreeMachineFunctionPass() { + return new FreeMachineFunction(); } - -/// getFilterIDFor - Return the filter id for the specified typeinfos. This is -/// function wide. -int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) { - // If the new filter coincides with the tail of an existing filter, then - // re-use the existing filter. Folding filters more than this requires - // re-ordering filters and/or their elements - probably not worth it. - for (std::vector<unsigned>::iterator I = FilterEnds.begin(), - E = FilterEnds.end(); I != E; ++I) { - unsigned i = *I, j = TyIds.size(); - - while (i && j) - if (FilterIds[--i] != TyIds[--j]) - goto try_next; - - if (!j) - // The new filter coincides with range [i, end) of the existing filter. - return -(1 + i); - -try_next:; +} // end namespace llvm + +//===- MMI building helpers -----------------------------------------------===// + +void llvm::computeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo &MMI) { + FunctionType *FT = + cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI.usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type *T = I.getArgOperand(i)->getType(); + for (auto i : post_order(T)) { + if (i->isFloatingPointTy()) { + MMI.setUsesVAFloatArgument(true); + return; + } + } + } } - - // Add the new filter. - int FilterID = -(1 + FilterIds.size()); - FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); - FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end()); - FilterEnds.push_back(FilterIds.size()); - FilterIds.push_back(0); // terminator - return FilterID; } diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp new file mode 100644 index 0000000..43a1809 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -0,0 +1,3984 @@ +//===-- MachinePipeliner.cpp - Machine Software Pipeliner Pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. +// +// Software pipelining (SWP) is an instruction scheduling technique for loops +// that overlaps loop iterations and exploits ILP via a compiler transformation. +// +// Swing Modulo Scheduling is an implementation of software pipelining +// that generates schedules that are near optimal in terms of initiation +// interval, register requirements, and stage count. See the papers: +// +// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, +// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 +// Conference on Parallel Architectures and Compilation Techniques. +// +// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. +// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt.
In IEEE +// Transactions on Computers, Vol. 50, No. 3, 2001. +// +// "An Implementation of Swing Modulo Scheduling With Extensions for +// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at +// Urbana-Champaign, 2005. +// +// +// The SMS algorithm consists of three main steps after computing the minimal +// initiation interval (MII). +// 1) Analyze the dependence graph and compute information about each +// instruction in the graph. +// 2) Order the nodes (instructions) by priority based upon the heuristics +// described in the algorithm. +// 3) Attempt to schedule the nodes in the specified order using the MII. +// +// This SMS implementation is a target-independent back-end pass. When enabled, +// the pass runs just prior to the register allocation pass, while the machine +// IR is in SSA form. If software pipelining is successful, then the original +// loop is replaced by the optimized loop. The optimized loop contains one or +// more prolog blocks, the pipelined kernel, and one or more epilog blocks. If +// the instructions cannot be scheduled in a given MII, we increase the MII by +// one and try again. +// +// The SMS implementation is an extension of the ScheduleDAGInstrs class. We +// represent loop carried dependences in the DAG as order edges to the Phi +// nodes. We also perform several passes over the DAG to eliminate unnecessary +// edges that inhibit the ability to pipeline. The implementation uses the +// DFAPacketizer class to compute the minimum initiation interval and to check +// where an instruction may be inserted in the pipelined schedule. +// +// In order for the SMS pass to work, several target specific hooks need to be +// implemented to get information about the loop structure and to rewrite +// instructions.
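To make the prolog/kernel/epilog shape described in the header above concrete, here is a brief editorial sketch (not part of the LLVM sources; the loop body and names are invented for illustration) of what the transformation produces for a two-stage schedule:
// Source loop, one load and one dependent store per iteration:
//   for (int i = 0; i < n; ++i) { int a = load(i); store(i, f(a)); }
// Pipelined form with two stages (stage 0 = load, stage 1 = store):
//   int a = load(0);                    // prolog: stage 0 of iteration 0
//   for (int i = 0; i < n - 1; ++i) {   // kernel: two iterations in flight
//     store(i, f(a));                   //   stage 1 of iteration i
//     a = load(i + 1);                  //   stage 0 of iteration i + 1
//   }
//   store(n - 1, f(a));                 // epilog: drain the last iteration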
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <climits> +#include <cstdint> +#include <deque> +#include <functional> +#include <iterator> +#include <map> +#include <tuple> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "pipeliner" + +STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline"); +STATISTIC(NumPipelined, "Number of loops software pipelined"); + +/// A command line option to turn software pipelining on or off. +static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), + cl::ZeroOrMore, + cl::desc("Enable Software Pipelining")); + +/// A command line option to enable SWP at -Os. +static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size", + cl::desc("Enable SWP at Os."), cl::Hidden, + cl::init(false)); + +/// A command line argument to limit minimum initial interval for pipelining. +static cl::opt<int> SwpMaxMii("pipeliner-max-mii", + cl::desc("Size limit for the the MII."), + cl::Hidden, cl::init(27)); + +/// A command line argument to limit the number of stages in the pipeline. +static cl::opt<int> + SwpMaxStages("pipeliner-max-stages", + cl::desc("Maximum stages allowed in the generated scheduled."), + cl::Hidden, cl::init(3)); + +/// A command line option to disable the pruning of chain dependences due to +/// an unrelated Phi. +static cl::opt<bool> + SwpPruneDeps("pipeliner-prune-deps", + cl::desc("Prune dependences between unrelated Phi nodes."), + cl::Hidden, cl::init(true)); + +/// A command line option to disable the pruning of loop carried order +/// dependences. 
+static cl::opt<bool> + SwpPruneLoopCarried("pipeliner-prune-loop-carried", + cl::desc("Prune loop carried order dependences."), + cl::Hidden, cl::init(true)); + +#ifndef NDEBUG +static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1)); +#endif + +static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii", + cl::ReallyHidden, cl::init(false), + cl::ZeroOrMore, cl::desc("Ignore RecMII")); + +namespace { + +class NodeSet; +class SMSchedule; +class SwingSchedulerDAG; + +/// The main class in the implementation of the target independent +/// software pipeliner pass. +class MachinePipeliner : public MachineFunctionPass { +public: + MachineFunction *MF = nullptr; + const MachineLoopInfo *MLI = nullptr; + const MachineDominatorTree *MDT = nullptr; + const InstrItineraryData *InstrItins; + const TargetInstrInfo *TII = nullptr; + RegisterClassInfo RegClassInfo; + +#ifndef NDEBUG + static int NumTries; +#endif + /// Cache the target analysis information about the loop. + struct LoopInfo { + MachineBasicBlock *TBB = nullptr; + MachineBasicBlock *FBB = nullptr; + SmallVector<MachineOperand, 4> BrCond; + MachineInstr *LoopInductionVar = nullptr; + MachineInstr *LoopCompare = nullptr; + }; + LoopInfo LI; + + static char ID; + MachinePipeliner() : MachineFunctionPass(ID) { + initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + bool canPipelineLoop(MachineLoop &L); + bool scheduleLoop(MachineLoop &L); + bool swingModuloScheduler(MachineLoop &L); +}; + +/// This class builds the dependence graph for the instructions in a loop, +/// and attempts to schedule the instructions using the SMS algorithm. +class SwingSchedulerDAG : public ScheduleDAGInstrs { + MachinePipeliner &Pass; + /// The minimum initiation interval between iterations for this schedule. + unsigned MII; + /// Set to true if a valid pipelined schedule is found for the loop. + bool Scheduled; + MachineLoop &Loop; + LiveIntervals &LIS; + const RegisterClassInfo &RegClassInfo; + + /// A toplogical ordering of the SUnits, which is needed for changing + /// dependences and iterating over the SUnits. + ScheduleDAGTopologicalSort Topo; + + struct NodeInfo { + int ASAP; + int ALAP; + NodeInfo() : ASAP(0), ALAP(0) {} + }; + /// Computed properties for each node in the graph. + std::vector<NodeInfo> ScheduleInfo; + + enum OrderKind { BottomUp = 0, TopDown = 1 }; + /// Computed node ordering for scheduling. + SetVector<SUnit *> NodeOrder; + + typedef SmallVector<NodeSet, 8> NodeSetType; + typedef DenseMap<unsigned, unsigned> ValueMapTy; + typedef SmallVectorImpl<MachineBasicBlock *> MBBVectorTy; + typedef DenseMap<MachineInstr *, MachineInstr *> InstrMapTy; + + /// Instructions to change when emitting the final schedule. + DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; + + /// We may create a new instruction, so remember it because it + /// must be deleted when the pass is finished. + SmallPtrSet<MachineInstr *, 4> NewMIs; + + /// Ordered list of DAG postprocessing steps. + std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; + + /// Helper class to implement Johnson's circuit finding algorithm. 
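An editorial gloss on the Circuits helper declared just below, using the vocabulary of Johnson's elementary-circuits algorithm (this mapping of the fields is an interpretation, not taken from the source comments):
// Stack   - the current path of SUnits being extended into a candidate circuit.
// Blocked - per-node flag; a blocked node is not revisited from the current
//           path until it is explicitly unblocked.
// B       - Johnson's B-lists: B[W] records the nodes to unblock transitively
//           once W itself becomes unblocked (see unblock()).
// AdjK    - adjacency lists keyed by NodeNum, filled by createAdjacencyStructure().
// NumPaths / MaxPaths - a cap on enumerated paths to bound compile time.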
+ class Circuits { + std::vector<SUnit> &SUnits; + SetVector<SUnit *> Stack; + BitVector Blocked; + SmallVector<SmallPtrSet<SUnit *, 4>, 10> B; + SmallVector<SmallVector<int, 4>, 16> AdjK; + unsigned NumPaths; + static unsigned MaxPaths; + + public: + Circuits(std::vector<SUnit> &SUs) + : SUnits(SUs), Stack(), Blocked(SUs.size()), B(SUs.size()), + AdjK(SUs.size()) {} + /// Reset the data structures used in the circuit algorithm. + void reset() { + Stack.clear(); + Blocked.reset(); + B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>()); + NumPaths = 0; + } + void createAdjacencyStructure(SwingSchedulerDAG *DAG); + bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false); + void unblock(int U); + }; + +public: + SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, + const RegisterClassInfo &rci) + : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), MII(0), + Scheduled(false), Loop(L), LIS(lis), RegClassInfo(rci), + Topo(SUnits, &ExitSU) { + P.MF->getSubtarget().getSMSMutations(Mutations); + } + + void schedule() override; + void finishBlock() override; + + /// Return true if the loop kernel has been scheduled. + bool hasNewSchedule() { return Scheduled; } + + /// Return the earliest time an instruction may be scheduled. + int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; } + + /// Return the latest time an instruction my be scheduled. + int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } + + /// The mobility function, which the the number of slots in which + /// an instruction may be scheduled. + int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } + + /// The depth, in the dependence graph, for a node. + int getDepth(SUnit *Node) { return Node->getDepth(); } + + /// The height, in the dependence graph, for a node. + int getHeight(SUnit *Node) { return Node->getHeight(); } + + /// Return true if the dependence is a back-edge in the data dependence graph. + /// Since the DAG doesn't contain cycles, we represent a cycle in the graph + /// using an anti dependence from a Phi to an instruction. + bool isBackedge(SUnit *Source, const SDep &Dep) { + if (Dep.getKind() != SDep::Anti) + return false; + return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); + } + + /// Return true if the dependence is an order dependence between non-Phis. + static bool isOrder(SUnit *Source, const SDep &Dep) { + if (Dep.getKind() != SDep::Order) + return false; + return (!Source->getInstr()->isPHI() && + !Dep.getSUnit()->getInstr()->isPHI()); + } + + bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true); + + /// The latency of the dependence. + unsigned getLatency(SUnit *Source, const SDep &Dep) { + // Anti dependences represent recurrences, so use the latency of the + // instruction on the back-edge. + if (Dep.getKind() == SDep::Anti) { + if (Source->getInstr()->isPHI()) + return Dep.getSUnit()->Latency; + if (Dep.getSUnit()->getInstr()->isPHI()) + return Source->Latency; + return Dep.getLatency(); + } + return Dep.getLatency(); + } + + /// The distance function, which indicates that operation V of iteration I + /// depends on operations U of iteration I-distance. + unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) { + // Instructions that feed a Phi have a distance of 1. Computing larger + // values for arrays requires data dependence information. 
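// Editorial example (not from the original source): in a scalar recurrence such
// as s = s + x, the value computed by the add in iteration i is consumed, via
// the loop-carried PHI, in iteration i + 1; the PHI-related anti edge handled
// below therefore reports a distance of 1, while edges within a single
// iteration report 0.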
+ if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti) + return 1; + return 0; + } + + /// Set the Minimum Initiation Interval for this schedule attempt. + void setMII(unsigned mii) { MII = mii; } + + MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule, + bool UpdateDAG = false); + + /// Return the new base register that was stored away for the changed + /// instruction. + unsigned getInstrBaseReg(SUnit *SU) { + DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = + InstrChanges.find(SU); + if (It != InstrChanges.end()) + return It->second.first; + return 0; + } + + void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { + Mutations.push_back(std::move(Mutation)); + } + +private: + void addLoopCarriedDependences(AliasAnalysis *AA); + void updatePhiDependences(); + void changeDependences(); + unsigned calculateResMII(); + unsigned calculateRecMII(NodeSetType &RecNodeSets); + void findCircuits(NodeSetType &NodeSets); + void fuseRecs(NodeSetType &NodeSets); + void removeDuplicateNodes(NodeSetType &NodeSets); + void computeNodeFunctions(NodeSetType &NodeSets); + void registerPressureFilter(NodeSetType &NodeSets); + void colocateNodeSets(NodeSetType &NodeSets); + void checkNodeSets(NodeSetType &NodeSets); + void groupRemainingNodes(NodeSetType &NodeSets); + void addConnectedNodes(SUnit *SU, NodeSet &NewSet, + SetVector<SUnit *> &NodesAdded); + void computeNodeOrder(NodeSetType &NodeSets); + bool schedulePipeline(SMSchedule &Schedule); + void generatePipelinedLoop(SMSchedule &Schedule); + void generateProlog(SMSchedule &Schedule, unsigned LastStage, + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, + MBBVectorTy &PrologBBs); + void generateEpilog(SMSchedule &Schedule, unsigned LastStage, + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, + MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs); + void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap, unsigned LastStageNum, + unsigned CurStageNum, bool IsLast); + void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap, unsigned LastStageNum, + unsigned CurStageNum, bool IsLast); + void removeDeadInstructions(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs); + void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, + SMSchedule &Schedule); + void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs, SMSchedule &Schedule, + ValueMapTy *VRMap); + bool computeDelta(MachineInstr &MI, unsigned &Delta); + void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, + unsigned Num); + MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum); + MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum, + SMSchedule &Schedule); + void updateInstruction(MachineInstr *NewMI, bool LastDef, + unsigned CurStageNum, unsigned InstStageNum, + SMSchedule &Schedule, ValueMapTy *VRMap); + MachineInstr *findDefInLoop(unsigned Reg); + unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, + unsigned LoopStage, ValueMapTy *VRMap, + MachineBasicBlock *BB); + void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, + SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap); + void 
rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule, + InstrMapTy &InstrMap, unsigned CurStageNum, + unsigned PhiNum, MachineInstr *Phi, + unsigned OldReg, unsigned NewReg, + unsigned PrevReg = 0); + bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, + unsigned &OffsetPos, unsigned &NewBase, + int64_t &NewOffset); + void postprocessDAG(); +}; + +/// A NodeSet contains a set of SUnit DAG nodes with additional information +/// that assigns a priority to the set. +class NodeSet { + SetVector<SUnit *> Nodes; + bool HasRecurrence; + unsigned RecMII = 0; + int MaxMOV = 0; + int MaxDepth = 0; + unsigned Colocate = 0; + SUnit *ExceedPressure = nullptr; + +public: + typedef SetVector<SUnit *>::const_iterator iterator; + + NodeSet() : Nodes(), HasRecurrence(false) {} + + NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {} + + bool insert(SUnit *SU) { return Nodes.insert(SU); } + + void insert(iterator S, iterator E) { Nodes.insert(S, E); } + + template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) { + return Nodes.remove_if(P); + } + + unsigned count(SUnit *SU) const { return Nodes.count(SU); } + + bool hasRecurrence() { return HasRecurrence; }; + + unsigned size() const { return Nodes.size(); } + + bool empty() const { return Nodes.empty(); } + + SUnit *getNode(unsigned i) const { return Nodes[i]; }; + + void setRecMII(unsigned mii) { RecMII = mii; }; + + void setColocate(unsigned c) { Colocate = c; }; + + void setExceedPressure(SUnit *SU) { ExceedPressure = SU; } + + bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; } + + int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; } + + int getRecMII() { return RecMII; } + + /// Summarize node functions for the entire node set. + void computeNodeSetInfo(SwingSchedulerDAG *SSD) { + for (SUnit *SU : *this) { + MaxMOV = std::max(MaxMOV, SSD->getMOV(SU)); + MaxDepth = std::max(MaxDepth, SSD->getDepth(SU)); + } + } + + void clear() { + Nodes.clear(); + RecMII = 0; + HasRecurrence = false; + MaxMOV = 0; + MaxDepth = 0; + Colocate = 0; + ExceedPressure = nullptr; + } + + operator SetVector<SUnit *> &() { return Nodes; } + + /// Sort the node sets by importance. First, rank them by recurrence MII, + /// then by mobility (least mobile done first), and finally by depth. + /// Each node set may contain a colocate value which is used as the first + /// tie breaker, if it's set. + bool operator>(const NodeSet &RHS) const { + if (RecMII == RHS.RecMII) { + if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate) + return Colocate < RHS.Colocate; + if (MaxMOV == RHS.MaxMOV) + return MaxDepth > RHS.MaxDepth; + return MaxMOV < RHS.MaxMOV; + } + return RecMII > RHS.RecMII; + } + + bool operator==(const NodeSet &RHS) const { + return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV && + MaxDepth == RHS.MaxDepth; + } + + bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); } + + iterator begin() { return Nodes.begin(); } + iterator end() { return Nodes.end(); } + + void print(raw_ostream &os) const { + os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV + << " depth " << MaxDepth << " col " << Colocate << "\n"; + for (const auto &I : Nodes) + os << " SU(" << I->NodeNum << ") " << *(I->getInstr()); + os << "\n"; + } + + void dump() const { print(dbgs()); } +}; + +/// This class repesents the scheduled code. The main data structure is a +/// map from scheduled cycle to instructions. 
During scheduling, the +/// data structure explicitly represents all stages/iterations. When +/// the algorithm finshes, the schedule is collapsed into a single stage, +/// which represents instructions from different loop iterations. +/// +/// The SMS algorithm allows negative values for cycles, so the first cycle +/// in the schedule is the smallest cycle value. +class SMSchedule { +private: + /// Map from execution cycle to instructions. + DenseMap<int, std::deque<SUnit *>> ScheduledInstrs; + + /// Map from instruction to execution cycle. + std::map<SUnit *, int> InstrToCycle; + + /// Map for each register and the max difference between its uses and def. + /// The first element in the pair is the max difference in stages. The + /// second is true if the register defines a Phi value and loop value is + /// scheduled before the Phi. + std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff; + + /// Keep track of the first cycle value in the schedule. It starts + /// as zero, but the algorithm allows negative values. + int FirstCycle; + + /// Keep track of the last cycle value in the schedule. + int LastCycle; + + /// The initiation interval (II) for the schedule. + int InitiationInterval; + + /// Target machine information. + const TargetSubtargetInfo &ST; + + /// Virtual register information. + MachineRegisterInfo &MRI; + + DFAPacketizer *Resources; + +public: + SMSchedule(MachineFunction *mf) + : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), + Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) { + FirstCycle = 0; + LastCycle = 0; + InitiationInterval = 0; + } + + ~SMSchedule() { + ScheduledInstrs.clear(); + InstrToCycle.clear(); + RegToStageDiff.clear(); + delete Resources; + } + + void reset() { + ScheduledInstrs.clear(); + InstrToCycle.clear(); + RegToStageDiff.clear(); + FirstCycle = 0; + LastCycle = 0; + InitiationInterval = 0; + } + + /// Set the initiation interval for this schedule. + void setInitiationInterval(int ii) { InitiationInterval = ii; } + + /// Return the first cycle in the completed schedule. This + /// can be a negative value. + int getFirstCycle() const { return FirstCycle; } + + /// Return the last cycle in the finalized schedule. + int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; } + + /// Return the cycle of the earliest scheduled instruction in the dependence + /// chain. + int earliestCycleInChain(const SDep &Dep); + + /// Return the cycle of the latest scheduled instruction in the dependence + /// chain. + int latestCycleInChain(const SDep &Dep); + + void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, + int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG); + bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); + + /// Iterators for the cycle to instruction map. + typedef DenseMap<int, std::deque<SUnit *>>::iterator sched_iterator; + typedef DenseMap<int, std::deque<SUnit *>>::const_iterator + const_sched_iterator; + + /// Return true if the instruction is scheduled at the specified stage. + bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { + return (stageScheduled(SU) == (int)StageNum); + } + + /// Return the stage for a scheduled instruction. Return -1 if + /// the instruction has not been scheduled. + int stageScheduled(SUnit *SU) const { + std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); + if (it == InstrToCycle.end()) + return -1; + return (it->second - FirstCycle) / InitiationInterval; + } + + /// Return the cycle for a scheduled instruction. 
This function normalizes + /// the first cycle to be 0. + unsigned cycleScheduled(SUnit *SU) const { + std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); + assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled."); + return (it->second - FirstCycle) % InitiationInterval; + } + + /// Return the maximum stage count needed for this schedule. + unsigned getMaxStageCount() { + return (LastCycle - FirstCycle) / InitiationInterval; + } + + /// Return the max. number of stages/iterations that can occur between a + /// register definition and its uses. + unsigned getStagesForReg(int Reg, unsigned CurStage) { + std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; + if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second) + return 1; + return Stages.first; + } + + /// The number of stages for a Phi is a little different than other + /// instructions. The minimum value computed in RegToStageDiff is 1 + /// because we assume the Phi is needed for at least 1 iteration. + /// This is not the case if the loop value is scheduled prior to the + /// Phi in the same stage. This function returns the number of stages + /// or iterations needed between the Phi definition and any uses. + unsigned getStagesForPhi(int Reg) { + std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; + if (Stages.second) + return Stages.first; + return Stages.first - 1; + } + + /// Return the instructions that are scheduled at the specified cycle. + std::deque<SUnit *> &getInstructions(int cycle) { + return ScheduledInstrs[cycle]; + } + + bool isValidSchedule(SwingSchedulerDAG *SSD); + void finalizeSchedule(SwingSchedulerDAG *SSD); + bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, + std::deque<SUnit *> &Insts); + bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi); + bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst, + MachineOperand &MO); + void print(raw_ostream &os) const; + void dump() const; +}; + +} // end anonymous namespace + +unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5; +char MachinePipeliner::ID = 0; +#ifndef NDEBUG +int MachinePipeliner::NumTries = 0; +#endif +char &llvm::MachinePipelinerID = MachinePipeliner::ID; +INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner", + "Modulo Software Pipelining", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(MachinePipeliner, "pipeliner", + "Modulo Software Pipelining", false, false) + +/// The "main" function for implementing Swing Modulo Scheduling. +bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(*mf.getFunction())) + return false; + + if (!EnableSWP) + return false; + + if (mf.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && + !EnableSWPOptSize.getPosition()) + return false; + + MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); + MDT = &getAnalysis<MachineDominatorTree>(); + TII = MF->getSubtarget().getInstrInfo(); + RegClassInfo.runOnMachineFunction(*MF); + + for (auto &L : *MLI) + scheduleLoop(*L); + + return false; +} + +/// Attempt to perform the SMS algorithm on the specified loop. This function is +/// the main entry point for the algorithm. The function identifies candidate +/// loops, calculates the minimum initiation interval, and attempts to schedule +/// the loop. 
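A hedged numeric illustration of the stage/cycle bookkeeping in SMSchedule above (all values invented for the example):
// With FirstCycle = -2 and InitiationInterval = 3, an instruction placed at
// cycle 4 satisfies:
//   stageScheduled  = (4 - (-2)) / 3 = 2   // it runs in the third stage
//   cycleScheduled  = (4 - (-2)) % 3 = 0   // first slot within the kernel
//   getFinalCycle() = -2 + 3 - 1 = 0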
+bool MachinePipeliner::scheduleLoop(MachineLoop &L) { + bool Changed = false; + for (auto &InnerLoop : L) + Changed |= scheduleLoop(*InnerLoop); + +#ifndef NDEBUG + // Stop trying after reaching the limit (if any). + int Limit = SwpLoopLimit; + if (Limit >= 0) { + if (NumTries >= SwpLoopLimit) + return Changed; + NumTries++; + } +#endif + + if (!canPipelineLoop(L)) + return Changed; + + ++NumTrytoPipeline; + + Changed = swingModuloScheduler(L); + + return Changed; +} + +/// Return true if the loop can be software pipelined. The algorithm is +/// restricted to loops with a single basic block. Make sure that the +/// branch in the loop can be analyzed. +bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { + if (L.getNumBlocks() != 1) + return false; + + // Check if the branch can't be understood because we can't do pipelining + // if that's the case. + LI.TBB = nullptr; + LI.FBB = nullptr; + LI.BrCond.clear(); + if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) + return false; + + LI.LoopInductionVar = nullptr; + LI.LoopCompare = nullptr; + if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) + return false; + + if (!L.getLoopPreheader()) + return false; + + // If any of the Phis contain subregs, then we can't pipeline + // because we don't know how to maintain subreg information in the + // VMap structure. + MachineBasicBlock *MBB = L.getHeader(); + for (MachineBasicBlock::iterator BBI = MBB->instr_begin(), + BBE = MBB->getFirstNonPHI(); + BBI != BBE; ++BBI) + for (unsigned i = 1; i != BBI->getNumOperands(); i += 2) + if (BBI->getOperand(i).getSubReg() != 0) + return false; + + return true; +} + +/// The SMS algorithm consists of the following main steps: +/// 1. Computation and analysis of the dependence graph. +/// 2. Ordering of the nodes (instructions). +/// 3. Attempt to Schedule the loop. +bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { + assert(L.getBlocks().size() == 1 && "SMS works on single blocks only."); + + SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo); + + MachineBasicBlock *MBB = L.getHeader(); + // The kernel should not include any terminator instructions. These + // will be added back later. + SMS.startBlock(MBB); + + // Compute the number of 'real' instructions in the basic block by + // ignoring terminators. + unsigned size = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(), + E = MBB->instr_end(); + I != E; ++I, --size) + ; + + SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size); + SMS.schedule(); + SMS.exitRegion(); + + SMS.finishBlock(); + return SMS.hasNewSchedule(); +} + +/// We override the schedule function in ScheduleDAGInstrs to implement the +/// scheduling part of the Swing Modulo Scheduling algorithm. +void SwingSchedulerDAG::schedule() { + AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults(); + buildSchedGraph(AA); + addLoopCarriedDependences(AA); + updatePhiDependences(); + Topo.InitDAGTopologicalSorting(); + postprocessDAG(); + changeDependences(); + DEBUG({ + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this); + }); + + NodeSetType NodeSets; + findCircuits(NodeSets); + + // Calculate the MII. + unsigned ResMII = calculateResMII(); + unsigned RecMII = calculateRecMII(NodeSets); + + fuseRecs(NodeSets); + + // This flag is used for testing and can cause correctness problems. 
+ if (SwpIgnoreRecMII) + RecMII = 0; + + MII = std::max(ResMII, RecMII); + DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII + << ")\n"); + + // Can't schedule a loop without a valid MII. + if (MII == 0) + return; + + // Don't pipeline large loops. + if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) + return; + + computeNodeFunctions(NodeSets); + + registerPressureFilter(NodeSets); + + colocateNodeSets(NodeSets); + + checkNodeSets(NodeSets); + + DEBUG({ + for (auto &I : NodeSets) { + dbgs() << " Rec NodeSet "; + I.dump(); + } + }); + + std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>()); + + groupRemainingNodes(NodeSets); + + removeDuplicateNodes(NodeSets); + + DEBUG({ + for (auto &I : NodeSets) { + dbgs() << " NodeSet "; + I.dump(); + } + }); + + computeNodeOrder(NodeSets); + + SMSchedule Schedule(Pass.MF); + Scheduled = schedulePipeline(Schedule); + + if (!Scheduled) + return; + + unsigned numStages = Schedule.getMaxStageCount(); + // No need to generate pipeline if there are no overlapped iterations. + if (numStages == 0) + return; + + // Check that the maximum stage count is less than user-defined limit. + if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) + return; + + generatePipelinedLoop(Schedule); + ++NumPipelined; +} + +/// Clean up after the software pipeliner runs. +void SwingSchedulerDAG::finishBlock() { + for (MachineInstr *I : NewMIs) + MF.DeleteMachineInstr(I); + NewMIs.clear(); + + // Call the superclass. + ScheduleDAGInstrs::finishBlock(); +} + +/// Return the register values for the operands of a Phi instruction. +/// This function assume the instruction is a Phi. +static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, + unsigned &InitVal, unsigned &LoopVal) { + assert(Phi.isPHI() && "Expecting a Phi."); + + InitVal = 0; + LoopVal = 0; + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != Loop) + InitVal = Phi.getOperand(i).getReg(); + else if (Phi.getOperand(i + 1).getMBB() == Loop) + LoopVal = Phi.getOperand(i).getReg(); + + assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); +} + +/// Return the Phi register value that comes from the incoming block. +static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +/// Return the Phi register value that comes the the loop block. +static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() == LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +/// Return true if SUb can be reached from SUa following the chain edges. +static bool isSuccOrder(SUnit *SUa, SUnit *SUb) { + SmallPtrSet<SUnit *, 8> Visited; + SmallVector<SUnit *, 8> Worklist; + Worklist.push_back(SUa); + while (!Worklist.empty()) { + const SUnit *SU = Worklist.pop_back_val(); + for (auto &SI : SU->Succs) { + SUnit *SuccSU = SI.getSUnit(); + if (SI.getKind() == SDep::Order) { + if (Visited.count(SuccSU)) + continue; + if (SuccSU == SUb) + return true; + Worklist.push_back(SuccSU); + Visited.insert(SuccSU); + } + } + } + return false; +} + +/// Return true if the instruction causes a chain between memory +/// references before and after it. 
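A reference note for the getPhiRegs/getInitPhiReg/getLoopPhiReg helpers above: a machine-level PHI lists (value, predecessor block) operand pairs after its destination operand, which is why those loops step by two. A sketch in MIR-like syntax (register and block names invented):
// %v2 = PHI %v0, %bb.preheader, %v5, %bb.loop
//   getInitPhiReg() -> %v0   // value flowing in from outside the loop
//   getLoopPhiReg() -> %v5   // loop-carried value defined inside the loop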
+static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) { + return MI.isCall() || MI.hasUnmodeledSideEffects() || + (MI.hasOrderedMemoryRef() && + (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA))); +} + +/// Return the underlying objects for the memory references of an instruction. +/// This function calls the code in ValueTracking, but first checks that the +/// instruction has a memory operand. +static void getUnderlyingObjects(MachineInstr *MI, + SmallVectorImpl<Value *> &Objs, + const DataLayout &DL) { + if (!MI->hasOneMemOperand()) + return; + MachineMemOperand *MM = *MI->memoperands_begin(); + if (!MM->getValue()) + return; + GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL); +} + +/// Add a chain edge between a load and store if the store can be an +/// alias of the load on a subsequent iteration, i.e., a loop carried +/// dependence. This code is very similar to the code in ScheduleDAGInstrs +/// but that code doesn't create loop carried dependences. +void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { + MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads; + for (auto &SU : SUnits) { + MachineInstr &MI = *SU.getInstr(); + if (isDependenceBarrier(MI, AA)) + PendingLoads.clear(); + else if (MI.mayLoad()) { + SmallVector<Value *, 4> Objs; + getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + for (auto V : Objs) { + SmallVector<SUnit *, 4> &SUs = PendingLoads[V]; + SUs.push_back(&SU); + } + } else if (MI.mayStore()) { + SmallVector<Value *, 4> Objs; + getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + for (auto V : Objs) { + MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I = + PendingLoads.find(V); + if (I == PendingLoads.end()) + continue; + for (auto Load : I->second) { + if (isSuccOrder(Load, &SU)) + continue; + MachineInstr &LdMI = *Load->getInstr(); + // First, perform the cheaper check that compares the base register. + // If they are the same and the load offset is less than the store + // offset, then mark the dependence as loop carried potentially. + unsigned BaseReg1, BaseReg2; + int64_t Offset1, Offset2; + if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) || + !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { + assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && + "What happened to the chain edge?"); + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + // Second, the more expensive check that uses alias analysis on the + // base registers. If they alias, and the load offset is less than + // the store offset, the mark the dependence as loop carried. 
+ if (!AA) { + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + MachineMemOperand *MMO1 = *LdMI.memoperands_begin(); + MachineMemOperand *MMO2 = *MI.memoperands_begin(); + if (!MMO1->getValue() || !MMO2->getValue()) { + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + if (MMO1->getValue() == MMO2->getValue() && + MMO1->getOffset() <= MMO2->getOffset()) { + SU.addPred(SDep(Load, SDep::Barrier)); + continue; + } + AliasResult AAResult = AA->alias( + MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize, + MMO1->getAAInfo()), + MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize, + MMO2->getAAInfo())); + + if (AAResult != NoAlias) + SU.addPred(SDep(Load, SDep::Barrier)); + } + } + } + } +} + +/// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer +/// processes dependences for PHIs. This function adds true dependences +/// from a PHI to a use, and a loop carried dependence from the use to the +/// PHI. The loop carried dependence is represented as an anti dependence +/// edge. This function also removes chain dependences between unrelated +/// PHIs. +void SwingSchedulerDAG::updatePhiDependences() { + SmallVector<SDep, 4> RemoveDeps; + const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>(); + + // Iterate over each DAG node. + for (SUnit &I : SUnits) { + RemoveDeps.clear(); + // Set to true if the instruction has an operand defined by a Phi. + unsigned HasPhiUse = 0; + unsigned HasPhiDef = 0; + MachineInstr *MI = I.getInstr(); + // Iterate over each operand, and we process the definitions. + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (MOI->isDef()) { + // If the register is used by a Phi, then create an anti dependence. + for (MachineRegisterInfo::use_instr_iterator + UI = MRI.use_instr_begin(Reg), + UE = MRI.use_instr_end(); + UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + SUnit *SU = getSUnit(UseMI); + if (SU != nullptr && UseMI->isPHI()) { + if (!MI->isPHI()) { + SDep Dep(SU, SDep::Anti, Reg); + I.addPred(Dep); + } else { + HasPhiDef = Reg; + // Add a chain edge to a dependent Phi that isn't an existing + // predecessor. + if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) + I.addPred(SDep(SU, SDep::Barrier)); + } + } + } + } else if (MOI->isUse()) { + // If the register is defined by a Phi, then create a true dependence. + MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg); + if (DefMI == nullptr) + continue; + SUnit *SU = getSUnit(DefMI); + if (SU != nullptr && DefMI->isPHI()) { + if (!MI->isPHI()) { + SDep Dep(SU, SDep::Data, Reg); + Dep.setLatency(0); + ST.adjustSchedDependency(SU, &I, Dep); + I.addPred(Dep); + } else { + HasPhiUse = Reg; + // Add a chain edge to a dependent Phi that isn't an existing + // predecessor. + if (SU->NodeNum < I.NodeNum && !I.isPred(SU)) + I.addPred(SDep(SU, SDep::Barrier)); + } + } + } + } + // Remove order dependences from an unrelated Phi. 
+ if (!SwpPruneDeps) + continue; + for (auto &PI : I.Preds) { + MachineInstr *PMI = PI.getSUnit()->getInstr(); + if (PMI->isPHI() && PI.getKind() == SDep::Order) { + if (I.getInstr()->isPHI()) { + if (PMI->getOperand(0).getReg() == HasPhiUse) + continue; + if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef) + continue; + } + RemoveDeps.push_back(PI); + } + } + for (int i = 0, e = RemoveDeps.size(); i != e; ++i) + I.removePred(RemoveDeps[i]); + } +} + +/// Iterate over each DAG node and see if we can change any dependences +/// in order to reduce the recurrence MII. +void SwingSchedulerDAG::changeDependences() { + // See if an instruction can use a value from the previous iteration. + // If so, we update the base and offset of the instruction and change + // the dependences. + for (SUnit &I : SUnits) { + unsigned BasePos = 0, OffsetPos = 0, NewBase = 0; + int64_t NewOffset = 0; + if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase, + NewOffset)) + continue; + + // Get the MI and SUnit for the instruction that defines the original base. + unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg(); + MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase); + if (!DefMI) + continue; + SUnit *DefSU = getSUnit(DefMI); + if (!DefSU) + continue; + // Get the MI and SUnit for the instruction that defins the new base. + MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase); + if (!LastMI) + continue; + SUnit *LastSU = getSUnit(LastMI); + if (!LastSU) + continue; + + if (Topo.IsReachable(&I, LastSU)) + continue; + + // Remove the dependence. The value now depends on a prior iteration. + SmallVector<SDep, 4> Deps; + for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E; + ++P) + if (P->getSUnit() == DefSU) + Deps.push_back(*P); + for (int i = 0, e = Deps.size(); i != e; i++) { + Topo.RemovePred(&I, Deps[i].getSUnit()); + I.removePred(Deps[i]); + } + // Remove the chain dependence between the instructions. + Deps.clear(); + for (auto &P : LastSU->Preds) + if (P.getSUnit() == &I && P.getKind() == SDep::Order) + Deps.push_back(P); + for (int i = 0, e = Deps.size(); i != e; i++) { + Topo.RemovePred(LastSU, Deps[i].getSUnit()); + LastSU->removePred(Deps[i]); + } + + // Add a dependence between the new instruction and the instruction + // that defines the new base. + SDep Dep(&I, SDep::Anti, NewBase); + LastSU->addPred(Dep); + + // Remember the base and offset information so that we can update the + // instruction during code generation. + InstrChanges[&I] = std::make_pair(NewBase, NewOffset); + } +} + +namespace { + +// FuncUnitSorter - Comparison operator used to sort instructions by +// the number of functional unit choices. +struct FuncUnitSorter { + const InstrItineraryData *InstrItins; + DenseMap<unsigned, unsigned> Resources; + + // Compute the number of functional unit alternatives needed + // at each stage, and take the minimum value. We prioritize the + // instructions by the least number of choices first. + unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const { + unsigned schedClass = Inst->getDesc().getSchedClass(); + unsigned min = UINT_MAX; + for (const InstrStage *IS = InstrItins->beginStage(schedClass), + *IE = InstrItins->endStage(schedClass); + IS != IE; ++IS) { + unsigned funcUnits = IS->getUnits(); + unsigned numAlternatives = countPopulation(funcUnits); + if (numAlternatives < min) { + min = numAlternatives; + F = funcUnits; + } + } + return min; + } + + // Compute the critical resources needed by the instruction. 
This + // function records the functional units needed by instructions that + // must use only one functional unit. We use this as a tie breaker + // for computing the resource MII. The instrutions that require + // the same, highly used, functional unit have high priority. + void calcCriticalResources(MachineInstr &MI) { + unsigned SchedClass = MI.getDesc().getSchedClass(); + for (const InstrStage *IS = InstrItins->beginStage(SchedClass), + *IE = InstrItins->endStage(SchedClass); + IS != IE; ++IS) { + unsigned FuncUnits = IS->getUnits(); + if (countPopulation(FuncUnits) == 1) + Resources[FuncUnits]++; + } + } + + FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {} + /// Return true if IS1 has less priority than IS2. + bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const { + unsigned F1 = 0, F2 = 0; + unsigned MFUs1 = minFuncUnits(IS1, F1); + unsigned MFUs2 = minFuncUnits(IS2, F2); + if (MFUs1 == 1 && MFUs2 == 1) + return Resources.lookup(F1) < Resources.lookup(F2); + return MFUs1 > MFUs2; + } +}; + +} // end anonymous namespace + +/// Calculate the resource constrained minimum initiation interval for the +/// specified loop. We use the DFA to model the resources needed for +/// each instruction, and we ignore dependences. A different DFA is created +/// for each cycle that is required. When adding a new instruction, we attempt +/// to add it to each existing DFA, until a legal space is found. If the +/// instruction cannot be reserved in an existing DFA, we create a new one. +unsigned SwingSchedulerDAG::calculateResMII() { + SmallVector<DFAPacketizer *, 8> Resources; + MachineBasicBlock *MBB = Loop.getHeader(); + Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget())); + + // Sort the instructions by the number of available choices for scheduling, + // least to most. Use the number of critical resources as the tie breaker. + FuncUnitSorter FUS = + FuncUnitSorter(MF.getSubtarget().getInstrItineraryData()); + for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(), + E = MBB->getFirstTerminator(); + I != E; ++I) + FUS.calcCriticalResources(*I); + PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter> + FuncUnitOrder(FUS); + + for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(), + E = MBB->getFirstTerminator(); + I != E; ++I) + FuncUnitOrder.push(&*I); + + while (!FuncUnitOrder.empty()) { + MachineInstr *MI = FuncUnitOrder.top(); + FuncUnitOrder.pop(); + if (TII->isZeroCost(MI->getOpcode())) + continue; + // Attempt to reserve the instruction in an existing DFA. At least one + // DFA is needed for each cycle. + unsigned NumCycles = getSUnit(MI)->Latency; + unsigned ReservedCycles = 0; + SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin(); + SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end(); + for (unsigned C = 0; C < NumCycles; ++C) + while (RI != RE) { + if ((*RI++)->canReserveResources(*MI)) { + ++ReservedCycles; + break; + } + } + // Start reserving resources using existing DFAs. + for (unsigned C = 0; C < ReservedCycles; ++C) { + --RI; + (*RI)->reserveResources(*MI); + } + // Add new DFAs, if needed, to reserve resources. 
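// Editorial illustration (not from the original source): if, say, three
// instructions in the loop body can each issue only on a single multiply unit,
// they cannot share a cycle, so three DFAs end up being allocated below and
// the loop's ResMII is at least 3.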
+ for (unsigned C = ReservedCycles; C < NumCycles; ++C) { + DFAPacketizer *NewResource = + TII->CreateTargetScheduleState(MF.getSubtarget()); + assert(NewResource->canReserveResources(*MI) && "Reserve error."); + NewResource->reserveResources(*MI); + Resources.push_back(NewResource); + } + } + int Resmii = Resources.size(); + // Delete the memory for each of the DFAs that were created earlier. + for (DFAPacketizer *RI : Resources) { + DFAPacketizer *D = RI; + delete D; + } + Resources.clear(); + return Resmii; +} + +/// Calculate the recurrence-constrainted minimum initiation interval. +/// Iterate over each circuit. Compute the delay(c) and distance(c) +/// for each circuit. The II needs to satisfy the inequality +/// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest +/// II that satistifies the inequality, and the RecMII is the maximum +/// of those values. +unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) { + unsigned RecMII = 0; + + for (NodeSet &Nodes : NodeSets) { + if (Nodes.size() == 0) + continue; + + unsigned Delay = Nodes.size() - 1; + unsigned Distance = 1; + + // ii = ceil(delay / distance) + unsigned CurMII = (Delay + Distance - 1) / Distance; + Nodes.setRecMII(CurMII); + if (CurMII > RecMII) + RecMII = CurMII; + } + + return RecMII; +} + +/// Swap all the anti dependences in the DAG. That means it is no longer a DAG, +/// but we do this to find the circuits, and then change them back. +static void swapAntiDependences(std::vector<SUnit> &SUnits) { + SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + SUnit *SU = &SUnits[i]; + for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end(); + IP != EP; ++IP) { + if (IP->getKind() != SDep::Anti) + continue; + DepsAdded.push_back(std::make_pair(SU, *IP)); + } + } + for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(), + E = DepsAdded.end(); + I != E; ++I) { + // Remove this anti dependency and add one in the reverse direction. + SUnit *SU = I->first; + SDep &D = I->second; + SUnit *TargetSU = D.getSUnit(); + unsigned Reg = D.getReg(); + unsigned Lat = D.getLatency(); + SU->removePred(D); + SDep Dep(SU, SDep::Anti, Reg); + Dep.setLatency(Lat); + TargetSU->addPred(Dep); + } +} + +/// Create the adjacency structure of the nodes in the graph. +void SwingSchedulerDAG::Circuits::createAdjacencyStructure( + SwingSchedulerDAG *DAG) { + BitVector Added(SUnits.size()); + for (int i = 0, e = SUnits.size(); i != e; ++i) { + Added.reset(); + // Add any successor to the adjacency matrix and exclude duplicates. + for (auto &SI : SUnits[i].Succs) { + // Do not process a boundary node and a back-edge is processed only + // if it goes to a Phi. + if (SI.getSUnit()->isBoundaryNode() || + (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI())) + continue; + int N = SI.getSUnit()->NodeNum; + if (!Added.test(N)) { + AdjK[i].push_back(N); + Added.set(N); + } + } + // A chain edge between a store and a load is treated as a back-edge in the + // adjacency matrix. + for (auto &PI : SUnits[i].Preds) { + if (!SUnits[i].getInstr()->mayStore() || + !DAG->isLoopCarriedOrder(&SUnits[i], PI, false)) + continue; + if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) { + int N = PI.getSUnit()->NodeNum; + if (!Added.test(N)) { + AdjK[i].push_back(N); + Added.set(N); + } + } + } + } +} + +/// Identify an elementary circuit in the dependence graph starting at the +/// specified node. 
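A worked example for calculateRecMII above and the circuits found by the routine below (the circuit is invented for illustration): if circuit finding returns the elementary circuit {PHI, add, mul} for a value that wraps around the loop once, this implementation approximates
//   Delay    = Nodes.size() - 1 = 2
//   Distance = 1
//   CurMII   = ceil(Delay / Distance) = (2 + 1 - 1) / 1 = 2
// so this circuit alone forces II >= 2, i.e. delay(c) - II * distance(c) <= 0.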
+bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets, + bool HasBackedge) { + SUnit *SV = &SUnits[V]; + bool F = false; + Stack.insert(SV); + Blocked.set(V); + + for (auto W : AdjK[V]) { + if (NumPaths > MaxPaths) + break; + if (W < S) + continue; + if (W == S) { + if (!HasBackedge) + NodeSets.push_back(NodeSet(Stack.begin(), Stack.end())); + F = true; + ++NumPaths; + break; + } else if (!Blocked.test(W)) { + if (circuit(W, S, NodeSets, W < V ? true : HasBackedge)) + F = true; + } + } + + if (F) + unblock(V); + else { + for (auto W : AdjK[V]) { + if (W < S) + continue; + if (B[W].count(SV) == 0) + B[W].insert(SV); + } + } + Stack.pop_back(); + return F; +} + +/// Unblock a node in the circuit finding algorithm. +void SwingSchedulerDAG::Circuits::unblock(int U) { + Blocked.reset(U); + SmallPtrSet<SUnit *, 4> &BU = B[U]; + while (!BU.empty()) { + SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin(); + assert(SI != BU.end() && "Invalid B set."); + SUnit *W = *SI; + BU.erase(W); + if (Blocked.test(W->NodeNum)) + unblock(W->NodeNum); + } +} + +/// Identify all the elementary circuits in the dependence graph using +/// Johnson's circuit algorithm. +void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { + // Swap all the anti dependences in the DAG. That means it is no longer a DAG, + // but we do this to find the circuits, and then change them back. + swapAntiDependences(SUnits); + + Circuits Cir(SUnits); + // Create the adjacency structure. + Cir.createAdjacencyStructure(this); + for (int i = 0, e = SUnits.size(); i != e; ++i) { + Cir.reset(); + Cir.circuit(i, i, NodeSets); + } + + // Change the dependences back so that we've created a DAG again. + swapAntiDependences(SUnits); +} + +/// Return true for DAG nodes that we ignore when computing the cost functions. +/// We ignore the back-edge recurrence in order to avoid unbounded recurison +/// in the calculation of the ASAP, ALAP, etc functions. +static bool ignoreDependence(const SDep &D, bool isPred) { + if (D.isArtificial()) + return true; + return D.getKind() == SDep::Anti && isPred; +} + +/// Compute several functions need to order the nodes for scheduling. +/// ASAP - Earliest time to schedule a node. +/// ALAP - Latest time to schedule a node. +/// MOV - Mobility function, difference between ALAP and ASAP. +/// D - Depth of each node. +/// H - Height of each node. +void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { + + ScheduleInfo.resize(SUnits.size()); + + DEBUG({ + for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), + E = Topo.end(); + I != E; ++I) { + SUnit *SU = &SUnits[*I]; + SU->dump(this); + } + }); + + int maxASAP = 0; + // Compute ASAP. + for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(), + E = Topo.end(); + I != E; ++I) { + int asap = 0; + SUnit *SU = &SUnits[*I]; + for (SUnit::const_pred_iterator IP = SU->Preds.begin(), + EP = SU->Preds.end(); + IP != EP; ++IP) { + if (ignoreDependence(*IP, true)) + continue; + SUnit *pred = IP->getSUnit(); + asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) - + getDistance(pred, SU, *IP) * MII)); + } + maxASAP = std::max(maxASAP, asap); + ScheduleInfo[*I].ASAP = asap; + } + + // Compute ALAP and MOV. 
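// Editorial note (not in the original source): in this pass's terms the loop
// above computed, for each node v over its non-ignored incoming edges,
//   ASAP(v) = max over predecessors u of
//             ASAP(u) + latency(u, v) - distance(u, v) * MII
// and the loop below computes
//   ALAP(v) = min over successors w of
//             ALAP(w) - latency(v, w) + distance(v, w) * MII
// with the mobility MOV(v) = ALAP(v) - ASAP(v) used later as scheduling slack.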
+ for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(), + E = Topo.rend(); + I != E; ++I) { + int alap = maxASAP; + SUnit *SU = &SUnits[*I]; + for (SUnit::const_succ_iterator IS = SU->Succs.begin(), + ES = SU->Succs.end(); + IS != ES; ++IS) { + if (ignoreDependence(*IS, true)) + continue; + SUnit *succ = IS->getSUnit(); + alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) + + getDistance(SU, succ, *IS) * MII)); + } + + ScheduleInfo[*I].ALAP = alap; + } + + // After computing the node functions, compute the summary for each node set. + for (NodeSet &I : NodeSets) + I.computeNodeSetInfo(this); + + DEBUG({ + for (unsigned i = 0; i < SUnits.size(); i++) { + dbgs() << "\tNode " << i << ":\n"; + dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n"; + dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n"; + dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n"; + dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n"; + dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n"; + } + }); +} + +/// Compute the Pred_L(O) set, as defined in the paper. The set is defined +/// as the predecessors of the elements of NodeOrder that are not also in +/// NodeOrder. +static bool pred_L(SetVector<SUnit *> &NodeOrder, + SmallSetVector<SUnit *, 8> &Preds, + const NodeSet *S = nullptr) { + Preds.clear(); + for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end(); + I != E; ++I) { + for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end(); + PI != PE; ++PI) { + if (S && S->count(PI->getSUnit()) == 0) + continue; + if (ignoreDependence(*PI, true)) + continue; + if (NodeOrder.count(PI->getSUnit()) == 0) + Preds.insert(PI->getSUnit()); + } + // Back-edges are predecessors with an anti-dependence. + for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(), + ES = (*I)->Succs.end(); + IS != ES; ++IS) { + if (IS->getKind() != SDep::Anti) + continue; + if (S && S->count(IS->getSUnit()) == 0) + continue; + if (NodeOrder.count(IS->getSUnit()) == 0) + Preds.insert(IS->getSUnit()); + } + } + return Preds.size() > 0; +} + +/// Compute the Succ_L(O) set, as defined in the paper. The set is defined +/// as the successors of the elements of NodeOrder that are not also in +/// NodeOrder. +static bool succ_L(SetVector<SUnit *> &NodeOrder, + SmallSetVector<SUnit *, 8> &Succs, + const NodeSet *S = nullptr) { + Succs.clear(); + for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end(); + I != E; ++I) { + for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end(); + SI != SE; ++SI) { + if (S && S->count(SI->getSUnit()) == 0) + continue; + if (ignoreDependence(*SI, false)) + continue; + if (NodeOrder.count(SI->getSUnit()) == 0) + Succs.insert(SI->getSUnit()); + } + for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(), + PE = (*I)->Preds.end(); + PI != PE; ++PI) { + if (PI->getKind() != SDep::Anti) + continue; + if (S && S->count(PI->getSUnit()) == 0) + continue; + if (NodeOrder.count(PI->getSUnit()) == 0) + Succs.insert(PI->getSUnit()); + } + } + return Succs.size() > 0; +} + +/// Return true if there is a path from the specified node to any of the nodes +/// in DestNodes. Keep track and return the nodes in any path. 
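// Standalone sketch of the path search described above (plain C++,
// illustrative names; it only walks successor edges, whereas the code below
// also follows anti-dependence predecessors so that back-edges are traversed):
#include <set>
#include <vector>

static bool findPath(int Cur, const std::vector<std::vector<int>> &Succs,
                     const std::set<int> &DestNodes,
                     const std::set<int> &Exclude, std::set<int> &Visited,
                     std::set<int> &Path) {
  if (Exclude.count(Cur))
    return false;
  if (DestNodes.count(Cur))
    return true;
  if (!Visited.insert(Cur).second)
    return Path.count(Cur) != 0; // already explored; reuse the earlier answer
  bool Found = false;
  for (int Next : Succs[Cur])
    Found |= findPath(Next, Succs, DestNodes, Exclude, Visited, Path);
  if (Found)
    Path.insert(Cur); // remember every node that lies on a found path
  return Found;
}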
+static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path, + SetVector<SUnit *> &DestNodes, + SetVector<SUnit *> &Exclude, + SmallPtrSet<SUnit *, 8> &Visited) { + if (Cur->isBoundaryNode()) + return false; + if (Exclude.count(Cur) != 0) + return false; + if (DestNodes.count(Cur) != 0) + return true; + if (!Visited.insert(Cur).second) + return Path.count(Cur) != 0; + bool FoundPath = false; + for (auto &SI : Cur->Succs) + FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited); + for (auto &PI : Cur->Preds) + if (PI.getKind() == SDep::Anti) + FoundPath |= + computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited); + if (FoundPath) + Path.insert(Cur); + return FoundPath; +} + +/// Return true if Set1 is a subset of Set2. +template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) { + for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I) + if (Set2.count(*I) == 0) + return false; + return true; +} + +/// Compute the live-out registers for the instructions in a node-set. +/// The live-out registers are those that are defined in the node-set, +/// but not used. Except for use operands of Phis. +static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, + NodeSet &NS) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SmallVector<RegisterMaskPair, 8> LiveOutRegs; + SmallSet<unsigned, 4> Uses; + for (SUnit *SU : NS) { + const MachineInstr *MI = SU->getInstr(); + if (MI->isPHI()) + continue; + for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isUse()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Uses.insert(Reg); + else if (MRI.isAllocatable(Reg)) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + Uses.insert(*Units); + } + } + for (SUnit *SU : NS) + for (const MachineOperand &MO : SU->getInstr()->operands()) + if (MO.isReg() && MO.isDef() && !MO.isDead()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Uses.count(Reg)) + LiveOutRegs.push_back(RegisterMaskPair(Reg, + LaneBitmask::getNone())); + } else if (MRI.isAllocatable(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + if (!Uses.count(*Units)) + LiveOutRegs.push_back(RegisterMaskPair(*Units, + LaneBitmask::getNone())); + } + } + RPTracker.addLiveRegs(LiveOutRegs); +} + +/// A heuristic to filter nodes in recurrent node-sets if the register +/// pressure of a set is too high. +void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) { + for (auto &NS : NodeSets) { + // Skip small node-sets since they won't cause register pressure problems. + if (NS.size() <= 2) + continue; + IntervalPressure RecRegPressure; + RegPressureTracker RecRPTracker(RecRegPressure); + RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true); + computeLiveOuts(MF, RecRPTracker, NS); + RecRPTracker.closeBottom(); + + std::vector<SUnit *> SUnits(NS.begin(), NS.end()); + std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) { + return A->NodeNum > B->NodeNum; + }); + + for (auto &SU : SUnits) { + // Since we're computing the register pressure for a subset of the + // instructions in a block, we need to set the tracker for each + // instruction in the node-set. The tracker is set to the instruction + // just after the one we're interested in. 
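// The pressure walk below starts from the live-outs computed by
// computeLiveOuts above. As a rough standalone sketch of that rule (plain C++,
// illustrative types): a register defined in the node-set is live-out unless
// some non-Phi instruction in the set uses it.
#include <set>
#include <vector>

struct NodeInsn { std::set<unsigned> Defs, Uses; bool IsPhi; };

static std::set<unsigned> liveOutRegs(const std::vector<NodeInsn> &NodeSet) {
  std::set<unsigned> Uses, LiveOut;
  for (const NodeInsn &I : NodeSet)
    if (!I.IsPhi) // use operands of Phis are ignored, as noted above
      Uses.insert(I.Uses.begin(), I.Uses.end());
  for (const NodeInsn &I : NodeSet)
    for (unsigned D : I.Defs)
      if (!Uses.count(D))
        LiveOut.insert(D);
  return LiveOut;
}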
+ MachineBasicBlock::const_iterator CurInstI = SU->getInstr(); + RecRPTracker.setPos(std::next(CurInstI)); + + RegPressureDelta RPDelta; + ArrayRef<PressureChange> CriticalPSets; + RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta, + CriticalPSets, + RecRegPressure.MaxSetPressure); + if (RPDelta.Excess.isValid()) { + DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") " + << TRI->getRegPressureSetName(RPDelta.Excess.getPSet()) + << ":" << RPDelta.Excess.getUnitInc()); + NS.setExceedPressure(SU); + break; + } + RecRPTracker.recede(); + } + } +} + +/// A heuristic to colocate node sets that have the same set of +/// successors. +void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) { + unsigned Colocate = 0; + for (int i = 0, e = NodeSets.size(); i < e; ++i) { + NodeSet &N1 = NodeSets[i]; + SmallSetVector<SUnit *, 8> S1; + if (N1.empty() || !succ_L(N1, S1)) + continue; + for (int j = i + 1; j < e; ++j) { + NodeSet &N2 = NodeSets[j]; + if (N1.compareRecMII(N2) != 0) + continue; + SmallSetVector<SUnit *, 8> S2; + if (N2.empty() || !succ_L(N2, S2)) + continue; + if (isSubset(S1, S2) && S1.size() == S2.size()) { + N1.setColocate(++Colocate); + N2.setColocate(Colocate); + break; + } + } + } +} + +/// Check if the existing node-sets are profitable. If not, then ignore the +/// recurrent node-sets, and attempt to schedule all nodes together. This is +/// a heuristic. If the MII is large and there is a non-recurrent node with +/// a large depth compared to the MII, then it's best to try and schedule +/// all instruction together instead of starting with the recurrent node-sets. +void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) { + // Look for loops with a large MII. + if (MII <= 20) + return; + // Check if the node-set contains only a simple add recurrence. + for (auto &NS : NodeSets) + if (NS.size() > 2) + return; + // If the depth of any instruction is significantly larger than the MII, then + // ignore the recurrent node-sets and treat all instructions equally. + for (auto &SU : SUnits) + if (SU.getDepth() > MII * 1.5) { + NodeSets.clear(); + DEBUG(dbgs() << "Clear recurrence node-sets\n"); + return; + } +} + +/// Add the nodes that do not belong to a recurrence set into groups +/// based upon connected componenets. +void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { + SetVector<SUnit *> NodesAdded; + SmallPtrSet<SUnit *, 8> Visited; + // Add the nodes that are on a path between the previous node sets and + // the current node set. + for (NodeSet &I : NodeSets) { + SmallSetVector<SUnit *, 8> N; + // Add the nodes from the current node set to the previous node set. + if (succ_L(I, N)) { + SetVector<SUnit *> Path; + for (SUnit *NI : N) { + Visited.clear(); + computePath(NI, Path, NodesAdded, I, Visited); + } + if (Path.size() > 0) + I.insert(Path.begin(), Path.end()); + } + // Add the nodes from the previous node set to the current node set. + N.clear(); + if (succ_L(NodesAdded, N)) { + SetVector<SUnit *> Path; + for (SUnit *NI : N) { + Visited.clear(); + computePath(NI, Path, I, NodesAdded, Visited); + } + if (Path.size() > 0) + I.insert(Path.begin(), Path.end()); + } + NodesAdded.insert(I.begin(), I.end()); + } + + // Create a new node set with the connected nodes of any successor of a node + // in a recurrent set. 
+ NodeSet NewSet; + SmallSetVector<SUnit *, 8> N; + if (succ_L(NodesAdded, N)) + for (SUnit *I : N) + addConnectedNodes(I, NewSet, NodesAdded); + if (NewSet.size() > 0) + NodeSets.push_back(NewSet); + + // Create a new node set with the connected nodes of any predecessor of a node + // in a recurrent set. + NewSet.clear(); + if (pred_L(NodesAdded, N)) + for (SUnit *I : N) + addConnectedNodes(I, NewSet, NodesAdded); + if (NewSet.size() > 0) + NodeSets.push_back(NewSet); + + // Create new nodes sets with the connected nodes any any remaining node that + // has no predecessor. + for (unsigned i = 0; i < SUnits.size(); ++i) { + SUnit *SU = &SUnits[i]; + if (NodesAdded.count(SU) == 0) { + NewSet.clear(); + addConnectedNodes(SU, NewSet, NodesAdded); + if (NewSet.size() > 0) + NodeSets.push_back(NewSet); + } + } +} + +/// Add the node to the set, and add all is its connected nodes to the set. +void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, + SetVector<SUnit *> &NodesAdded) { + NewSet.insert(SU); + NodesAdded.insert(SU); + for (auto &SI : SU->Succs) { + SUnit *Successor = SI.getSUnit(); + if (!SI.isArtificial() && NodesAdded.count(Successor) == 0) + addConnectedNodes(Successor, NewSet, NodesAdded); + } + for (auto &PI : SU->Preds) { + SUnit *Predecessor = PI.getSUnit(); + if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0) + addConnectedNodes(Predecessor, NewSet, NodesAdded); + } +} + +/// Return true if Set1 contains elements in Set2. The elements in common +/// are returned in a different container. +static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2, + SmallSetVector<SUnit *, 8> &Result) { + Result.clear(); + for (unsigned i = 0, e = Set1.size(); i != e; ++i) { + SUnit *SU = Set1[i]; + if (Set2.count(SU) != 0) + Result.insert(SU); + } + return !Result.empty(); +} + +/// Merge the recurrence node sets that have the same initial node. +void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) { + for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E; + ++I) { + NodeSet &NI = *I; + for (NodeSetType::iterator J = I + 1; J != E;) { + NodeSet &NJ = *J; + if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) { + if (NJ.compareRecMII(NI) > 0) + NI.setRecMII(NJ.getRecMII()); + for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI; + ++NII) + I->insert(*NII); + NodeSets.erase(J); + E = NodeSets.end(); + } else { + ++J; + } + } + } +} + +/// Remove nodes that have been scheduled in previous NodeSets. +void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) { + for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E; + ++I) + for (NodeSetType::iterator J = I + 1; J != E;) { + J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); }); + + if (J->size() == 0) { + NodeSets.erase(J); + E = NodeSets.end(); + } else { + ++J; + } + } +} + +/// Return true if Inst1 defines a value that is used in Inst2. +static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) { + for (auto &SI : Inst1->Succs) + if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data) + return true; + return false; +} + +/// Compute an ordered list of the dependence graph nodes, which +/// indicates the order that the nodes will be scheduled. This is a +/// two-level algorithm. First, a partial order is created, which +/// consists of a list of sets ordered from highest to lowest priority. 
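// Before the ordering below runs, every remaining node has been pulled into
// some node-set by the grouping above. That grouping is essentially this flood
// fill (plain C++ sketch, illustrative names; the real code also skips
// artificial edges):
#include <set>
#include <vector>

static std::set<int> connectedGroup(int Seed,
                                    const std::vector<std::vector<int>> &Adj,
                                    std::set<int> &Claimed) {
  std::set<int> Group;
  std::vector<int> Work{Seed};
  while (!Work.empty()) {
    int V = Work.back();
    Work.pop_back();
    if (!Claimed.insert(V).second)
      continue; // already assigned to an earlier node-set
    Group.insert(V);
    for (int W : Adj[V]) // Adj holds both successor and predecessor neighbors
      Work.push_back(W);
  }
  return Group;
}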
+void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { + SmallSetVector<SUnit *, 8> R; + NodeOrder.clear(); + + for (auto &Nodes : NodeSets) { + DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n"); + OrderKind Order; + SmallSetVector<SUnit *, 8> N; + if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) { + R.insert(N.begin(), N.end()); + Order = BottomUp; + DEBUG(dbgs() << " Bottom up (preds) "); + } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) { + R.insert(N.begin(), N.end()); + Order = TopDown; + DEBUG(dbgs() << " Top down (succs) "); + } else if (isIntersect(N, Nodes, R)) { + // If some of the successors are in the existing node-set, then use the + // top-down ordering. + Order = TopDown; + DEBUG(dbgs() << " Top down (intersect) "); + } else if (NodeSets.size() == 1) { + for (auto &N : Nodes) + if (N->Succs.size() == 0) + R.insert(N); + Order = BottomUp; + DEBUG(dbgs() << " Bottom up (all) "); + } else { + // Find the node with the highest ASAP. + SUnit *maxASAP = nullptr; + for (SUnit *SU : Nodes) { + if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP)) + maxASAP = SU; + } + R.insert(maxASAP); + Order = BottomUp; + DEBUG(dbgs() << " Bottom up (default) "); + } + + while (!R.empty()) { + if (Order == TopDown) { + // Choose the node with the maximum height. If more than one, choose + // the node with the lowest MOV. If still more than one, check if there + // is a dependence between the instructions. + while (!R.empty()) { + SUnit *maxHeight = nullptr; + for (SUnit *I : R) { + if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight)) + maxHeight = I; + else if (getHeight(I) == getHeight(maxHeight) && + getMOV(I) < getMOV(maxHeight) && + !hasDataDependence(maxHeight, I)) + maxHeight = I; + else if (hasDataDependence(I, maxHeight)) + maxHeight = I; + } + NodeOrder.insert(maxHeight); + DEBUG(dbgs() << maxHeight->NodeNum << " "); + R.remove(maxHeight); + for (const auto &I : maxHeight->Succs) { + if (Nodes.count(I.getSUnit()) == 0) + continue; + if (NodeOrder.count(I.getSUnit()) != 0) + continue; + if (ignoreDependence(I, false)) + continue; + R.insert(I.getSUnit()); + } + // Back-edges are predecessors with an anti-dependence. + for (const auto &I : maxHeight->Preds) { + if (I.getKind() != SDep::Anti) + continue; + if (Nodes.count(I.getSUnit()) == 0) + continue; + if (NodeOrder.count(I.getSUnit()) != 0) + continue; + R.insert(I.getSUnit()); + } + } + Order = BottomUp; + DEBUG(dbgs() << "\n Switching order to bottom up "); + SmallSetVector<SUnit *, 8> N; + if (pred_L(NodeOrder, N, &Nodes)) + R.insert(N.begin(), N.end()); + } else { + // Choose the node with the maximum depth. If more than one, choose + // the node with the lowest MOV. If there is still more than one, check + // for a dependence between the instructions. 
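// Condensed sketch of the selection rule spelled out above (plain C++,
// illustrative names): pick the candidate with the greatest height (top-down)
// or depth (bottom-up); on a tie prefer the smaller mobility MOV = ALAP - ASAP.
// The loops below additionally break remaining ties using data dependences.
#include <cstddef>
#include <vector>

struct Candidate { int Id; int HeightOrDepth; int MOV; };

static int pickNext(const std::vector<Candidate> &R) {
  std::size_t Best = 0; // assumes R is non-empty
  for (std::size_t I = 1, E = R.size(); I != E; ++I)
    if (R[I].HeightOrDepth > R[Best].HeightOrDepth ||
        (R[I].HeightOrDepth == R[Best].HeightOrDepth && R[I].MOV < R[Best].MOV))
      Best = I;
  return R[Best].Id;
}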
+ while (!R.empty()) { + SUnit *maxDepth = nullptr; + for (SUnit *I : R) { + if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth)) + maxDepth = I; + else if (getDepth(I) == getDepth(maxDepth) && + getMOV(I) < getMOV(maxDepth) && + !hasDataDependence(I, maxDepth)) + maxDepth = I; + else if (hasDataDependence(maxDepth, I)) + maxDepth = I; + } + NodeOrder.insert(maxDepth); + DEBUG(dbgs() << maxDepth->NodeNum << " "); + R.remove(maxDepth); + if (Nodes.isExceedSU(maxDepth)) { + Order = TopDown; + R.clear(); + R.insert(Nodes.getNode(0)); + break; + } + for (const auto &I : maxDepth->Preds) { + if (Nodes.count(I.getSUnit()) == 0) + continue; + if (NodeOrder.count(I.getSUnit()) != 0) + continue; + if (I.getKind() == SDep::Anti) + continue; + R.insert(I.getSUnit()); + } + // Back-edges are predecessors with an anti-dependence. + for (const auto &I : maxDepth->Succs) { + if (I.getKind() != SDep::Anti) + continue; + if (Nodes.count(I.getSUnit()) == 0) + continue; + if (NodeOrder.count(I.getSUnit()) != 0) + continue; + R.insert(I.getSUnit()); + } + } + Order = TopDown; + DEBUG(dbgs() << "\n Switching order to top down "); + SmallSetVector<SUnit *, 8> N; + if (succ_L(NodeOrder, N, &Nodes)) + R.insert(N.begin(), N.end()); + } + } + DEBUG(dbgs() << "\nDone with Nodeset\n"); + } + + DEBUG({ + dbgs() << "Node order: "; + for (SUnit *I : NodeOrder) + dbgs() << " " << I->NodeNum << " "; + dbgs() << "\n"; + }); +} + +/// Process the nodes in the computed order and create the pipelined schedule +/// of the instructions, if possible. Return true if a schedule is found. +bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { + + if (NodeOrder.size() == 0) + return false; + + bool scheduleFound = false; + // Keep increasing II until a valid schedule is found. + for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) { + Schedule.reset(); + Schedule.setInitiationInterval(II); + DEBUG(dbgs() << "Try to schedule with " << II << "\n"); + + SetVector<SUnit *>::iterator NI = NodeOrder.begin(); + SetVector<SUnit *>::iterator NE = NodeOrder.end(); + do { + SUnit *SU = *NI; + + // Compute the schedule time for the instruction, which is based + // upon the scheduled time for any predecessors/successors. + int EarlyStart = INT_MIN; + int LateStart = INT_MAX; + // These values are set when the size of the schedule window is limited + // due to chain dependences. + int SchedEnd = INT_MAX; + int SchedStart = INT_MIN; + Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart, + II, this); + DEBUG({ + dbgs() << "Inst (" << SU->NodeNum << ") "; + SU->getInstr()->dump(); + dbgs() << "\n"; + }); + DEBUG({ + dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart + << " me: " << SchedEnd << " ms: " << SchedStart << "\n"; + }); + + if (EarlyStart > LateStart || SchedEnd < EarlyStart || + SchedStart > LateStart) + scheduleFound = false; + else if (EarlyStart != INT_MIN && LateStart == INT_MAX) { + SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1); + scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II); + } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) { + SchedStart = std::max(SchedStart, LateStart - (int)II + 1); + scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II); + } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) { + SchedEnd = + std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1)); + // When scheduling a Phi it is better to start at the late cycle and go + // backwards. 
The default order may insert the Phi too far away from + // its first dependence. + if (SU->getInstr()->isPHI()) + scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II); + else + scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II); + } else { + int FirstCycle = Schedule.getFirstCycle(); + scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU), + FirstCycle + getASAP(SU) + II - 1, II); + } + // Even if we find a schedule, make sure the schedule doesn't exceed the + // allowable number of stages. We keep trying if this happens. + if (scheduleFound) + if (SwpMaxStages > -1 && + Schedule.getMaxStageCount() > (unsigned)SwpMaxStages) + scheduleFound = false; + + DEBUG({ + if (!scheduleFound) + dbgs() << "\tCan't schedule\n"; + }); + } while (++NI != NE && scheduleFound); + + // If a schedule is found, check if it is a valid schedule too. + if (scheduleFound) + scheduleFound = Schedule.isValidSchedule(this); + } + + DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n"); + + if (scheduleFound) + Schedule.finalizeSchedule(this); + else + Schedule.reset(); + + return scheduleFound && Schedule.getMaxStageCount() > 0; +} + +/// Given a schedule for the loop, generate a new version of the loop, +/// and replace the old version. This function generates a prolog +/// that contains the initial iterations in the pipeline, and kernel +/// loop, and the epilogue that contains the code for the final +/// iterations. +void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { + // Create a new basic block for the kernel and add it to the CFG. + MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + + unsigned MaxStageCount = Schedule.getMaxStageCount(); + + // Remember the registers that are used in different stages. The index is + // the iteration, or stage, that the instruction is scheduled in. This is + // a map between register names in the orignal block and the names created + // in each stage of the pipelined loop. + ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; + InstrMapTy InstrMap; + + SmallVector<MachineBasicBlock *, 4> PrologBBs; + // Generate the prolog instructions that set up the pipeline. + generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs); + MF.insert(BB->getIterator(), KernelBB); + + // Rearrange the instructions to generate the new, pipelined loop, + // and update register names as needed. + for (int Cycle = Schedule.getFirstCycle(), + LastCycle = Schedule.getFinalCycle(); + Cycle <= LastCycle; ++Cycle) { + std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle); + // This inner loop schedules each instruction in the cycle. + for (SUnit *CI : CycleInstrs) { + if (CI->getInstr()->isPHI()) + continue; + unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr())); + MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum); + updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = CI->getInstr(); + } + } + + // Copy any terminator instructions to the new kernel, and update + // names as needed. 
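// For orientation, a rough worked example of the block structure this function
// produces (illustrative; assumes 3 stages S0..S2, i.e. MaxStageCount == 2,
// and a trip count of 5, iterations 0..4):
//   prolog 0 : S0(iter 0)
//   prolog 1 : S1(iter 0), S0(iter 1)
//   kernel   : S2(iter n-2), S1(iter n-1), S0(iter n)   for n = 2, 3, 4
//   epilog 1 : S2(iter 3)
//   epilog 2 : S1(iter 4), S2(iter 4)
// Each original instruction is cloned once into every block whose stage set
// contains its stage (in cycle order within the kernel), and VRMap renames its
// registers per stage.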
+ for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), + E = BB->instr_end(); + I != E; ++I) { + MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = &*I; + } + + KernelBB->transferSuccessors(BB); + KernelBB->replaceSuccessor(BB, KernelBB); + + generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, + VRMap, InstrMap, MaxStageCount, MaxStageCount, false); + generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap, + InstrMap, MaxStageCount, MaxStageCount, false); + + DEBUG(dbgs() << "New block\n"; KernelBB->dump();); + + SmallVector<MachineBasicBlock *, 4> EpilogBBs; + // Generate the epilog instructions to complete the pipeline. + generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs, + PrologBBs); + + // We need this step because the register allocation doesn't handle some + // situations well, so we insert copies to help out. + splitLifetimes(KernelBB, EpilogBBs, Schedule); + + // Remove dead instructions due to loop induction variables. + removeDeadInstructions(KernelBB, EpilogBBs); + + // Add branches between prolog and epilog blocks. + addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap); + + // Remove the original loop since it's no longer referenced. + BB->clear(); + BB->eraseFromParent(); + + delete[] VRMap; +} + +/// Generate the pipeline prolog code. +void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &PrologBBs) { + MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); + assert(PreheaderBB != NULL && + "Need to add code to handle loops w/o preheader"); + MachineBasicBlock *PredBB = PreheaderBB; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which will be generated in the kernel. Each basic block may contain + // instructions from multiple stages/iterations. + for (unsigned i = 0; i < LastStage; ++i) { + // Create and insert the prolog basic block prior to the original loop + // basic block. The original loop is removed later. + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + PrologBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + NewBB->transferSuccessors(PredBB); + PredBB->addSuccessor(NewBB); + PredBB = NewBB; + + // Generate instructions for each appropriate stage. Process instructions + // in original program order. + for (int StageNum = i; StageNum >= 0; --StageNum) { + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstTerminator(); + BBI != BBE; ++BBI) { + if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) { + if (BBI->isPHI()) + continue; + MachineInstr *NewMI = + cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule); + updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule, + VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = &*BBI; + } + } + } + rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap); + DEBUG({ + dbgs() << "prolog:\n"; + NewBB->dump(); + }); + } + + PredBB->replaceSuccessor(BB, KernelBB); + + // Check if we need to remove the branch from the preheader to the original + // loop, and replace it with a branch to the new loop. 
+ unsigned numBranches = TII->removeBranch(*PreheaderBB); + if (numBranches) { + SmallVector<MachineOperand, 0> Cond; + TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc()); + } +} + +/// Generate the pipeline epilog code. The epilog code finishes the iterations +/// that were started in either the prolog or the kernel. We create a basic +/// block for each stage that needs to complete. +void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs) { + // We need to change the branch from the kernel to the first epilog block, so + // this call to analyze branch uses the kernel rather than the original BB. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); + assert(!checkBranch && "generateEpilog must be able to analyze the branch"); + if (checkBranch) + return; + + MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); + if (*LoopExitI == KernelBB) + ++LoopExitI; + assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); + MachineBasicBlock *LoopExitBB = *LoopExitI; + + MachineBasicBlock *PredBB = KernelBB; + MachineBasicBlock *EpilogStart = LoopExitBB; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which was generated for the kernel. Each basic block may contain + // instructions from multiple stages/iterations. + int EpilogStage = LastStage + 1; + for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); + EpilogBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + + PredBB->replaceSuccessor(LoopExitBB, NewBB); + NewBB->addSuccessor(LoopExitBB); + + if (EpilogStart == LoopExitBB) + EpilogStart = NewBB; + + // Add instructions to the epilog depending on the current block. + // Process instructions in original program order. + for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { + for (auto &BBI : *BB) { + if (BBI.isPHI()) + continue; + MachineInstr *In = &BBI; + if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) { + MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0); + updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = In; + } + } + } + generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, + VRMap, InstrMap, LastStage, EpilogStage, i == 1); + generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap, + InstrMap, LastStage, EpilogStage, i == 1); + PredBB = NewBB; + + DEBUG({ + dbgs() << "epilog:\n"; + NewBB->dump(); + }); + } + + // Fix any Phi nodes in the loop exit block. + for (MachineInstr &MI : *LoopExitBB) { + if (!MI.isPHI()) + break; + for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { + MachineOperand &MO = MI.getOperand(i); + if (MO.getMBB() == BB) + MO.setMBB(PredBB); + } + } + + // Create a branch to the new epilog from the kernel. + // Remove the original branch and add a new branch to the epilog. + TII->removeBranch(*KernelBB); + TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); + // Add a branch to the loop exit. 
+ if (EpilogBBs.size() > 0) { + MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); + SmallVector<MachineOperand, 4> Cond1; + TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); + } +} + +/// Replace all uses of FromReg that appear outside the specified +/// basic block with ToReg. +static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, + MachineBasicBlock *MBB, + MachineRegisterInfo &MRI, + LiveIntervals &LIS) { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), + E = MRI.use_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + if (O.getParent()->getParent() != MBB) + O.setReg(ToReg); + } + if (!LIS.hasInterval(ToReg)) + LIS.createEmptyInterval(ToReg); +} + +/// Return true if the register has a use that occurs outside the +/// specified loop. +static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, + MachineRegisterInfo &MRI) { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), + E = MRI.use_end(); + I != E; ++I) + if (I->getParent()->getParent() != BB) + return true; + return false; +} + +/// Generate Phis for the specific block in the generated pipelined code. +/// This function looks at the Phis from the original code to guide the +/// creation of new Phis. +void SwingSchedulerDAG::generateExistingPhis( + MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, + MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, + bool IsLast) { + // Compute the stage number for the inital value of the Phi, which + // comes from the prolog. The prolog to use depends on to which kernel/ + // epilog that we're adding the Phi. + unsigned PrologStage = 0; + unsigned PrevStage = 0; + bool InKernel = (LastStageNum == CurStageNum); + if (InKernel) { + PrologStage = LastStageNum - 1; + PrevStage = CurStageNum; + } else { + PrologStage = LastStageNum - (CurStageNum - LastStageNum); + PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1; + } + + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstNonPHI(); + BBI != BBE; ++BBI) { + unsigned Def = BBI->getOperand(0).getReg(); + + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(*BBI, BB, InitVal, LoopVal); + + unsigned PhiOp1 = 0; + // The Phi value from the loop body typically is defined in the loop, but + // not always. So, we need to check if the value is defined in the loop. + unsigned PhiOp2 = LoopVal; + if (VRMap[LastStageNum].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum][LoopVal]; + + int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); + int LoopValStage = + Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); + unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum); + if (NumStages == 0) { + // We don't need to generate a Phi anymore, but we need to rename any uses + // of the Phi value. + unsigned NewReg = VRMap[PrevStage][LoopVal]; + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI, + Def, NewReg); + if (VRMap[CurStageNum].count(LoopVal)) + VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal]; + } + // Adjust the number of Phis needed depending on the number of prologs left, + // and the distance from where the Phi is first scheduled. + unsigned NumPhis = NumStages; + if (!InKernel && (int)PrologStage < LoopValStage) + // The NumPhis is the maximum number of new Phis needed during the steady + // state. 
If the Phi has not been scheduled in current prolog, then we + // need to generate less Phis. + NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1); + // The number of Phis cannot exceed the number of prolog stages. Each + // stage can potentially define two values. + NumPhis = std::min(NumPhis, PrologStage + 2); + + unsigned NewReg = 0; + + unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled; + // In the epilog, we may need to look back one stage to get the correct + // Phi name because the epilog and prolog blocks execute the same stage. + // The correct name is from the previous block only when the Phi has + // been completely scheduled prior to the epilog, and Phi value is not + // needed in multiple stages. + int StageDiff = 0; + if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 && + NumPhis == 1) + StageDiff = 1; + // Adjust the computations below when the phi and the loop definition + // are scheduled in different stages. + if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage) + StageDiff = StageScheduled - LoopValStage; + for (unsigned np = 0; np < NumPhis; ++np) { + // If the Phi hasn't been scheduled, then use the initial Phi operand + // value. Otherwise, use the scheduled version of the instruction. This + // is a little complicated when a Phi references another Phi. + if (np > PrologStage || StageScheduled >= (int)LastStageNum) + PhiOp1 = InitVal; + // Check if the Phi has already been scheduled in a prolog stage. + else if (PrologStage >= AccessStage + StageDiff + np && + VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0) + PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal]; + // Check if the Phi has already been scheduled, but the loop intruction + // is either another Phi, or doesn't occur in the loop. + else if (PrologStage >= AccessStage + StageDiff + np) { + // If the Phi references another Phi, we need to examine the other + // Phi to get the correct value. + PhiOp1 = LoopVal; + MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1); + int Indirects = 1; + while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) { + int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1)); + if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects) + PhiOp1 = getInitPhiReg(*InstOp1, BB); + else + PhiOp1 = getLoopPhiReg(*InstOp1, BB); + InstOp1 = MRI.getVRegDef(PhiOp1); + int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1)); + int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0); + if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np && + VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) { + PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1]; + break; + } + ++Indirects; + } + } else + PhiOp1 = InitVal; + // If this references a generated Phi in the kernel, get the Phi operand + // from the incoming block. + if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) + if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) + PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); + + MachineInstr *PhiInst = MRI.getVRegDef(LoopVal); + bool LoopDefIsPhi = PhiInst && PhiInst->isPHI(); + // In the epilog, a map lookup is needed to get the value from the kernel, + // or previous epilog block. How is does this depends on if the + // instruction is scheduled in the previous block. 
+ if (!InKernel) { + int StageDiffAdj = 0; + if (LoopValStage != -1 && StageScheduled > LoopValStage) + StageDiffAdj = StageScheduled - LoopValStage; + // Use the loop value defined in the kernel, unless the kernel + // contains the last definition of the Phi. + if (np == 0 && PrevStage == LastStageNum && + (StageScheduled != 0 || LoopValStage != 0) && + VRMap[PrevStage - StageDiffAdj].count(LoopVal)) + PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal]; + // Use the value defined by the Phi. We add one because we switch + // from looking at the loop value to the Phi definition. + else if (np > 0 && PrevStage == LastStageNum && + VRMap[PrevStage - np + 1].count(Def)) + PhiOp2 = VRMap[PrevStage - np + 1][Def]; + // Use the loop value defined in the kernel. + else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 && + VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) + PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; + // Use the value defined by the Phi, unless we're generating the first + // epilog and the Phi refers to a Phi in a different stage. + else if (VRMap[PrevStage - np].count(Def) && + (!LoopDefIsPhi || PrevStage != LastStageNum)) + PhiOp2 = VRMap[PrevStage - np][Def]; + } + + // Check if we can reuse an existing Phi. This occurs when a Phi + // references another Phi, and the other Phi is scheduled in an + // earlier stage. We can try to reuse an existing Phi up until the last + // stage of the current Phi. + if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) { + int LVNumStages = Schedule.getStagesForPhi(LoopVal); + int StageDiff = (StageScheduled - LoopValStage); + LVNumStages -= StageDiff; + if (LVNumStages > (int)np) { + NewReg = PhiOp2; + unsigned ReuseStage = CurStageNum; + if (Schedule.isLoopCarried(this, *PhiInst)) + ReuseStage -= LVNumStages; + // Check if the Phi to reuse has been generated yet. If not, then + // there is nothing to reuse. + if (VRMap[ReuseStage].count(LoopVal)) { + NewReg = VRMap[ReuseStage][LoopVal]; + + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, Def, NewReg); + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + PhiOp2 = NewReg; + if (VRMap[LastStageNum - np - 1].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; + + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + continue; + } + } else if (InKernel && StageDiff > 0 && + VRMap[CurStageNum - StageDiff - np].count(LoopVal)) + PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; + } + + const TargetRegisterClass *RC = MRI.getRegClass(Def); + NewReg = MRI.createVirtualRegister(RC); + + MachineInstrBuilder NewPhi = + BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + NewPhi.addReg(PhiOp1).addMBB(BB1); + NewPhi.addReg(PhiOp2).addMBB(BB2); + if (np == 0) + InstrMap[NewPhi] = &*BBI; + + // We define the Phis after creating the new pipelined code, so + // we need to rename the Phi values in scheduled instructions. + + unsigned PrevReg = 0; + if (InKernel && VRMap[PrevStage - np].count(LoopVal)) + PrevReg = VRMap[PrevStage - np][LoopVal]; + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, + Def, NewReg, PrevReg); + // If the Phi has been scheduled, use the new name for rewriting. 
+ if (VRMap[CurStageNum - np].count(Def)) { + unsigned R = VRMap[CurStageNum - np][Def]; + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, + R, NewReg); + } + + // Check if we need to rename any uses that occurs after the loop. The + // register to replace depends on whether the Phi is scheduled in the + // epilog. + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + + // In the kernel, a dependent Phi uses the value from this Phi. + if (InKernel) + PhiOp2 = NewReg; + + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + } + + while (NumPhis++ < NumStages) { + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis, + &*BBI, Def, NewReg, 0); + } + + // Check if we need to rename a Phi that has been eliminated due to + // scheduling. + if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal)) + replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS); + } +} + +/// Generate Phis for the specified block in the generated pipelined code. +/// These are new Phis needed because the definition is scheduled after the +/// use in the pipelened sequence. +void SwingSchedulerDAG::generatePhis( + MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, + MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, + InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, + bool IsLast) { + // Compute the stage number that contains the initial Phi value, and + // the Phi from the previous stage. + unsigned PrologStage = 0; + unsigned PrevStage = 0; + unsigned StageDiff = CurStageNum - LastStageNum; + bool InKernel = (StageDiff == 0); + if (InKernel) { + PrologStage = LastStageNum - 1; + PrevStage = CurStageNum; + } else { + PrologStage = LastStageNum - StageDiff; + PrevStage = LastStageNum + StageDiff - 1; + } + + for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(), + BBE = BB->instr_end(); + BBI != BBE; ++BBI) { + for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = BBI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); + assert(StageScheduled != -1 && "Expecting scheduled instruction."); + unsigned Def = MO.getReg(); + unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum); + // An instruction scheduled in stage 0 and is used after the loop + // requires a phi in the epilog for the last definition from either + // the kernel or prolog. + if (!InKernel && NumPhis == 0 && StageScheduled == 0 && + hasUseAfterLoop(Def, BB, MRI)) + NumPhis = 1; + if (!InKernel && (unsigned)StageScheduled > PrologStage) + continue; + + unsigned PhiOp2 = VRMap[PrevStage][Def]; + if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) + if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) + PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + // The number of Phis can't exceed the number of prolog stages. The + // prolog stage number is zero based. 
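// Minimal sketch of the VRMap bookkeeping used throughout this code (plain
// C++, illustrative types; the real ValueMapTy is an array of DenseMaps): one
// map per generated stage, from a register name in the original loop to the
// name created for that stage's copy. Phi operands are found by looking the
// original name up in the appropriate stage, falling back to the loop-entry
// value when no copy exists yet.
#include <map>
#include <vector>

using StageValueMap = std::map<unsigned, unsigned>; // OrigReg -> NewReg

static unsigned stageValue(const std::vector<StageValueMap> &VRMap,
                           unsigned Stage, unsigned OrigReg, unsigned InitVal) {
  if (Stage < VRMap.size()) {
    auto It = VRMap[Stage].find(OrigReg);
    if (It != VRMap[Stage].end())
      return It->second;
  }
  return InitVal; // no copy generated for this stage yet
}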
+ if (NumPhis > PrologStage + 1 - StageScheduled) + NumPhis = PrologStage + 1 - StageScheduled; + for (unsigned np = 0; np < NumPhis; ++np) { + unsigned PhiOp1 = VRMap[PrologStage][Def]; + if (np <= PrologStage) + PhiOp1 = VRMap[PrologStage - np][Def]; + if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { + if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) + PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); + if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) + PhiOp1 = getInitPhiReg(*InstOp1, NewBB); + } + if (!InKernel) + PhiOp2 = VRMap[PrevStage - np][Def]; + + const TargetRegisterClass *RC = MRI.getRegClass(Def); + unsigned NewReg = MRI.createVirtualRegister(RC); + + MachineInstrBuilder NewPhi = + BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + NewPhi.addReg(PhiOp1).addMBB(BB1); + NewPhi.addReg(PhiOp2).addMBB(BB2); + if (np == 0) + InstrMap[NewPhi] = &*BBI; + + // Rewrite uses and update the map. The actions depend upon whether + // we generating code for the kernel or epilog blocks. + if (InKernel) { + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, PhiOp1, NewReg); + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, PhiOp2, NewReg); + + PhiOp2 = NewReg; + VRMap[PrevStage - np - 1][Def] = NewReg; + } else { + VRMap[CurStageNum - np][Def] = NewReg; + if (np == NumPhis - 1) + rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, + &*BBI, Def, NewReg); + } + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + } + } + } +} + +/// Remove instructions that generate values with no uses. +/// Typically, these are induction variable operations that generate values +/// used in the loop itself. A dead instruction has a definition with +/// no uses, or uses that occur in the original loop only. +void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs) { + // For each epilog block, check that the value defined by each instruction + // is used. If not, delete it. + for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), + MBE = EpilogBBs.rend(); + MBB != MBE; ++MBB) + for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), + ME = (*MBB)->instr_rend(); + MI != ME;) { + // From DeadMachineInstructionElem. Don't delete inline assembly. + if (MI->isInlineAsm()) { + ++MI; + continue; + } + bool SawStore = false; + // Check if it's safe to remove the instruction due to side effects. + // We can, and want to, remove Phis here. + if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) { + ++MI; + continue; + } + bool used = true; + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + unsigned reg = MOI->getReg(); + unsigned realUses = 0; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), + EI = MRI.use_end(); + UI != EI; ++UI) { + // Check if there are any uses that occur only in the original + // loop. If so, that's not a real use. + if (UI->getParent()->getParent() != BB) { + realUses++; + used = true; + break; + } + } + if (realUses > 0) + break; + used = false; + } + if (!used) { + MI++->eraseFromParent(); + continue; + } + ++MI; + } + // In the kernel block, check if we can remove a Phi that generates a value + // used in an instruction removed in the epilog block. 
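// The removal test used above reduces to this check (plain C++ sketch,
// illustrative types): a cloned definition survives only if it has a use
// outside the original loop block; uses left in the soon-to-be-deleted
// original block do not count.
#include <vector>

struct RegUse { const void *ParentBlock; };

static bool hasRealUse(const std::vector<RegUse> &Uses,
                       const void *OriginalLoopBlock) {
  for (const RegUse &U : Uses)
    if (U.ParentBlock != OriginalLoopBlock)
      return true; // a genuine use outside the original loop
  return false;    // only stale uses remain, so the definition is dead
}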
+ for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), + BBE = KernelBB->getFirstNonPHI(); + BBI != BBE;) { + MachineInstr *MI = &*BBI; + ++BBI; + unsigned reg = MI->getOperand(0).getReg(); + if (MRI.use_begin(reg) == MRI.use_end()) { + MI->eraseFromParent(); + } + } +} + +/// For loop carried definitions, we split the lifetime of a virtual register +/// that has uses past the definition in the next iteration. A copy with a new +/// virtual register is inserted before the definition, which helps with +/// generating a better register assignment. +/// +/// v1 = phi(a, v2) v1 = phi(a, v2) +/// v2 = phi(b, v3) v2 = phi(b, v3) +/// v3 = .. v4 = copy v1 +/// .. = V1 v3 = .. +/// .. = v4 +void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs, + SMSchedule &Schedule) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), + BBF = KernelBB->getFirstNonPHI(); + BBI != BBF; ++BBI) { + unsigned Def = BBI->getOperand(0).getReg(); + // Check for any Phi definition that used as an operand of another Phi + // in the same block. + for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), + E = MRI.use_instr_end(); + I != E; ++I) { + if (I->isPHI() && I->getParent() == KernelBB) { + // Get the loop carried definition. + unsigned LCDef = getLoopPhiReg(*BBI, KernelBB); + if (!LCDef) + continue; + MachineInstr *MI = MRI.getVRegDef(LCDef); + if (!MI || MI->getParent() != KernelBB || MI->isPHI()) + continue; + // Search through the rest of the block looking for uses of the Phi + // definition. If one occurs, then split the lifetime. + unsigned SplitReg = 0; + for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), + KernelBB->instr_end())) + if (BBJ.readsRegister(Def)) { + // We split the lifetime when we find the first use. + if (SplitReg == 0) { + SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); + BuildMI(*KernelBB, MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), SplitReg) + .addReg(Def); + } + BBJ.substituteRegister(Def, SplitReg, 0, *TRI); + } + if (!SplitReg) + continue; + // Search through each of the epilog blocks for any uses to be renamed. + for (auto &Epilog : EpilogBBs) + for (auto &I : *Epilog) + if (I.readsRegister(Def)) + I.substituteRegister(Def, SplitReg, 0, *TRI); + break; + } + } + } +} + +/// Remove the incoming block from the Phis in a basic block. +static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { + for (MachineInstr &MI : *BB) { + if (!MI.isPHI()) + break; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) + if (MI.getOperand(i + 1).getMBB() == Incoming) { + MI.RemoveOperand(i + 1); + MI.RemoveOperand(i); + break; + } + } +} + +/// Create branches from each prolog basic block to the appropriate epilog +/// block. These edges are needed if the loop ends before reaching the +/// kernel. +void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs, + MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs, + SMSchedule &Schedule, ValueMapTy *VRMap) { + assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); + MachineInstr *IndVar = Pass.LI.LoopInductionVar; + MachineInstr *Cmp = Pass.LI.LoopCompare; + MachineBasicBlock *LastPro = KernelBB; + MachineBasicBlock *LastEpi = KernelBB; + + // Start from the blocks connected to the kernel and work "out" + // to the first prolog and the last epilog blocks. 
+ SmallVector<MachineInstr *, 4> PrevInsts; + unsigned MaxIter = PrologBBs.size() - 1; + unsigned LC = UINT_MAX; + unsigned LCMin = UINT_MAX; + for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { + // Add branches to the prolog that go to the corresponding + // epilog, and the fall-thru prolog/kernel block. + MachineBasicBlock *Prolog = PrologBBs[j]; + MachineBasicBlock *Epilog = EpilogBBs[i]; + // We've executed one iteration, so decrement the loop count and check for + // the loop end. + SmallVector<MachineOperand, 4> Cond; + // Check if the LOOP0 has already been removed. If so, then there is no need + // to reduce the trip count. + if (LC != 0) + LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j, + MaxIter); + + // Record the value of the first trip count, which is used to determine if + // branches and blocks can be removed for constant trip counts. + if (LCMin == UINT_MAX) + LCMin = LC; + + unsigned numAdded = 0; + if (TargetRegisterInfo::isVirtualRegister(LC)) { + Prolog->addSuccessor(Epilog); + numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); + } else if (j >= LCMin) { + Prolog->addSuccessor(Epilog); + Prolog->removeSuccessor(LastPro); + LastEpi->removeSuccessor(Epilog); + numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc()); + removePhis(Epilog, LastEpi); + // Remove the blocks that are no longer referenced. + if (LastPro != LastEpi) { + LastEpi->clear(); + LastEpi->eraseFromParent(); + } + LastPro->clear(); + LastPro->eraseFromParent(); + } else { + numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); + removePhis(Epilog, Prolog); + } + LastPro = Prolog; + LastEpi = Epilog; + for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(), + E = Prolog->instr_rend(); + I != E && numAdded > 0; ++I, --numAdded) + updateInstruction(&*I, false, j, 0, Schedule, VRMap); + } +} + +/// Return true if we can compute the amount the instruction changes +/// during each iteration. Set Delta to the amount of the change. +bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned BaseReg; + int64_t Offset; + if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + return false; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + // Check if there is a Phi. If so, get the definition in the loop. + MachineInstr *BaseDef = MRI.getVRegDef(BaseReg); + if (BaseDef && BaseDef->isPHI()) { + BaseReg = getLoopPhiReg(*BaseDef, MI.getParent()); + BaseDef = MRI.getVRegDef(BaseReg); + } + if (!BaseDef) + return false; + + int D = 0; + if (!TII->getIncrementValue(*BaseDef, D) && D >= 0) + return false; + + Delta = D; + return true; +} + +/// Update the memory operand with a new offset when the pipeliner +/// generates a new copy of the instruction that refers to a +/// different memory location. +void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI, + MachineInstr &OldMI, unsigned Num) { + if (Num == 0) + return; + // If the instruction has memory operands, then adjust the offset + // when the instruction appears in different stages. 
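// Rough sketch of the adjustment made below (plain C++, illustrative names):
// when computeDelta shows the base register advancing by Delta bytes per
// iteration, the copy of the instruction placed Num stages away accesses
// memory Delta * Num bytes further along, so its memory operand is offset by
// that amount; when Delta is unknown the operand is widened to an unknown size
// instead.
#include <cstdint>

static int64_t adjustedOffset(int64_t OldOffset, unsigned Delta, unsigned Num) {
  return OldOffset + static_cast<int64_t>(Delta) * Num; // e.g. 0 + 4 * 2 = 8
}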
+ unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin(); + if (NumRefs == 0) + return; + MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs); + unsigned Refs = 0; + for (MachineMemOperand *MMO : NewMI.memoperands()) { + if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) || + (!MMO->getValue())) { + NewMemRefs[Refs++] = MMO; + continue; + } + unsigned Delta; + if (computeDelta(OldMI, Delta)) { + int64_t AdjOffset = Delta * Num; + NewMemRefs[Refs++] = + MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()); + } else + NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX); + } + NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs); +} + +/// Clone the instruction for the new pipelined loop and update the +/// memory operands, if needed. +MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI, + unsigned CurStageNum, + unsigned InstStageNum) { + MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); + // Check for tied operands in inline asm instructions. This should be handled + // elsewhere, but I'm not sure of the best solution. + if (OldMI->isInlineAsm()) + for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { + const auto &MO = OldMI->getOperand(i); + if (MO.isReg() && MO.isUse()) + break; + unsigned UseIdx; + if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) + NewMI->tieOperands(i, UseIdx); + } + updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); + return NewMI; +} + +/// Clone the instruction for the new pipelined loop. If needed, this +/// function updates the instruction using the values saved in the +/// InstrChanges structure. +MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI, + unsigned CurStageNum, + unsigned InstStageNum, + SMSchedule &Schedule) { + MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); + DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = + InstrChanges.find(getSUnit(OldMI)); + if (It != InstrChanges.end()) { + std::pair<unsigned, int64_t> RegAndOffset = It->second; + unsigned BasePos, OffsetPos; + if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos)) + return nullptr; + int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm(); + MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first); + if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum) + NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum); + NewMI->getOperand(OffsetPos).setImm(NewOffset); + } + updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); + return NewMI; +} + +/// Update the machine instruction with new virtual registers. This +/// function may change the defintions and/or uses. +void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef, + unsigned CurStageNum, + unsigned InstrStageNum, + SMSchedule &Schedule, + ValueMapTy *VRMap) { + for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + unsigned reg = MO.getReg(); + if (MO.isDef()) { + // Create a new virtual register for the definition. + const TargetRegisterClass *RC = MRI.getRegClass(reg); + unsigned NewReg = MRI.createVirtualRegister(RC); + MO.setReg(NewReg); + VRMap[CurStageNum][reg] = NewReg; + if (LastDef) + replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS); + } else if (MO.isUse()) { + MachineInstr *Def = MRI.getVRegDef(reg); + // Compute the stage that contains the last definition for instruction. 
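// Illustrative arithmetic for the use-renaming step below: if this copy of the
// user is being generated for stage CurStageNum, the user was scheduled in
// stage InstrStageNum, and its definition was scheduled in the earlier stage
// DefStageNum, then the last copy of that definition was produced
// InstrStageNum - DefStageNum stages back, so its name comes from
// VRMap[CurStageNum - (InstrStageNum - DefStageNum)].
static unsigned defLookupStage(unsigned CurStageNum, unsigned InstrStageNum,
                               int DefStageNum) {
  unsigned StageNum = CurStageNum;
  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum)
    StageNum -= InstrStageNum - DefStageNum; // step back to the defining copy
  return StageNum;
}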
+ int DefStageNum = Schedule.stageScheduled(getSUnit(Def)); + unsigned StageNum = CurStageNum; + if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) { + // Compute the difference in stages between the defintion and the use. + unsigned StageDiff = (InstrStageNum - DefStageNum); + // Make an adjustment to get the last definition. + StageNum -= StageDiff; + } + if (VRMap[StageNum].count(reg)) + MO.setReg(VRMap[StageNum][reg]); + } + } +} + +/// Return the instruction in the loop that defines the register. +/// If the definition is a Phi, then follow the Phi operand to +/// the instruction in the loop. +MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) { + SmallPtrSet<MachineInstr *, 8> Visited; + MachineInstr *Def = MRI.getVRegDef(Reg); + while (Def->isPHI()) { + if (!Visited.insert(Def).second) + break; + for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) + if (Def->getOperand(i + 1).getMBB() == BB) { + Def = MRI.getVRegDef(Def->getOperand(i).getReg()); + break; + } + } + return Def; +} + +/// Return the new name for the value from the previous stage. +unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage, + unsigned LoopVal, unsigned LoopStage, + ValueMapTy *VRMap, + MachineBasicBlock *BB) { + unsigned PrevVal = 0; + if (StageNum > PhiStage) { + MachineInstr *LoopInst = MRI.getVRegDef(LoopVal); + if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal)) + // The name is defined in the previous stage. + PrevVal = VRMap[StageNum - 1][LoopVal]; + else if (VRMap[StageNum].count(LoopVal)) + // The previous name is defined in the current stage when the instruction + // order is swapped. + PrevVal = VRMap[StageNum][LoopVal]; + else if (!LoopInst->isPHI() || LoopInst->getParent() != BB) + // The loop value hasn't yet been scheduled. + PrevVal = LoopVal; + else if (StageNum == PhiStage + 1) + // The loop value is another phi, which has not been scheduled. + PrevVal = getInitPhiReg(*LoopInst, BB); + else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB) + // The loop value is another phi, which has been scheduled. + PrevVal = + getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB), + LoopStage, VRMap, BB); + } + return PrevVal; +} + +/// Rewrite the Phi values in the specified block to use the mappings +/// from the initial operand. Once the Phi is scheduled, we switch +/// to using the loop value instead of the Phi value, so those names +/// do not need to be rewritten. 
+void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB, + unsigned StageNum, + SMSchedule &Schedule, + ValueMapTy *VRMap, + InstrMapTy &InstrMap) { + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstNonPHI(); + BBI != BBE; ++BBI) { + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(*BBI, BB, InitVal, LoopVal); + unsigned PhiDef = BBI->getOperand(0).getReg(); + + unsigned PhiStage = + (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef))); + unsigned LoopStage = + (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); + unsigned NumPhis = Schedule.getStagesForPhi(PhiDef); + if (NumPhis > StageNum) + NumPhis = StageNum; + for (unsigned np = 0; np <= NumPhis; ++np) { + unsigned NewVal = + getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); + if (!NewVal) + NewVal = InitVal; + rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI, + PhiDef, NewVal); + } + } +} + +/// Rewrite a previously scheduled instruction to use the register value +/// from the new instruction. Make sure the instruction occurs in the +/// basic block, and we don't change the uses in the new instruction. +void SwingSchedulerDAG::rewriteScheduledInstr( + MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap, + unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, + unsigned NewReg, unsigned PrevReg) { + bool InProlog = (CurStageNum < Schedule.getMaxStageCount()); + int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum; + // Rewrite uses that have been scheduled already to use the new + // Phi register. + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), + EI = MRI.use_end(); + UI != EI;) { + MachineOperand &UseOp = *UI; + MachineInstr *UseMI = UseOp.getParent(); + ++UI; + if (UseMI->getParent() != BB) + continue; + if (UseMI->isPHI()) { + if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg) + continue; + if (getLoopPhiReg(*UseMI, BB) != OldReg) + continue; + } + InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI); + assert(OrigInstr != InstrMap.end() && "Instruction not scheduled."); + SUnit *OrigMISU = getSUnit(OrigInstr->second); + int StageSched = Schedule.stageScheduled(OrigMISU); + int CycleSched = Schedule.cycleScheduled(OrigMISU); + unsigned ReplaceReg = 0; + // This is the stage for the scheduled instruction. + if (StagePhi == StageSched && Phi->isPHI()) { + int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi)); + if (PrevReg && InProlog) + ReplaceReg = PrevReg; + else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) && + (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI())) + ReplaceReg = PrevReg; + else + ReplaceReg = NewReg; + } + // The scheduled instruction occurs before the scheduled Phi, and the + // Phi is not loop carried. + if (!InProlog && StagePhi + 1 == StageSched && + !Schedule.isLoopCarried(this, *Phi)) + ReplaceReg = NewReg; + if (StagePhi > StageSched && Phi->isPHI()) + ReplaceReg = NewReg; + if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) + ReplaceReg = NewReg; + if (ReplaceReg) { + MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); + UseOp.setReg(ReplaceReg); + } + } +} + +/// Check if we can change the instruction to use an offset value from the +/// previous iteration. If so, return true and set the base and offset values +/// so that we can rewrite the load, if necessary. 
+/// v1 = Phi(v0, v3) +/// v2 = load v1, 0 +/// v3 = post_store v1, 4, x +/// This function enables the load to be rewritten as v2 = load v3, 4. +bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, + unsigned &BasePos, + unsigned &OffsetPos, + unsigned &NewBase, + int64_t &Offset) { + // Get the load instruction. + if (TII->isPostIncrement(*MI)) + return false; + unsigned BasePosLd, OffsetPosLd; + if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd)) + return false; + unsigned BaseReg = MI->getOperand(BasePosLd).getReg(); + + // Look for the Phi instruction. + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineInstr *Phi = MRI.getVRegDef(BaseReg); + if (!Phi || !Phi->isPHI()) + return false; + // Get the register defined in the loop block. + unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent()); + if (!PrevReg) + return false; + + // Check for the post-increment load/store instruction. + MachineInstr *PrevDef = MRI.getVRegDef(PrevReg); + if (!PrevDef || PrevDef == MI) + return false; + + if (!TII->isPostIncrement(*PrevDef)) + return false; + + unsigned BasePos1 = 0, OffsetPos1 = 0; + if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1)) + return false; + + // Make sure offset values are both positive or both negative. + int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm(); + int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm(); + if ((LoadOffset >= 0) != (StoreOffset >= 0)) + return false; + + // Set the return value once we determine that we return true. + BasePos = BasePosLd; + OffsetPos = OffsetPosLd; + NewBase = PrevReg; + Offset = StoreOffset; + return true; +} + +/// Apply changes to the instruction if needed. The changes are need +/// to improve the scheduling and depend up on the final schedule. +MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, + SMSchedule &Schedule, + bool UpdateDAG) { + SUnit *SU = getSUnit(MI); + DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = + InstrChanges.find(SU); + if (It != InstrChanges.end()) { + std::pair<unsigned, int64_t> RegAndOffset = It->second; + unsigned BasePos, OffsetPos; + if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) + return nullptr; + unsigned BaseReg = MI->getOperand(BasePos).getReg(); + MachineInstr *LoopDef = findDefInLoop(BaseReg); + int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef)); + int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef)); + int BaseStageNum = Schedule.stageScheduled(SU); + int BaseCycleNum = Schedule.cycleScheduled(SU); + if (BaseStageNum < DefStageNum) { + MachineInstr *NewMI = MF.CloneMachineInstr(MI); + int OffsetDiff = DefStageNum - BaseStageNum; + if (DefCycleNum < BaseCycleNum) { + NewMI->getOperand(BasePos).setReg(RegAndOffset.first); + if (OffsetDiff > 0) + --OffsetDiff; + } + int64_t NewOffset = + MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff; + NewMI->getOperand(OffsetPos).setImm(NewOffset); + if (UpdateDAG) { + SU->setInstr(NewMI); + MISUnitMap[NewMI] = SU; + } + NewMIs.insert(NewMI); + return NewMI; + } + } + return nullptr; +} + +/// Return true for an order dependence that is loop carried potentially. +/// An order dependence is loop carried if the destination defines a value +/// that may be used by the source in a subsequent iteration. 
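The function below prunes an order dependence when both accesses use the same base register, that base advances by a fixed delta each iteration, and neither access reaches past the increment. A standalone restatement of its closing offset test, with made-up numbers rather than values from the diff:

#include <cassert>
#include <cstdint>

// Mirrors the final check of isLoopCarriedOrder: the access with the larger
// offset can only touch another iteration's data if it extends past the
// per-iteration increment of the shared base register.
bool mayBeLoopCarried(int64_t OffsetS, uint64_t SizeS, uint64_t DeltaS,
                      int64_t OffsetD, uint64_t SizeD, uint64_t DeltaD) {
  if (OffsetS >= OffsetD)
    return OffsetS + (int64_t)SizeS > (int64_t)DeltaS;
  return OffsetD + (int64_t)SizeD > (int64_t)DeltaD;
}

int main() {
  // Base advances by 8 per iteration; a 4-byte access at offset 4 and a
  // 4-byte access at offset 0 never overlap across iterations: 4 + 4 <= 8.
  assert(!mayBeLoopCarried(4, 4, 8, 0, 4, 8));
  // With the base advancing by only 4, iteration i's access at offset 0 hits
  // the bytes iteration i-1 touched at offset 4, so the dependence may be
  // loop carried: 4 + 4 > 4.
  assert(mayBeLoopCarried(4, 4, 4, 0, 4, 4));
  return 0;
}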
+bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
+                                           bool isSucc) {
+  if (!isOrder(Source, Dep) || Dep.isArtificial())
+    return false;
+
+  if (!SwpPruneLoopCarried)
+    return true;
+
+  MachineInstr *SI = Source->getInstr();
+  MachineInstr *DI = Dep.getSUnit()->getInstr();
+  if (!isSucc)
+    std::swap(SI, DI);
+  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
+
+  // Assume ordered loads and stores may have a loop carried dependence.
+  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
+      SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
+    return true;
+
+  // Only chain dependences between a load and store can be loop carried.
+  if (!DI->mayStore() || !SI->mayLoad())
+    return false;
+
+  unsigned DeltaS, DeltaD;
+  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
+    return true;
+
+  unsigned BaseRegS, BaseRegD;
+  int64_t OffsetS, OffsetD;
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
+      !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
+    return true;
+
+  if (BaseRegS != BaseRegD)
+    return true;
+
+  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
+  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
+
+  // This is the main test, which checks the offset values and the loop
+  // increment value to determine if the accesses may be loop carried.
+  if (OffsetS >= OffsetD)
+    return OffsetS + AccessSizeS > DeltaS;
+  else if (OffsetS < OffsetD)
+    return OffsetD + AccessSizeD > DeltaD;
+
+  return true;
+}
+
+void SwingSchedulerDAG::postprocessDAG() {
+  for (auto &M : Mutations)
+    M->apply(this);
+}
+
+/// Try to schedule the node at the specified StartCycle and continue
+/// until the node is scheduled or the EndCycle is reached. This function
+/// returns true if the node is scheduled. This routine may search either
+/// forward or backward for a place to insert the instruction based upon
+/// the relative values of StartCycle and EndCycle.
+bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
+  bool forward = true;
+  if (StartCycle > EndCycle)
+    forward = false;
+
+  // The terminating condition depends on the direction.
+  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
+  for (int curCycle = StartCycle; curCycle != termCycle;
+       forward ? ++curCycle : --curCycle) {
+
+    // Add the already scheduled instructions at the specified cycle to the DFA.
+ Resources->clearResources(); + for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II); + checkCycle <= LastCycle; checkCycle += II) { + std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle]; + + for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(), + E = cycleInstrs.end(); + I != E; ++I) { + if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode())) + continue; + assert(Resources->canReserveResources(*(*I)->getInstr()) && + "These instructions have already been scheduled."); + Resources->reserveResources(*(*I)->getInstr()); + } + } + if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) || + Resources->canReserveResources(*SU->getInstr())) { + DEBUG({ + dbgs() << "\tinsert at cycle " << curCycle << " "; + SU->getInstr()->dump(); + }); + + ScheduledInstrs[curCycle].push_back(SU); + InstrToCycle.insert(std::make_pair(SU, curCycle)); + if (curCycle > LastCycle) + LastCycle = curCycle; + if (curCycle < FirstCycle) + FirstCycle = curCycle; + return true; + } + DEBUG({ + dbgs() << "\tfailed to insert at cycle " << curCycle << " "; + SU->getInstr()->dump(); + }); + } + return false; +} + +// Return the cycle of the earliest scheduled instruction in the chain. +int SMSchedule::earliestCycleInChain(const SDep &Dep) { + SmallPtrSet<SUnit *, 8> Visited; + SmallVector<SDep, 8> Worklist; + Worklist.push_back(Dep); + int EarlyCycle = INT_MAX; + while (!Worklist.empty()) { + const SDep &Cur = Worklist.pop_back_val(); + SUnit *PrevSU = Cur.getSUnit(); + if (Visited.count(PrevSU)) + continue; + std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU); + if (it == InstrToCycle.end()) + continue; + EarlyCycle = std::min(EarlyCycle, it->second); + for (const auto &PI : PrevSU->Preds) + if (SwingSchedulerDAG::isOrder(PrevSU, PI)) + Worklist.push_back(PI); + Visited.insert(PrevSU); + } + return EarlyCycle; +} + +// Return the cycle of the latest scheduled instruction in the chain. +int SMSchedule::latestCycleInChain(const SDep &Dep) { + SmallPtrSet<SUnit *, 8> Visited; + SmallVector<SDep, 8> Worklist; + Worklist.push_back(Dep); + int LateCycle = INT_MIN; + while (!Worklist.empty()) { + const SDep &Cur = Worklist.pop_back_val(); + SUnit *SuccSU = Cur.getSUnit(); + if (Visited.count(SuccSU)) + continue; + std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU); + if (it == InstrToCycle.end()) + continue; + LateCycle = std::max(LateCycle, it->second); + for (const auto &SI : SuccSU->Succs) + if (SwingSchedulerDAG::isOrder(SuccSU, SI)) + Worklist.push_back(SI); + Visited.insert(SuccSU); + } + return LateCycle; +} + +/// If an instruction has a use that spans multiple iterations, then +/// return true. These instructions are characterized by having a back-ege +/// to a Phi, which contains a reference to another Phi. +static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) { + for (auto &P : SU->Preds) + if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI()) + for (auto &S : P.getSUnit()->Succs) + if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI()) + return P.getSUnit(); + return nullptr; +} + +/// Compute the scheduling start slot for the instruction. The start slot +/// depends on any predecessor or successor nodes scheduled already. +void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, + int *MinEnd, int *MaxStart, int II, + SwingSchedulerDAG *DAG) { + // Iterate over each instruction that has been scheduled already. 
The start + // slot computuation depends on whether the previously scheduled instruction + // is a predecessor or successor of the specified instruction. + for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) { + + // Iterate over each instruction in the current cycle. + for (SUnit *I : getInstructions(cycle)) { + // Because we're processing a DAG for the dependences, we recognize + // the back-edge in recurrences by anti dependences. + for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) { + const SDep &Dep = SU->Preds[i]; + if (Dep.getSUnit() == I) { + if (!DAG->isBackedge(SU, Dep)) { + int EarlyStart = cycle + DAG->getLatency(SU, Dep) - + DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; + *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); + if (DAG->isLoopCarriedOrder(SU, Dep, false)) { + int End = earliestCycleInChain(Dep) + (II - 1); + *MinEnd = std::min(*MinEnd, End); + } + } else { + int LateStart = cycle - DAG->getLatency(SU, Dep) + + DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; + *MinLateStart = std::min(*MinLateStart, LateStart); + } + } + // For instruction that requires multiple iterations, make sure that + // the dependent instruction is not scheduled past the definition. + SUnit *BE = multipleIterations(I, DAG); + if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() && + !SU->isPred(I)) + *MinLateStart = std::min(*MinLateStart, cycle); + } + for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) + if (SU->Succs[i].getSUnit() == I) { + const SDep &Dep = SU->Succs[i]; + if (!DAG->isBackedge(SU, Dep)) { + int LateStart = cycle - DAG->getLatency(SU, Dep) + + DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; + *MinLateStart = std::min(*MinLateStart, LateStart); + if (DAG->isLoopCarriedOrder(SU, Dep)) { + int Start = latestCycleInChain(Dep) + 1 - II; + *MaxStart = std::max(*MaxStart, Start); + } + } else { + int EarlyStart = cycle + DAG->getLatency(SU, Dep) - + DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; + *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); + } + } + } + } +} + +/// Order the instructions within a cycle so that the definitions occur +/// before the uses. Returns true if the instruction is added to the start +/// of the list, or false if added to the end. +bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, + std::deque<SUnit *> &Insts) { + MachineInstr *MI = SU->getInstr(); + bool OrderBeforeUse = false; + bool OrderAfterDef = false; + bool OrderBeforeDef = false; + unsigned MoveDef = 0; + unsigned MoveUse = 0; + int StageInst1 = stageScheduled(SU); + + unsigned Pos = 0; + for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E; + ++I, ++Pos) { + // Relative order of Phis does not matter. 
+ if (MI->isPHI() && (*I)->getInstr()->isPHI()) + continue; + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + unsigned Reg = MO.getReg(); + unsigned BasePos, OffsetPos; + if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) + if (MI->getOperand(BasePos).getReg() == Reg) + if (unsigned NewReg = SSD->getInstrBaseReg(SU)) + Reg = NewReg; + bool Reads, Writes; + std::tie(Reads, Writes) = + (*I)->getInstr()->readsWritesVirtualRegister(Reg); + if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) { + OrderBeforeUse = true; + MoveUse = Pos; + } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) { + // Add the instruction after the scheduled instruction. + OrderAfterDef = true; + MoveDef = Pos; + } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) { + if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) { + OrderBeforeUse = true; + MoveUse = Pos; + } else { + OrderAfterDef = true; + MoveDef = Pos; + } + } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) { + OrderBeforeUse = true; + MoveUse = Pos; + if (MoveUse != 0) { + OrderAfterDef = true; + MoveDef = Pos - 1; + } + } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) { + // Add the instruction before the scheduled instruction. + OrderBeforeUse = true; + MoveUse = Pos; + } else if (MO.isUse() && stageScheduled(*I) == StageInst1 && + isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) { + OrderBeforeDef = true; + MoveUse = Pos; + } + } + // Check for order dependences between instructions. Make sure the source + // is ordered before the destination. + for (auto &S : SU->Succs) + if (S.getKind() == SDep::Order) { + if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) { + OrderBeforeUse = true; + MoveUse = Pos; + } + } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) { + if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) { + if (S.isAssignedRegDep()) { + OrderAfterDef = true; + MoveDef = Pos; + } + } else { + OrderBeforeUse = true; + MoveUse = Pos; + } + } + for (auto &P : SU->Preds) + if (P.getKind() == SDep::Order) { + if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) { + OrderAfterDef = true; + MoveDef = Pos; + } + } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) { + if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) { + if (P.isAssignedRegDep()) { + OrderBeforeUse = true; + MoveUse = Pos; + } + } else { + OrderAfterDef = true; + MoveDef = Pos; + } + } + } + + // A circular dependence. + if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef) + OrderBeforeUse = false; + + // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due + // to a loop-carried dependence. + if (OrderBeforeDef) + OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef); + + // The uncommon case when the instruction order needs to be updated because + // there is both a use and def. 
+ if (OrderBeforeUse && OrderAfterDef) { + SUnit *UseSU = Insts.at(MoveUse); + SUnit *DefSU = Insts.at(MoveDef); + if (MoveUse > MoveDef) { + Insts.erase(Insts.begin() + MoveUse); + Insts.erase(Insts.begin() + MoveDef); + } else { + Insts.erase(Insts.begin() + MoveDef); + Insts.erase(Insts.begin() + MoveUse); + } + if (orderDependence(SSD, UseSU, Insts)) { + Insts.push_front(SU); + orderDependence(SSD, DefSU, Insts); + return true; + } + Insts.pop_back(); + Insts.push_back(SU); + Insts.push_back(UseSU); + orderDependence(SSD, DefSU, Insts); + return false; + } + // Put the new instruction first if there is a use in the list. Otherwise, + // put it at the end of the list. + if (OrderBeforeUse) + Insts.push_front(SU); + else + Insts.push_back(SU); + return OrderBeforeUse; +} + +/// Return true if the scheduled Phi has a loop carried operand. +bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) { + if (!Phi.isPHI()) + return false; + assert(Phi.isPHI() && "Expecing a Phi."); + SUnit *DefSU = SSD->getSUnit(&Phi); + unsigned DefCycle = cycleScheduled(DefSU); + int DefStage = stageScheduled(DefSU); + + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal); + SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal)); + if (!UseSU) + return true; + if (UseSU->getInstr()->isPHI()) + return true; + unsigned LoopCycle = cycleScheduled(UseSU); + int LoopStage = stageScheduled(UseSU); + return (LoopCycle > DefCycle) || (LoopStage <= DefStage); +} + +/// Return true if the instruction is a definition that is loop carried +/// and defines the use on the next iteration. +/// v1 = phi(v2, v3) +/// (Def) v3 = op v1 +/// (MO) = v1 +/// If MO appears before Def, then then v1 and v3 may get assigned to the same +/// register. +bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, + MachineInstr *Def, MachineOperand &MO) { + if (!MO.isReg()) + return false; + if (Def->isPHI()) + return false; + MachineInstr *Phi = MRI.getVRegDef(MO.getReg()); + if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent()) + return false; + if (!isLoopCarried(SSD, *Phi)) + return false; + unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent()); + for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { + MachineOperand &DMO = Def->getOperand(i); + if (!DMO.isReg() || !DMO.isDef()) + continue; + if (DMO.getReg() == LoopReg) + return true; + } + return false; +} + +// Check if the generated schedule is valid. This function checks if +// an instruction that uses a physical register is scheduled in a +// different stage than the definition. The pipeliner does not handle +// physical register values that may cross a basic block boundary. +bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { + for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) { + SUnit &SU = SSD->SUnits[i]; + if (!SU.hasPhysRegDefs) + continue; + int StageDef = stageScheduled(&SU); + assert(StageDef != -1 && "Instruction should have been scheduled."); + for (auto &SI : SU.Succs) + if (SI.isAssignedRegDep()) + if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg())) + if (stageScheduled(SI.getSUnit()) != StageDef) + return false; + } + return true; +} + +/// After the schedule has been formed, call this function to combine +/// the instructions from the different stages/cycles. That is, this +/// function creates a schedule that represents a single iteration. 
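finalizeSchedule below folds later stages back onto the first II cycles, so an instruction placed at absolute cycle C ends up in the kernel cycle given by C modulo the initiation interval (relative to the first cycle); the instructions within each kernel cycle are then re-ordered separately. A toy illustration of that folding, with an assumed II of 2 and made-up instruction names:

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  const int II = 2; // assumed initiation interval
  // (absolute cycle, instruction) pairs as the scheduler might produce them;
  // cycles 0-1 belong to stage 0, cycles 2-3 to stage 1.
  std::vector<std::pair<int, std::string>> Sched = {
      {0, "load"}, {1, "mul"}, {2, "add"}, {3, "store"}};
  std::map<int, std::vector<std::string>> Kernel;
  for (const auto &P : Sched)
    Kernel[P.first % II].push_back(P.second); // fold the stages together
  for (const auto &C : Kernel) {
    std::printf("kernel cycle %d:", C.first);
    for (const auto &Name : C.second)
      std::printf(" %s", Name.c_str());
    std::printf("\n");
  }
  // Prints:
  //   kernel cycle 0: load add
  //   kernel cycle 1: mul store
  return 0;
}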
+void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { + // Move all instructions to the first stage from later stages. + for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) { + for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage; + ++stage) { + std::deque<SUnit *> &cycleInstrs = + ScheduledInstrs[cycle + (stage * InitiationInterval)]; + for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(), + E = cycleInstrs.rend(); + I != E; ++I) + ScheduledInstrs[cycle].push_front(*I); + } + } + // Iterate over the definitions in each instruction, and compute the + // stage difference for each use. Keep the maximum value. + for (auto &I : InstrToCycle) { + int DefStage = stageScheduled(I.first); + MachineInstr *MI = I.first->getInstr(); + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || !Op.isDef()) + continue; + + unsigned Reg = Op.getReg(); + unsigned MaxDiff = 0; + bool PhiIsSwapped = false; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), + EI = MRI.use_end(); + UI != EI; ++UI) { + MachineOperand &UseOp = *UI; + MachineInstr *UseMI = UseOp.getParent(); + SUnit *SUnitUse = SSD->getSUnit(UseMI); + int UseStage = stageScheduled(SUnitUse); + unsigned Diff = 0; + if (UseStage != -1 && UseStage >= DefStage) + Diff = UseStage - DefStage; + if (MI->isPHI()) { + if (isLoopCarried(SSD, *MI)) + ++Diff; + else + PhiIsSwapped = true; + } + MaxDiff = std::max(Diff, MaxDiff); + } + RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); + } + } + + // Erase all the elements in the later stages. Only one iteration should + // remain in the scheduled list, and it contains all the instructions. + for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle) + ScheduledInstrs.erase(cycle); + + // Change the registers in instruction as specified in the InstrChanges + // map. We need to use the new registers to create the correct order. + for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) { + SUnit *SU = &SSD->SUnits[i]; + SSD->applyInstrChange(SU->getInstr(), *this, true); + } + + // Reorder the instructions in each cycle to fix and improve the + // generated code. + for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) { + std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle]; + std::deque<SUnit *> newOrderZC; + // Put the zero-cost, pseudo instructions at the start of the cycle. + for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) { + SUnit *SU = cycleInstrs[i]; + if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode())) + orderDependence(SSD, SU, newOrderZC); + } + std::deque<SUnit *> newOrderI; + // Then, add the regular instructions back. + for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) { + SUnit *SU = cycleInstrs[i]; + if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode())) + orderDependence(SSD, SU, newOrderI); + } + // Replace the old order with the new order. + cycleInstrs.swap(newOrderZC); + cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end()); + } + + DEBUG(dump();); +} + +/// Print the schedule information to the given output. +void SMSchedule::print(raw_ostream &os) const { + // Iterate over each cycle. + for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) { + // Iterate over each instruction in the cycle. 
+ const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle); + for (SUnit *CI : cycleInstrs->second) { + os << "cycle " << cycle << " (" << stageScheduled(CI) << ") "; + os << "(" << CI->NodeNum << ") "; + CI->getInstr()->print(os); + os << "\n"; + } + } +} + +/// Utility function used for debugging to print the schedule. +void SMSchedule::dump() const { print(dbgs()); } diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 613598d..242cb0b 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -21,11 +21,16 @@ using namespace llvm; +static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden, + cl::init(true), cl::desc("Enable subregister liveness tracking.")); + // Pin the vtable to this file. void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) - : MF(MF), TheDelegate(nullptr), TracksSubRegLiveness(false) { + : MF(MF), TheDelegate(nullptr), + TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() && + EnableSubRegLiveness) { unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); VRegInfo.reserve(256); RegAllocHints.reserve(256); @@ -88,6 +93,13 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) { return true; } +unsigned MachineRegisterInfo::createIncompleteVirtualRegister() { + unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); + VRegInfo.grow(Reg); + RegAllocHints.grow(Reg); + return Reg; +} + /// createVirtualRegister - Create and return a new virtual register in the /// function with the specified register class. /// @@ -98,41 +110,42 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ "Virtual register RegClass must be allocatable."); // New virtual register number. - unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - VRegInfo.grow(Reg); + unsigned Reg = createIncompleteVirtualRegister(); VRegInfo[Reg].first = RegClass; - RegAllocHints.grow(Reg); if (TheDelegate) TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } -unsigned -MachineRegisterInfo::getSize(unsigned VReg) const { - VRegToSizeMap::const_iterator SizeIt = getVRegToSize().find(VReg); - return SizeIt != getVRegToSize().end() ? SizeIt->second : 0; +LLT MachineRegisterInfo::getType(unsigned VReg) const { + VRegToTypeMap::const_iterator TypeIt = getVRegToType().find(VReg); + return TypeIt != getVRegToType().end() ? TypeIt->second : LLT{}; } -void MachineRegisterInfo::setSize(unsigned VReg, unsigned Size) { - getVRegToSize()[VReg] = Size; +void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) { + // Check that VReg doesn't have a class. + assert((getRegClassOrRegBank(VReg).isNull() || + !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) && + "Can't set the size of a non-generic virtual register"); + getVRegToType()[VReg] = Ty; } unsigned -MachineRegisterInfo::createGenericVirtualRegister(unsigned Size) { - assert(Size && "Cannot create empty virtual register"); - +MachineRegisterInfo::createGenericVirtualRegister(LLT Ty) { // New virtual register number. - unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - VRegInfo.grow(Reg); + unsigned Reg = createIncompleteVirtualRegister(); // FIXME: Should we use a dummy register class? 
- VRegInfo[Reg].first = static_cast<TargetRegisterClass *>(nullptr); - getVRegToSize()[Reg] = Size; - RegAllocHints.grow(Reg); + VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr); + getVRegToType()[Reg] = Ty; if (TheDelegate) TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } +void MachineRegisterInfo::clearVirtRegTypes() { + getVRegToType().clear(); +} + /// clearVirtRegs - Remove all virtual registers (after physreg assignment). void MachineRegisterInfo::clearVirtRegs() { #ifndef NDEBUG @@ -444,13 +457,16 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { "Invalid ReservedRegs vector from target"); } -bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, - const MachineFunction &MF) const { +bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const { assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + if (TRI->isConstantPhysReg(PhysReg)) + return true; + // Check if any overlapping register is modified, or allocatable so it may be // used later. - for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true); + for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) if (!def_empty(*AI) || isAllocatable(*AI)) return false; diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 47ad60c..e9b4755 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -18,7 +18,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/AlignOf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index d921e29..e06bc51 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -230,11 +230,6 @@ static cl::opt<bool> EnablePostRAMachineSched( cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden); -/// Forward declare the standard machine scheduler. This will be used as the -/// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); -static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C); - /// Decrement this iterator until reaching the top or a non-debug instr. static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, @@ -251,8 +246,8 @@ priorNonDebug(MachineBasicBlock::const_iterator I, static MachineBasicBlock::iterator priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::const_iterator Beg) { - return const_cast<MachineInstr*>( - &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)); + return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg) + .getNonConstIterator(); } /// If this iterator is a debug value, increment until reaching the End or a @@ -271,12 +266,8 @@ nextIfDebug(MachineBasicBlock::const_iterator I, static MachineBasicBlock::iterator nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::const_iterator End) { - // Cast the return value to nonconst MachineInstr, then cast to an - // instr_iterator, which does not check for null, finally return a - // bundle_iterator. 
- return MachineBasicBlock::instr_iterator( - const_cast<MachineInstr*>( - &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); + return nextIfDebug(MachineBasicBlock::const_iterator(I), End) + .getNonConstIterator(); } /// Instantiate a ScheduleDAGInstrs that will be owned by the caller. @@ -458,9 +449,10 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; for (;I != MBB->begin(); --I) { - if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII)) + MachineInstr &MI = *std::prev(I); + if (isSchedBoundary(&MI, &*MBB, MF, TII)) break; - if (!I->isDebugValue()) + if (!MI.isDebugValue()) ++NumRegionInstrs; } // Notify the scheduler of the region, even if we may skip scheduling @@ -692,8 +684,14 @@ void ScheduleDAGMI::schedule() { // This may initialize a DFSResult to be used for queue priority. SchedImpl->initialize(this); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + if (EntrySU.getInstr() != nullptr) + EntrySU.dumpAll(this); + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this); + if (ExitSU.getInstr() != nullptr) + ExitSU.dumpAll(this); + ); if (ViewMISchedDAGs) viewGraph(); // Initialize ready queues now that the DAG and priority data are finalized. @@ -862,6 +860,44 @@ ScheduleDAGMILive::~ScheduleDAGMILive() { delete DFSResult; } +void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { + const MachineInstr &MI = *SU.getInstr(); + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (!MO.readsReg()) + continue; + if (TrackLaneMasks && !MO.isUse()) + continue; + + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // Ignore re-defs. + if (TrackLaneMasks) { + bool FoundDef = false; + for (const MachineOperand &MO2 : MI.operands()) { + if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) { + FoundDef = true; + break; + } + } + if (FoundDef) + continue; + } + + // Record this local VReg use. + VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == &SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU)); + } +} + /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after /// crossing a scheduling boundary. [begin, end) includes all instructions in /// the region, including the boundary itself and single-instruction regions @@ -889,6 +925,11 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, // Setup the register pressure trackers for the top scheduled top and bottom // scheduled regions. void ScheduleDAGMILive::initRegPressure() { + VRegUses.clear(); + VRegUses.setUniverse(MRI.getNumVirtRegs()); + for (SUnit &SU : SUnits) + collectVRegUses(SU); + TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, ShouldTrackLaneMasks, false); BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, @@ -999,7 +1040,7 @@ void ScheduleDAGMILive::updatePressureDiffs( // this fact anymore => decrement pressure. // If the register has just become dead then other uses make it come // back to life => increment pressure. 
- bool Decrement = P.LaneMask != 0; + bool Decrement = P.LaneMask.any(); for (const VReg2SUnit &V2SU : make_range(VRegUses.find(Reg), VRegUses.end())) { @@ -1018,7 +1059,7 @@ void ScheduleDAGMILive::updatePressureDiffs( ); } } else { - assert(P.LaneMask != 0); + assert(P.LaneMask.any()); DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); // This may be called before CurrentBottom has been initialized. However, // BotRPTracker must have a valid position. We want the value live into the @@ -1087,6 +1128,8 @@ void ScheduleDAGMILive::schedule() { SchedImpl->initialize(this); DEBUG( + if (EntrySU.getInstr() != nullptr) + EntrySU.dumpAll(this); for (const SUnit &SU : SUnits) { SU.dumpAll(this); if (ShouldTrackPressure) { @@ -1095,6 +1138,8 @@ void ScheduleDAGMILive::schedule() { } dbgs() << '\n'; } + if (ExitSU.getInstr() != nullptr) + ExitSU.dumpAll(this); ); if (ViewMISchedDAGs) viewGraph(); @@ -1362,7 +1407,8 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { : SU(su), BaseReg(reg), Offset(ofs) {} bool operator<(const MemOpInfo&RHS) const { - return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset); + return std::tie(BaseReg, Offset, SU->NodeNum) < + std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum); } }; @@ -1395,6 +1441,24 @@ public: }; } // anonymous +namespace llvm { + +std::unique_ptr<ScheduleDAGMutation> +createLoadClusterDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI) + : nullptr; +} + +std::unique_ptr<ScheduleDAGMutation> +createStoreClusterDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI) + : nullptr; +} + +} // namespace llvm + void BaseMemOpClusterMutation::clusterNeighboringMemOps( ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) { SmallVector<MemOpInfo, 32> MemOpRecords; @@ -1487,29 +1551,23 @@ namespace { /// that may be fused by the processor into a single operation. class MacroFusion : public ScheduleDAGMutation { const TargetInstrInfo &TII; - const TargetRegisterInfo &TRI; public: - MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) - : TII(TII), TRI(TRI) {} + MacroFusion(const TargetInstrInfo &TII) + : TII(TII) {} void apply(ScheduleDAGInstrs *DAGInstrs) override; }; } // anonymous -/// Returns true if \p MI reads a register written by \p Other. -static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI, - const MachineInstr &Other) { - for (const MachineOperand &MO : MI.uses()) { - if (!MO.isReg() || !MO.readsReg()) - continue; +namespace llvm { - unsigned Reg = MO.getReg(); - if (Other.modifiesRegister(Reg, &TRI)) - return true; - } - return false; +std::unique_ptr<ScheduleDAGMutation> +createMacroFusionDAGMutation(const TargetInstrInfo *TII) { + return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr; } +} // namespace llvm + /// \brief Callback from DAG postProcessing to create cluster edges to encourage /// fused operations. void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { @@ -1521,16 +1579,12 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { if (!Branch) return; - for (SUnit &SU : DAG->SUnits) { - // SUnits with successors can't be schedule in front of the ExitSU. - if (!SU.Succs.empty()) - continue; - // We only care if the node writes to a register that the branch reads. 
- MachineInstr *Pred = SU.getInstr(); - if (!HasDataDep(TRI, *Branch, *Pred)) + for (SDep &PredDep : ExitSU.Preds) { + if (PredDep.isWeak()) continue; - - if (!TII.shouldScheduleAdjacent(*Pred, *Branch)) + SUnit &SU = *PredDep.getSUnit(); + MachineInstr &Pred = *SU.getInstr(); + if (!TII.shouldScheduleAdjacent(Pred, *Branch)) continue; // Create a single weak edge from SU to ExitSU. The only effect is to cause @@ -1543,6 +1597,16 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) { (void)Success; assert(Success && "No DAG nodes should be reachable from ExitSU"); + // Adjust latency of data deps between the nodes. + for (SDep &PredDep : ExitSU.Preds) { + if (PredDep.getSUnit() == &SU) + PredDep.setLatency(0); + } + for (SDep &SuccDep : SU.Succs) { + if (SuccDep.getSUnit() == &ExitSU) + SuccDep.setLatency(0); + } + DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n"); break; } @@ -1572,6 +1636,16 @@ protected: }; } // anonymous +namespace llvm { + +std::unique_ptr<ScheduleDAGMutation> +createCopyConstrainDAGMutation(const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + return make_unique<CopyConstrain>(TII, TRI); +} + +} // namespace llvm + /// constrainLocalCopy handles two possibilities: /// 1) Local src: /// I0: = dst @@ -1760,7 +1834,6 @@ void SchedBoundary::reset() { Available.clear(); Pending.clear(); CheckPending = false; - NextSUs.clear(); CurrCycle = 0; CurrMOps = 0; MinReadyCycle = UINT_MAX; @@ -1961,23 +2034,6 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { Pending.push(SU); else Available.push(SU); - - // Record this node as an immediate dependent of the scheduled node. - NextSUs.insert(SU); -} - -void SchedBoundary::releaseTopNode(SUnit *SU) { - if (SU->isScheduled) - return; - - releaseNode(SU, SU->TopReadyCycle); -} - -void SchedBoundary::releaseBottomNode(SUnit *SU) { - if (SU->isScheduled) - return; - - releaseNode(SU, SU->BotReadyCycle); } /// Move the boundary of scheduled code by one cycle. @@ -2828,9 +2884,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, bool SameBoundary = Zone != nullptr; if (SameBoundary) { // For loops that are acyclic path limited, aggressively schedule for - // latency. This can result in very long dependence chains scheduled in - // sequence, so once every cycle (when CurrMOps == 0), switch to normal - // heuristics. + // latency. Within an single cycle, whenever CurrMOps > 0, allow normal + // heuristics to take precedence. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && tryLatency(TryCand, Cand, *Zone)) return; @@ -2888,13 +2943,6 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) return; - // Prefer immediate defs/users of the last scheduled instruction. This is a - // local pressure avoidance strategy that also makes the machine code - // readable. - if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU), - TryCand, Cand, NextDefUse)) - return; - // Fall through to original instruction order. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { @@ -3105,28 +3153,24 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. 
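The hunks above turn the built-in DAG mutations into factory functions, and the hunk below exports createGenericSchedLive, so a target can assemble its own scheduler from the pieces. A rough sketch of such a hook; the function name and the surrounding target file are assumptions, only the factory calls come from this patch, and this fragment is not self-contained:

// Hypothetical fragment of a target's TargetMachine implementation.
static ScheduleDAGInstrs *createMyTargetSchedLive(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // The factories return nullptr when their cl::opt is disabled, so guard the
  // registration rather than assuming addMutation accepts a null mutation.
  if (auto LoadCluster = createLoadClusterDAGMutation(DAG->TII, DAG->TRI))
    DAG->addMutation(std::move(LoadCluster));
  if (auto StoreCluster = createStoreClusterDAGMutation(DAG->TII, DAG->TRI))
    DAG->addMutation(std::move(StoreCluster));
  if (auto Fusion = createMacroFusionDAGMutation(DAG->TII))
    DAG->addMutation(std::move(Fusion));
  return DAG;
}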
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { +ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. - DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI)); - if (EnableMemOpCluster) { - if (DAG->TII->enableClusterLoads()) - DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI)); - if (DAG->TII->enableClusterStores()) - DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI)); - } - if (EnableMacroFusion) - DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI)); + DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); return DAG; } +static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) { + return createGenericSchedLive(C); +} + static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", - createGenericSchedLive); + createConveringSched); //===----------------------------------------------------------------------===// // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. @@ -3257,9 +3301,9 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { Top.bumpNode(SU); } -/// Create a generic scheduler with no vreg liveness or DAG mutation passes. -static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { - return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true); +ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { + return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), + /*RemoveKillFlags=*/true); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 571a5c1..5f87b68 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -22,9 +22,15 @@ #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/LLVMContext.h" @@ -34,6 +40,13 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <map> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "machine-sink" @@ -48,12 +61,21 @@ UseBlockFreqInfo("machine-sink-bfi", cl::desc("Use block frequency info to find successors to sink"), cl::init(true), cl::Hidden); +static cl::opt<unsigned> SplitEdgeProbabilityThreshold( + "machine-sink-split-probability-threshold", + cl::desc( + "Percentage threshold for splitting single-instruction critical edge. 
" + "If the branch threshold is higher than this threshold, we allow " + "speculative execution of up to 1 instruction to avoid branching to " + "splitted critical edge"), + cl::init(40), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); namespace { + class MachineSinking : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -62,15 +84,16 @@ namespace { MachinePostDominatorTree *PDT; // Machine post dominator tree MachineLoopInfo *LI; const MachineBlockFrequencyInfo *MBFI; + const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; // Remember which edges have been considered for breaking. - SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8> + SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8> CEBCandidates; // Remember which edges we are about to split. // This is different from CEBCandidates since those edges // will be split. - SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit; + SetVector<std::pair<MachineBasicBlock*, MachineBasicBlock*> > ToSplit; SparseBitVector<> RegsToClearKillFlags; @@ -79,6 +102,7 @@ namespace { public: static char ID; // Pass identification + MachineSinking() : MachineFunctionPass(ID) { initializeMachineSinkingPass(*PassRegistry::getPassRegistry()); } @@ -92,6 +116,7 @@ namespace { AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachinePostDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); @@ -143,12 +168,14 @@ namespace { GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const; }; + } // end anonymous namespace char MachineSinking::ID = 0; char &llvm::MachineSinkingID = MachineSinking::ID; INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", "Machine code sinking", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -269,11 +296,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { PDT = &getAnalysis<MachinePostDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); bool EverMadeChange = false; - while (1) { + while (true) { bool MadeChange = false; // Process all basic blocks. @@ -369,6 +397,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI)) return true; + if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <= + BranchProbability(SplitEdgeProbabilityThreshold, 100)) + return true; + // MI is cheap, we probably don't want to break the critical edge for it. // However, if this would allow some definitions of its source operands // to be sunk then it's probably worth it. @@ -604,7 +636,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. 
- if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + if (!MRI->isConstantPhysReg(Reg)) return nullptr; } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 86332c8..ef7e525 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -430,16 +430,17 @@ public: po_iterator_storage(LoopBounds &lb) : LB(lb) {} void finishPostorder(const MachineBasicBlock*) {} - bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + bool insertEdge(Optional<const MachineBasicBlock *> From, + const MachineBasicBlock *To) { // Skip already visited To blocks. MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) return false; // From is null once when To is the trace center block. if (From) { - if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) { // Don't follow backedges, don't leave FromLoop when going upwards. - if ((LB.Downward ? To : From) == FromLoop->getHeader()) + if ((LB.Downward ? To : *From) == FromLoop->getHeader()) return false; // Don't leave FromLoop. if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index a70adb0..a98139f 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -70,6 +70,10 @@ namespace { unsigned foundErrors; + // Avoid querying the MachineFunctionProperties for each operand. + bool isFunctionRegBankSelected; + bool isFunctionSelected; + typedef SmallVector<unsigned, 16> RegVector; typedef SmallVector<const uint32_t*, 4> RegMaskVector; typedef DenseSet<unsigned> RegSet; @@ -204,16 +208,13 @@ namespace { void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); void visitMachineFunctionAfter(); - template <typename T> void report(const char *msg, ilist_iterator<T> I) { - report(msg, &*I); - } void report(const char *msg, const MachineFunction *MF); void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); void report_context(const LiveInterval &LI) const; - void report_context(const LiveRange &LR, unsigned Reg, + void report_context(const LiveRange &LR, unsigned VRegUnit, LaneBitmask LaneMask) const; void report_context(const LiveRange::Segment &S) const; void report_context(const VNInfo &VNI) const; @@ -228,10 +229,10 @@ namespace { void checkLiveness(const MachineOperand *MO, unsigned MONum); void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum, SlotIndex UseIdx, const LiveRange &LR, unsigned Reg, - LaneBitmask LaneMask = 0); + LaneBitmask LaneMask = LaneBitmask::getNone()); void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned Reg, - LaneBitmask LaneMask = 0); + LaneBitmask LaneMask = LaneBitmask::getNone()); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); @@ -242,11 +243,12 @@ namespace { void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned, - unsigned); + LaneBitmask); void verifyLiveRangeSegment(const LiveRange&, const 
LiveRange::const_iterator I, unsigned, - unsigned); - void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0); + LaneBitmask); + void verifyLiveRange(const LiveRange&, unsigned, + LaneBitmask LaneMask = LaneBitmask::getNone()); void verifyStackFrame(); @@ -310,15 +312,12 @@ void MachineVerifier::verifySlotIndexes() const { void MachineVerifier::verifyProperties(const MachineFunction &MF) { // If a pass has introduced virtual registers without clearing the - // AllVRegsAllocated property (or set it without allocating the vregs) + // NoVRegs property (or set it without allocating the vregs) // then report an error. if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::AllVRegsAllocated) && - MRI->getNumVirtRegs()) { - report( - "Function has AllVRegsAllocated property but there are VReg operands", - &MF); - } + MachineFunctionProperties::Property::NoVRegs) && + MRI->getNumVirtRegs()) + report("Function has NoVRegs property but there are VReg operands", &MF); } unsigned MachineVerifier::verify(MachineFunction &MF) { @@ -330,6 +329,11 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); + isFunctionRegBankSelected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::RegBankSelected); + isFunctionSelected = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected); + LiveVars = nullptr; LiveInts = nullptr; LiveStks = nullptr; @@ -359,7 +363,7 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != &*MFI) { - report("Bad instruction parent pointer", MFI); + report("Bad instruction parent pointer", &*MFI); errs() << "Instruction: " << *MBBI; continue; } @@ -381,7 +385,7 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { CurBundle = &*MBBI; visitMachineBundleBefore(CurBundle); } else if (!CurBundle) - report("No bundle header", MBBI); + report("No bundle header", &*MBBI); visitMachineInstrBefore(&*MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { const MachineInstr &MI = *MBBI; @@ -474,11 +478,11 @@ void MachineVerifier::report_context(const LiveInterval &LI) const { errs() << "- interval: " << LI << '\n'; } -void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg, +void MachineVerifier::report_context(const LiveRange &LR, unsigned VRegUnit, LaneBitmask LaneMask) const { report_context_liverange(LR); - errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; - if (LaneMask != 0) + report_context_vreg_regunit(VRegUnit); + if (LaneMask.any()) report_context_lanemask(LaneMask); } @@ -524,16 +528,6 @@ void MachineVerifier::visitMachineFunctionBefore() { lastIndex = SlotIndex(); regsReserved = MRI->getReservedRegs(); - // A sub-register of a reserved register is also reserved - for (int Reg = regsReserved.find_first(); Reg>=0; - Reg = regsReserved.find_next(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - // FIXME: This should probably be: - // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register"); - regsReserved.set(*SubRegs); - } - } - markReachable(&MF->front()); // Build a set of the basic blocks in the function. 
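The verifier hunks above and below show the switch from raw unsigned lane masks, compared against 0, to a LaneBitmask class with explicit queries. Roughly, the wrapper behaves like the sketch here; the real type (llvm/MC/LaneBitmask.h) carries more operations:

// Simplified sketch of the wrapper type; illustrative only.
struct LaneBitmaskSketch {
  explicit LaneBitmaskSketch(unsigned M = 0) : Mask(M) {}

  bool any() const { return Mask != 0; }   // replaces: LaneMask != 0
  bool none() const { return Mask == 0; }  // replaces: LaneMask == 0
  static LaneBitmaskSketch getNone() { return LaneBitmaskSketch(0); }

  LaneBitmaskSketch operator&(LaneBitmaskSketch Other) const {
    return LaneBitmaskSketch(Mask & Other.Mask);
  }
  LaneBitmaskSketch &operator|=(LaneBitmaskSketch Other) {
    Mask |= Other.Mask;
    return *this;
  }

  unsigned Mask;
};

With such a wrapper, a check like (MOMask & SR.LaneMask).none() reads the same as the old (MOMask & SR.LaneMask) == 0, but the mask can no longer be mixed silently with plain integers, which is what the mechanical edits in these hunks reflect.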
@@ -571,7 +565,8 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { FirstTerminator = nullptr; - if (MRI->isSSA()) { + if (!MF->getProperties().hasProperty( + MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) { // If this block has allocatable physical registers live-in, check that // it is an entry block or landing pad. for (const auto &LI : MBB->liveins()) { @@ -746,20 +741,21 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } regsLive.clear(); - for (const auto &LI : MBB->liveins()) { - if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { - report("MBB live-in list contains non-physical register", MBB); - continue; + if (MRI->tracksLiveness()) { + for (const auto &LI : MBB->liveins()) { + if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { + report("MBB live-in list contains non-physical register", MBB); + continue; + } + for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + regsLive.insert(*SubRegs); } - for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - regsLive.insert(*SubRegs); } regsLiveInButUnused = regsLive; - const MachineFrameInfo *MFI = MF->getFrameInfo(); - assert(MFI && "Function has no frame info"); - BitVector PR = MFI->getPristineRegs(*MF); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + BitVector PR = MFI.getPristineRegs(*MF); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) @@ -850,6 +846,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { << MI->getNumOperands() << " given.\n"; } + if (MI->isPHI() && MF->getProperties().hasProperty( + MachineFunctionProperties::Property::NoPHIs)) + report("Found PHI instruction with NoPHIs property set", MI); + // Check the tied operands. if (MI->isInlineAsm()) verifyInlineAsm(MI); @@ -879,6 +879,35 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } + // Check types. + if (isPreISelGenericOpcode(MCID.getOpcode())) { + if (isFunctionSelected) + report("Unexpected generic instruction in a Selected function", MI); + + // Generic instructions specify equality constraints between some + // of their operands. Make sure these are consistent. + SmallVector<LLT, 4> Types; + for (unsigned i = 0; i < MCID.getNumOperands(); ++i) { + if (!MCID.OpInfo[i].isGenericType()) + continue; + size_t TypeIdx = MCID.OpInfo[i].getGenericTypeIndex(); + Types.resize(std::max(TypeIdx + 1, Types.size())); + + LLT OpTy = MRI->getType(MI->getOperand(i).getReg()); + if (Types[TypeIdx].isValid() && Types[TypeIdx] != OpTy) + report("type mismatch in generic instruction", MI); + Types[TypeIdx] = OpTy; + } + } + + // Generic opcodes must not have physical register operands. + if (isPreISelGenericOpcode(MCID.getOpcode())) { + for (auto &Op : MI->operands()) { + if (Op.isReg() && TargetRegisterInfo::isPhysicalRegister(Op.getReg())) + report("Generic instruction cannot have physical register", MI); + } + } + StringRef ErrorInfo; if (!TII->verifyInstruction(*MI, ErrorInfo)) report(ErrorInfo.data(), MI); @@ -988,25 +1017,62 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg); if (!RC) { // This is a generic virtual register. - // It must have a size and it must not have a SubIdx. 
- unsigned Size = MRI->getSize(Reg); - if (!Size) { - report("Generic virtual register must have a size", MO, MONum); + + // If we're post-Select, we can't have gvregs anymore. + if (isFunctionSelected) { + report("Generic virtual register invalid in a Selected function", + MO, MONum); return; } - // Make sure the register fits into its register bank if any. + + // The gvreg must have a type and it must not have a SubIdx. + LLT Ty = MRI->getType(Reg); + if (!Ty.isValid()) { + report("Generic virtual register must have a valid type", MO, + MONum); + return; + } + const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg); - if (RegBank && RegBank->getSize() < Size) { + + // If we're post-RegBankSelect, the gvreg must have a bank. + if (!RegBank && isFunctionRegBankSelected) { + report("Generic virtual register must have a bank in a " + "RegBankSelected function", + MO, MONum); + return; + } + + // Make sure the register fits into its register bank if any. + if (RegBank && Ty.isValid() && + RegBank->getSize() < Ty.getSizeInBits()) { report("Register bank is too small for virtual register", MO, MONum); errs() << "Register bank " << RegBank->getName() << " too small(" - << RegBank->getSize() << ") to fit " << Size << "-bits\n"; + << RegBank->getSize() << ") to fit " << Ty.getSizeInBits() + << "-bits\n"; return; } if (SubIdx) { - report("Generic virtual register does not subregister index", MO, MONum); + report("Generic virtual register does not subregister index", MO, + MONum); return; } + + // If this is a target specific instruction and this operand + // has register class constraint, the virtual register must + // comply to it. + if (!isPreISelGenericOpcode(MCID.getOpcode()) && + TII->getRegClass(MCID, MONum, TRI, *MF)) { + report("Virtual register does not match instruction constraint", MO, + MONum); + errs() << "Expect register class " + << TRI->getRegClassName( + TII->getRegClass(MCID, MONum, TRI, *MF)) + << " but got nothing\n"; + return; + } + break; } if (SubIdx) { @@ -1113,7 +1179,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO, LiveQueryResult LRQ = LR.Query(UseIdx); // Check if we have a segment at the use, note however that we only need one // live subregister range, the others may be dead. 
- if (!LRQ.valueIn() && LaneMask == 0) { + if (!LRQ.valueIn() && LaneMask.none()) { report("No live segment at use", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); @@ -1123,7 +1189,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO, report("Live range continues after kill flag", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); - if (LaneMask != 0) + if (LaneMask.any()) report_context_lanemask(LaneMask); report_context(UseIdx); } @@ -1138,7 +1204,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, report("Inconsistent valno->def", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); - if (LaneMask != 0) + if (LaneMask.any()) report_context_lanemask(LaneMask); report_context(*VNI); report_context(DefIdx); @@ -1147,7 +1213,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, report("No live segment at def", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); - if (LaneMask != 0) + if (LaneMask.any()) report_context_lanemask(LaneMask); report_context(DefIdx); } @@ -1177,7 +1243,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, report("Live range continues after dead def flag", MO, MONum); report_context_liverange(LR); report_context_vreg_regunit(VRegOrUnit); - if (LaneMask != 0) + if (LaneMask.any()) report_context_lanemask(LaneMask); } } @@ -1199,7 +1265,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && MO->isKill()) { LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end()) + if (!is_contained(VI.Kills, MI)) report("Kill missing from LiveVariables", MO, MONum); } @@ -1225,9 +1291,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LaneBitmask MOMask = SubRegIdx != 0 ? TRI->getSubRegIndexLaneMask(SubRegIdx) : MRI->getMaxLaneMaskForVReg(Reg); - LaneBitmask LiveInMask = 0; + LaneBitmask LiveInMask; for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((MOMask & SR.LaneMask) == 0) + if ((MOMask & SR.LaneMask).none()) continue; checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask); LiveQueryResult LRQ = SR.Query(UseIdx); @@ -1235,7 +1301,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LiveInMask |= SR.LaneMask; } // At least parts of the register has to be live at the use. - if ((LiveInMask & MOMask) == 0) { + if ((LiveInMask & MOMask).none()) { report("No live subrange at use", MO, MONum); report_context(LI); report_context(UseIdx); @@ -1327,7 +1393,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { ? 
TRI->getSubRegIndexLaneMask(SubRegIdx) : MRI->getMaxLaneMaskForVReg(Reg); for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((SR.LaneMask & MOMask) == 0) + if ((SR.LaneMask & MOMask).none()) continue; checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask); } @@ -1640,8 +1706,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, !TRI->hasRegUnit(MOI->getReg(), Reg)) continue; } - if (LaneMask != 0 && - (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask) == 0) + if (LaneMask.any() && + (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none()) continue; hasDef = true; if (MOI->isEarlyClobber()) @@ -1772,15 +1838,22 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) { if (!MOI->isReg() || MOI->getReg() != Reg) continue; - if (LaneMask != 0 && - (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0) - continue; + unsigned Sub = MOI->getSubReg(); + LaneBitmask SLM = Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) + : LaneBitmask::getAll(); if (MOI->isDef()) { - if (MOI->getSubReg() != 0) + if (Sub != 0) { hasSubRegDef = true; + // An operand vreg0:sub0<def> reads vreg0:sub1..n. Invert the lane + // mask for subregister defs. Read-undef defs will be handled by + // readsReg below. + SLM = ~SLM; + } if (MOI->isDead()) hasDeadDef = true; } + if (LaneMask.any() && (LaneMask & SLM).none()) + continue; if (MOI->readsReg()) hasRead = true; } @@ -1788,7 +1861,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Make sure that the corresponding machine operand for a "dead" live // range has the dead flag. We cannot perform this check for subregister // liveranges as partially dead values are allowed. - if (LaneMask == 0 && !hasDeadDef) { + if (LaneMask.none() && !hasDeadDef) { report("Instruction ending live segment on dead slot has no dead flag", MI); report_context(LR, Reg, LaneMask); @@ -1798,7 +1871,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!hasRead) { // When tracking subregister liveness, the main range must start new // values on partial register writes, even if there is no read. - if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 || + if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask.any() || !hasSubRegDef) { report("Instruction ending live segment doesn't read the register", MI); @@ -1842,7 +1915,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // All predecessors must have a live-out value if this is not a // subregister liverange. 
- if (!PVNI && LaneMask == 0) { + if (!PVNI && LaneMask.none()) { report("Register not marked live out of predecessor", *PI); report_context(LR, Reg, LaneMask); report_context(*VNI); @@ -1882,14 +1955,14 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { assert(TargetRegisterInfo::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); - LaneBitmask Mask = 0; + LaneBitmask Mask; LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((Mask & SR.LaneMask) != 0) { + if ((Mask & SR.LaneMask).any()) { report("Lane masks of sub ranges overlap in live interval", MF); report_context(LI); } - if ((SR.LaneMask & ~MaxMask) != 0) { + if ((SR.LaneMask & ~MaxMask).any()) { report("Subrange lanemask is invalid", MF); report_context(LI); } @@ -1950,11 +2023,11 @@ void MachineVerifier::verifyStackFrame() { SmallVector<StackStateOfBB, 8> SPState; SPState.resize(MF->getNumBlockIDs()); - SmallPtrSet<const MachineBasicBlock*, 8> Reachable; + df_iterator_default_set<const MachineBasicBlock*> Reachable; // Visit the MBBs in DFS order. for (df_ext_iterator<const MachineFunction*, - SmallPtrSet<const MachineBasicBlock*, 8> > + df_iterator_default_set<const MachineBasicBlock*> > DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable); DFI != DFE; ++DFI) { const MachineBasicBlock *MBB = *DFI; diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp index 0177e41..2a8531f 100644 --- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -184,7 +184,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); PI != PE; ++PI) { MachineInstr *PhiMI = *PI; - if (&*MII == PhiMI) + if (MII == PhiMI) ++MII; PhiMI->eraseFromParent(); } diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index b8d5431..c67a25b 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -175,6 +175,8 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { ImpDefs.clear(); VRegPHIUseCount.clear(); + MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs); + return Changed; } diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp index 4cabc3a..4e67ff2 100644 --- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp +++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -54,6 +54,7 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, ++InsertPoint; } - // Make sure the copy goes after any phi nodes however. + // Make sure the copy goes after any phi nodes but before + // any debug nodes. 
return MBB->SkipPHIsAndLabels(InsertPoint); } diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp index ccdaec1..50dd44f 100644 --- a/contrib/llvm/lib/CodeGen/ParallelCG.cpp +++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ParallelCG.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -78,7 +79,7 @@ std::unique_ptr<Module> llvm::splitCodeGen( CodegenThreadPool.async( [TMFactory, FileType, ThreadOS](const SmallString<0> &BC) { LLVMContext Ctx; - ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( + Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( MemoryBufferRef(StringRef(BC.data(), BC.size()), "<split-module>"), Ctx); diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp index 32468c9..ad9166f 100644 --- a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp +++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp @@ -32,7 +32,7 @@ struct PatchableFunction : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &F) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } }; } diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 60b27dd..6d64345 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -70,17 +70,28 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> +#include <memory> #include <utility> + using namespace llvm; #define DEBUG_TYPE "peephole-opt" @@ -118,6 +129,7 @@ STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { + class ValueTrackerResult; class PeepholeOptimizer : public MachineFunctionPass { @@ -128,6 +140,7 @@ namespace { public: static char ID; // Pass identification + PeepholeOptimizer() : MachineFunctionPass(ID) { initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); } @@ -390,10 +403,12 @@ namespace { /// register of the last source. 
unsigned getReg() const { return Reg; } }; -} + +} // end anonymous namespace char PeepholeOptimizer::ID = 0; char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; + INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -737,6 +752,7 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, } namespace { + /// \brief Helper class to rewrite the arguments of a copy-like instruction. class CopyRewriter { protected: @@ -820,7 +836,6 @@ public: TargetInstrInfo::RegSubRegPair Def, PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources = true) { - TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); do { ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); @@ -859,7 +874,7 @@ public: const MachineOperand &MODef = NewPHI->getOperand(0); return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); - } while (1); + } while (true); return TargetInstrInfo::RegSubRegPair(0, 0); } @@ -1001,6 +1016,7 @@ public: TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); return true; } + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { if (CurrentSrcIdx != 2) return false; @@ -1141,7 +1157,8 @@ public: return true; } }; -} // End namespace. + +} // end anonymous namespace /// \brief Get the appropriated CopyRewriter for \p MI. /// \return A pointer to a dynamically allocated CopyRewriter or nullptr @@ -1523,11 +1540,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->isDebugValue()) continue; - // If we run into an instruction we can't fold across, discard - // the load candidates. - if (MI->isLoadFoldBarrier()) - FoldAsLoadDefCandidates.clear(); - if (MI->isPosition() || MI->isPHI()) continue; @@ -1571,7 +1583,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI << '\n'); NAPhysToVirtMIs.clear(); - continue; } if ((isUncoalescableCopy(*MI) && @@ -1622,8 +1633,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // earlier load into MI. if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && !FoldAsLoadDefCandidates.empty()) { + + // We visit each operand even after successfully folding a previous + // one. This allows us to fold multiple loads into a single + // instruction. We do assume that optimizeLoadInstr doesn't insert + // foldable uses earlier in the argument list. Since we don't restart + // iteration, we'd miss such cases. const MCInstrDesc &MIDesc = MI->getDesc(); - for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); + for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands(); ++i) { const MachineOperand &MOp = MI->getOperand(i); if (!MOp.isReg()) @@ -1650,13 +1667,23 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MRI->markUsesInDebugValueAsUndef(FoldedReg); FoldAsLoadDefCandidates.erase(FoldedReg); ++NumLoadFold; - // MI is replaced with FoldMI. + + // MI is replaced with FoldMI so we can continue trying to fold Changed = true; - break; + MI = FoldMI; } } } } + + // If we run into an instruction we can't fold across, discard + // the load candidates. Note: We might be able to fold *into* this + // instruction, so this needs to be after the folding logic. 
+ if (MI->isLoadFoldBarrier()) { + DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n"); + FoldAsLoadDefCandidates.clear(); + } + } } @@ -1688,7 +1715,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { // Bitcasts with more than one def are not supported. if (Def->getDesc().getNumDefs() != 1) return ValueTrackerResult(); - if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) + const MachineOperand DefOp = Def->getOperand(DefIdx); + if (DefOp.getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of the src. // Bails as we do not support composing subregs yet. return ValueTrackerResult(); @@ -1708,6 +1736,14 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { return ValueTrackerResult(); SrcIdx = OpIdx; } + + // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY + // will break the assumed guarantees for the upper bits. + for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) { + if (UseMI.isSubregToReg()) + return ValueTrackerResult(); + } + const MachineOperand &Src = Def->getOperand(SrcIdx); return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } @@ -1806,8 +1842,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { // sub-register we are tracking. const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); if (!TRI || - (TRI->getSubRegIndexLaneMask(DefSubReg) & - TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) + !(TRI->getSubRegIndexLaneMask(DefSubReg) & + TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none()) return ValueTrackerResult(); // At this point, the value is available in v0 via the same subreg // we used for Def. diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 3fce307..6081916 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -98,7 +98,7 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } bool runOnMachineFunction(MachineFunction &Fn) override; diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 20a9a39..5fca7fa 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -80,7 +80,7 @@ public: MachineFunctionProperties getRequiredProperties() const override { MachineFunctionProperties MFP; if (UsesCalleeSaves) - MFP.set(MachineFunctionProperties::Property::AllVRegsAllocated); + MFP.set(MachineFunctionProperties::Property::NoVRegs); return MFP; } @@ -117,6 +117,10 @@ private: // TRI->requiresFrameIndexScavenging() for the current function. bool FrameIndexVirtualScavenging; + // Flag to control whether the scavenger should be passed even though + // FrameIndexVirtualScavenging is used. + bool FrameIndexEliminationScavenging; + void calculateCallFrameInfo(MachineFunction &Fn); void calculateSaveRestoreBlocks(MachineFunction &Fn); @@ -176,6 +180,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { RS = TRI->requiresRegisterScavenging(Fn) ? 
new RegScavenger() : nullptr; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); + FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) || + TRI->requiresFrameIndexReplacementScavenging(Fn); // Calculate the MaxCallFrameSize and AdjustsStack variables for the // function's frame information. Also eliminates call frame pseudo @@ -221,8 +227,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { } // Warn on stack size when we exceeds the given limit. - MachineFrameInfo *MFI = Fn.getFrameInfo(); - uint64_t StackSize = MFI->getStackSize(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); + uint64_t StackSize = MFI.getStackSize(); if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { DiagnosticInfoStackSize DiagStackSize(*F, StackSize); F->getContext().diagnose(DiagStackSize); @@ -231,8 +237,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { delete RS; SaveBlocks.clear(); RestoreBlocks.clear(); - MFI->setSavePoint(nullptr); - MFI->setRestorePoint(nullptr); + MFI.setSavePoint(nullptr); + MFI.setRestorePoint(nullptr); return true; } @@ -242,10 +248,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { void PEI::calculateCallFrameInfo(MachineFunction &Fn) { const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - MachineFrameInfo *MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool AdjustsStack = MFI->adjustsStack(); + bool AdjustsStack = MFI.adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); @@ -274,8 +280,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { AdjustsStack = true; } - MFI->setAdjustsStack(AdjustsStack); - MFI->setMaxCallFrameSize(MaxCallFrameSize); + MFI.setAdjustsStack(AdjustsStack); + MFI.setMaxCallFrameSize(MaxCallFrameSize); for (std::vector<MachineBasicBlock::iterator>::iterator i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { @@ -293,17 +299,17 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { /// Compute the sets of entry and return blocks for saving and restoring /// callee-saved registers, and placing prolog and epilog code. void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) { - const MachineFrameInfo *MFI = Fn.getFrameInfo(); + const MachineFrameInfo &MFI = Fn.getFrameInfo(); // Even when we do not change any CSR, we still want to insert the // prologue and epilogue of the function. // So set the save points for those. // Use the points found by shrink-wrapping, if any. - if (MFI->getSavePoint()) { - SaveBlocks.push_back(MFI->getSavePoint()); - assert(MFI->getRestorePoint() && "Both restore and save must be set"); - MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); + if (MFI.getSavePoint()) { + SaveBlocks.push_back(MFI.getSavePoint()); + assert(MFI.getRestorePoint() && "Both restore and save must be set"); + MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); // If RestoreBlock does not have any successor and is not a return block // then the end point is unreachable and we do not need to insert any // epilogue. 
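Aside for readers of the PrologEpilogInserter hunks (illustrative only, not part of the patch): the mechanical "MFI->" to "MFI." rewrites follow from MachineFunction::getFrameInfo() now returning a MachineFrameInfo reference rather than a pointer. A minimal sketch of the updated accessor, assuming only the interface visible in these hunks; frameSizeOf is a made-up helper name:

    // Sketch only -- getFrameInfo() now hands back a reference, so callers
    // bind a MachineFrameInfo& and use '.' rather than '->'.
    #include <cstdint>
    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"

    static uint64_t frameSizeOf(const llvm::MachineFunction &MF) {
      const llvm::MachineFrameInfo &MFI = MF.getFrameInfo(); // was: *MF.getFrameInfo()
      return MFI.getStackSize();
    }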
@@ -340,7 +346,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, } const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); - MachineFrameInfo *MFI = F.getFrameInfo(); + MachineFrameInfo &MFI = F.getFrameInfo(); if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { // If target doesn't implement this, use generic code. @@ -379,26 +385,26 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. FrameIdx = - MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); } CS.setFrameIdx(FrameIdx); } } - MFI->setCalleeSavedInfo(CSI); + MFI.setCalleeSavedInfo(CSI); } /// Helper function to update the liveness information for the callee-saved /// registers. static void updateLiveness(MachineFunction &MF) { - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); // Visited will contain all the basic blocks that are in the region // where the callee saved registers are alive: // - Anything that is not Save or Restore -> LiveThrough. @@ -409,7 +415,7 @@ static void updateLiveness(MachineFunction &MF) { SmallPtrSet<MachineBasicBlock *, 8> Visited; SmallVector<MachineBasicBlock *, 8> WorkList; MachineBasicBlock *Entry = &MF.front(); - MachineBasicBlock *Save = MFI->getSavePoint(); + MachineBasicBlock *Save = MFI.getSavePoint(); if (!Save) Save = Entry; @@ -420,7 +426,7 @@ static void updateLiveness(MachineFunction &MF) { } Visited.insert(Save); - MachineBasicBlock *Restore = MFI->getRestorePoint(); + MachineBasicBlock *Restore = MFI.getRestorePoint(); if (Restore) // By construction Restore cannot be visited, otherwise it // means there exists a path to Restore that does not go @@ -440,7 +446,7 @@ static void updateLiveness(MachineFunction &MF) { WorkList.push_back(SuccBB); } - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { for (MachineBasicBlock *MBB : Visited) { @@ -460,10 +466,10 @@ static void insertCSRSpillsAndRestores(MachineFunction &Fn, const MBBVector &SaveBlocks, const MBBVector &RestoreBlocks) { // Get callee saved register information. - MachineFrameInfo *MFI = Fn.getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - MFI->setCalleeSavedInfoValid(true); + MFI.setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) @@ -551,14 +557,14 @@ static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, /// AdjustStackOffset - Helper function used to adjust the stack frame offset. static inline void -AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, +AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { // If the stack grows down, add the object size to find the lowest address. 
if (StackGrowsDown) - Offset += MFI->getObjectSize(FrameIdx); + Offset += MFI.getObjectSize(FrameIdx); - unsigned Align = MFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI.getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -569,11 +575,11 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, if (StackGrowsDown) { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); - MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); - MFI->setObjectOffset(FrameIdx, Offset); - Offset += MFI->getObjectSize(FrameIdx); + MFI.setObjectOffset(FrameIdx, Offset); + Offset += MFI.getObjectSize(FrameIdx); } } @@ -581,7 +587,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, /// track of them in StackBytesFree. /// static inline void -computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, +computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex, int64_t FixedCSEnd, BitVector &StackBytesFree) { // Avoid undefined int64_t -> int conversion below in extreme case. @@ -592,7 +598,7 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, SmallVector<int, 16> AllocatedFrameSlots; // Add fixed objects. - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) + for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) AllocatedFrameSlots.push_back(i); // Add callee-save objects. for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) @@ -601,8 +607,8 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, for (int i : AllocatedFrameSlots) { // These are converted from int64_t, but they should always fit in int // because of the FixedCSEnd check above. - int ObjOffset = MFI->getObjectOffset(i); - int ObjSize = MFI->getObjectSize(i); + int ObjOffset = MFI.getObjectOffset(i); + int ObjSize = MFI.getObjectSize(i); int ObjStart, ObjEnd; if (StackGrowsDown) { // ObjOffset is negative when StackGrowsDown is true. @@ -621,10 +627,10 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, /// Assign frame object to an unused portion of the stack in the fixed stack /// object range. Return true if the allocation was successful. 
/// -static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, +static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, bool StackGrowsDown, unsigned MaxAlign, BitVector &StackBytesFree) { - if (MFI->isVariableSizedObjectIndex(FrameIdx)) + if (MFI.isVariableSizedObjectIndex(FrameIdx)) return false; if (StackBytesFree.none()) { @@ -634,11 +640,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, return false; } - unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx); + unsigned ObjAlign = MFI.getObjectAlignment(FrameIdx); if (ObjAlign > MaxAlign) return false; - int64_t ObjSize = MFI->getObjectSize(FrameIdx); + int64_t ObjSize = MFI.getObjectSize(FrameIdx); int FreeStart; for (FreeStart = StackBytesFree.find_first(); FreeStart != -1; FreeStart = StackBytesFree.find_next(FreeStart)) { @@ -668,11 +674,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, int ObjStart = -(FreeStart + ObjSize); DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart << "]\n"); - MFI->setObjectOffset(FrameIdx, ObjStart); + MFI.setObjectOffset(FrameIdx, ObjStart); } else { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart << "]\n"); - MFI->setObjectOffset(FrameIdx, FreeStart); + MFI.setObjectOffset(FrameIdx, FreeStart); } StackBytesFree.reset(FreeStart, FreeStart + ObjSize); @@ -684,7 +690,7 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, - MachineFrameInfo *MFI, bool StackGrowsDown, + MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { for (StackObjSet::const_iterator I = UnassignedObjs.begin(), @@ -706,7 +712,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *MFI = Fn.getFrameInfo(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction @@ -725,17 +731,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // non-fixed objects can't be allocated right at the start of local area. // Adjust 'Offset' to point to the end of last fixed sized preallocated // object. - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { + for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. - FixedOff = -MFI->getObjectOffset(i); + FixedOff = -MFI.getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. - FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); + FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } @@ -746,32 +752,32 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { // If the stack grows down, we need to add the size to find the lowest // address of the object. 
- Offset += MFI->getObjectSize(i); + Offset += MFI.getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); + unsigned Align = MFI.getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); - MFI->setObjectOffset(i, -Offset); // Set the computed offset + MFI.setObjectOffset(i, -Offset); // Set the computed offset } } else if (MaxCSFrameIndex >= MinCSFrameIndex) { // Be careful about underflow in comparisons agains MinCSFrameIndex. for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) { - unsigned Align = MFI->getObjectAlignment(i); + unsigned Align = MFI.getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); - MFI->setObjectOffset(i, Offset); - Offset += MFI->getObjectSize(i); + MFI.setObjectOffset(i, Offset); + Offset += MFI.getObjectSize(i); } } // FixedCSEnd is the stack offset to the end of the fixed and callee-save // stack area. int64_t FixedCSEnd = Offset; - unsigned MaxAlign = MFI->getMaxAlignment(); + unsigned MaxAlign = MFI.getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer @@ -793,8 +799,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // check for whether the frame is large enough to want to use virtual // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. - if (MFI->getUseLocalStackAllocationBlock()) { - unsigned Align = MFI->getLocalFrameMaxAlign(); + if (MFI.getUseLocalStackAllocationBlock()) { + unsigned Align = MFI.getLocalFrameMaxAlign(); // Adjust to alignment boundary. Offset = alignTo(Offset, Align, Skew); @@ -802,15 +808,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); // Resolve offsets for objects in the local block. - for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { - std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); + for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) { + std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i); int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset << "]\n"); - MFI->setObjectOffset(Entry.first, FIOffset); + MFI.setObjectOffset(Entry.first, FIOffset); } // Allocate the local block - Offset += MFI->getLocalFrameSize(); + Offset += MFI.getLocalFrameSize(); MaxAlign = std::max(Align, MaxAlign); } @@ -823,30 +829,30 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> ProtectedObjs; - if (MFI->getStackProtectorIndex() >= 0) { + if (MFI.getStackProtectorIndex() >= 0) { StackObjSet LargeArrayObjs; StackObjSet SmallArrayObjs; StackObjSet AddrOfObjs; - AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, + AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign, Skew); // Assign large stack objects first. 
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isObjectPreAllocated(i) && - MFI->getUseLocalStackAllocationBlock()) + for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { + if (MFI.isObjectPreAllocated(i) && + MFI.getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && RS->isScavengingFrameIndex((int)i)) continue; - if (MFI->isDeadObjectIndex(i)) + if (MFI.isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i || + if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i) continue; - switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) { case StackProtector::SSPLK_None: continue; case StackProtector::SSPLK_SmallArray: @@ -874,17 +880,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Then prepare to assign frame offsets to stack objects that are not used to // spill callee saved registers. - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isObjectPreAllocated(i) && - MFI->getUseLocalStackAllocationBlock()) + for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { + if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock()) continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && RS->isScavengingFrameIndex((int)i)) continue; - if (MFI->isDeadObjectIndex(i)) + if (MFI.isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i || + if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i) continue; if (ProtectedObjs.count(i)) @@ -911,7 +916,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { BitVector StackBytesFree; if (!ObjectsToAllocate.empty() && Fn.getTarget().getOptLevel() != CodeGenOpt::None && - MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn)) + MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn)) computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex, FixedCSEnd, StackBytesFree); @@ -935,8 +940,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) - Offset += MFI->getMaxCallFrameSize(); + if (MFI.adjustsStack() && TFI.hasReservedCallFrame(Fn)) + Offset += MFI.getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to @@ -944,8 +949,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) + if (MFI.adjustsStack() || MFI.hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); @@ -958,7 +963,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Update frame info to pretend that this is part of the stack... 
int64_t StackSize = Offset - LocalAreaOffset; - MFI->setStackSize(StackSize); + MFI.setStackSize(StackSize); NumBytesStackSpace += StackSize; } @@ -1009,7 +1014,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // Store SPAdj at exit of a basic block. SmallVector<int, 8> SPState; SPState.resize(Fn.getNumBlockIDs()); - SmallPtrSet<MachineBasicBlock*, 8> Reachable; + df_iterator_default_set<MachineBasicBlock*> Reachable; // Iterate over the reachable blocks in DFS order. for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); @@ -1047,7 +1052,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); - if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(*BB); + if (RS && FrameIndexEliminationScavenging) + RS->enterBasicBlock(*BB); bool InsideCallSequence = false; @@ -1116,7 +1122,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, // use that target machine register info object to eliminate // it. TRI.eliminateFrameIndex(MI, SPAdj, i, - FrameIndexVirtualScavenging ? nullptr : RS); + FrameIndexEliminationScavenging ? RS : nullptr); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -1132,7 +1138,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, // the SP adjustment made by each instruction in the sequence. // This includes both the frame setup/destroy pseudos (handled above), // as well as other instructions that have side effects w.r.t the SP. - // Note that this must come after eliminateFrameIndex, because + // Note that this must come after eliminateFrameIndex, because // if I itself referred to a frame index, we shouldn't count its own // adjustment. if (DidFinishLoop && InsideCallSequence) @@ -1141,7 +1147,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, if (DoIncr && I != BB->end()) ++I; // Update register states. - if (RS && !FrameIndexVirtualScavenging && DidFinishLoop) + if (RS && FrameIndexEliminationScavenging && DidFinishLoop) RS->forward(MI); } } diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index 93eeb9c..fb49a93 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -41,7 +41,8 @@ static cl::opt<bool, true> VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), cl::desc("Verify during register allocation")); -const char RegAllocBase::TimerGroupName[] = "Register Allocation"; +const char RegAllocBase::TimerGroupName[] = "regalloc"; +const char RegAllocBase::TimerGroupDescription[] = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; //===----------------------------------------------------------------------===// @@ -67,7 +68,8 @@ void RegAllocBase::init(VirtRegMap &vrm, // register, unify them with the corresponding LiveIntervalUnion, otherwise push // them on the priority queue for later assignment. 
void RegAllocBase::seedLiveRegs() { - NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) @@ -143,6 +145,7 @@ void RegAllocBase::allocatePhysRegs() { continue; } DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(!SplitVirtReg->empty() && "expecting non-empty interval"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); enqueue(SplitVirtReg); diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h index 296ffe8..d8921b5 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.h +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h @@ -105,6 +105,7 @@ protected: // Use this group name for NamedRegionTimer. static const char TimerGroupName[]; + static const char TimerGroupDescription[]; /// Method called when the allocator is about to remove a LiveInterval. virtual void aboutToRemoveInterval(LiveInterval &LI) {} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 11dfda6..a558e37 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -76,9 +76,7 @@ public: RABasic(); /// Return the pass name. - const char* getPassName() const override { - return "Basic Register Allocator"; - } + StringRef getPassName() const override { return "Basic Register Allocator"; } /// RABasic analysis usage. void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -105,6 +103,11 @@ public: /// Perform register allocation. bool runOnMachineFunction(MachineFunction &mf) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + // Helper for spilling all live virtual registers currently unified under preg // that interfere with the most recently queried lvr. Return true if spilling // was successful, and append any new spilled/split intervals to splitLVRs. diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index 55fb33e..fd759bc 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -149,18 +149,21 @@ namespace { spillImpossible = ~0u }; public: - const char *getPassName() const override { - return "Fast Register Allocator"; - } + StringRef getPassName() const override { return "Fast Register Allocator"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + MachineFunctionProperties getSetProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } private: @@ -209,8 +212,8 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { return SS; // Already has space allocated? // Allocate a new stack object for this spill location... 
- int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); // Assign the slot. StackSlotForVirtReg[VirtReg] = FrameIdx; @@ -360,7 +363,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { break; case regReserved: PhysRegState[PhysReg] = regFree; - // Fall through + LLVM_FALLTHROUGH; case regFree: MO.setIsKill(); return; @@ -389,7 +392,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { assert((TRI->isSuperRegister(PhysReg, Alias) || TRI->isSuperRegister(Alias, PhysReg)) && "Instruction is not using a subregister of a reserved register"); - // Fall through. + LLVM_FALLTHROUGH; case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. @@ -421,7 +424,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg, break; default: spillVirtReg(MI, VirtReg); - // Fall through. + LLVM_FALLTHROUGH; case regFree: case regReserved: PhysRegState[PhysReg] = NewState; @@ -437,7 +440,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg, break; default: spillVirtReg(MI, VirtReg); - // Fall through. + LLVM_FALLTHROUGH; case regFree: case regReserved: PhysRegState[Alias] = regDisabled; @@ -1093,8 +1096,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { UsedInInstr.clear(); UsedInInstr.setUniverse(TRI->getNumRegUnits()); - assert(!MRI->isSSA() && "regalloc requires leaving SSA"); - // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index c4d4b1e..c47cfb1 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -61,8 +61,7 @@ static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode( cl::desc("Spill mode for splitting live ranges"), cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), - clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), - clEnumValEnd), + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), cl::init(SplitEditor::SM_Speed)); static cl::opt<unsigned> @@ -318,9 +317,7 @@ public: RAGreedy(); /// Return the pass name. - const char* getPassName() const override { - return "Greedy Register Allocator"; - } + StringRef getPassName() const override { return "Greedy Register Allocator"; } /// RAGreedy analysis usage. void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -334,6 +331,11 @@ public: /// Perform register allocation. 
bool runOnMachineFunction(MachineFunction &mf) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + static char ID; private: @@ -421,6 +423,24 @@ private: } // end anonymous namespace char RAGreedy::ID = 0; +char &llvm::RAGreedyID = RAGreedy::ID; + +INITIALIZE_PASS_BEGIN(RAGreedy, "greedy", + "Greedy Register Allocator", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) +INITIALIZE_PASS_DEPENDENCY(MachineScheduler) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) +INITIALIZE_PASS_DEPENDENCY(EdgeBundles) +INITIALIZE_PASS_DEPENDENCY(SpillPlacement) +INITIALIZE_PASS_END(RAGreedy, "greedy", + "Greedy Register Allocator", false, false) #ifndef NDEBUG const char *const RAGreedy::StageName[] = { @@ -444,19 +464,6 @@ FunctionPass* llvm::createGreedyRegisterAllocator() { } RAGreedy::RAGreedy(): MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); - initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); - initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); - initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); } void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { @@ -639,6 +646,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, evictInterference(VirtReg, Hint, NewVRegs); return Hint; } + // Record the missed hint, we may be able to recover + // at the end if the surrounding allocation changed. + SetOfBrokenHints.insert(&VirtReg); } // Try to evict interference from a cheaper alternative. @@ -859,7 +869,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs, unsigned CostPerUseLimit) { - NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); // Keep track of the cheapest interference seen so far. EvictionCost BestCost; @@ -1957,7 +1968,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // Local intervals are handled separately. 
if (LIS->intervalIsInOneMBB(VirtReg)) { - NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); SA->analyze(&VirtReg); unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) @@ -1965,7 +1977,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, return tryInstructionSplit(VirtReg, Order, NewVRegs); } - NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); SA->analyze(&VirtReg); @@ -2103,6 +2116,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in // this recoloring "session". FixedRegisters.insert(VirtReg.reg); + SmallVector<unsigned, 4> CurrentNewVRegs; Order.rewind(); while (unsigned PhysReg = Order.next()) { @@ -2110,6 +2124,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, << PrintReg(PhysReg, TRI) << '\n'); RecoloringCandidates.clear(); VirtRegToPhysReg.clear(); + CurrentNewVRegs.clear(); // It is only possible to recolor virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > @@ -2154,8 +2169,11 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // If we cannot recolor all the interferences, we will have to start again // at this point for the next physical register. SmallVirtRegSet SaveFixedRegisters(FixedRegisters); - if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, - Depth)) { + if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs, + FixedRegisters, Depth)) { + // Push the queued vregs into the main queue. + for (unsigned NewVReg : CurrentNewVRegs) + NewVRegs.push_back(NewVReg); // Do not mess up with the global assignment process. // I.e., VirtReg must be unassigned. Matrix->unassign(VirtReg); @@ -2169,6 +2187,18 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, FixedRegisters = SaveFixedRegisters; Matrix->unassign(VirtReg); + // For a newly created vreg which is also in RecoloringCandidates, + // don't add it to NewVRegs because its physical register will be restored + // below. Other vregs in CurrentNewVRegs are created by calling + // selectOrSplit and should be added into NewVRegs. + for (SmallVectorImpl<unsigned>::iterator Next = CurrentNewVRegs.begin(), + End = CurrentNewVRegs.end(); + Next != End; ++Next) { + if (RecoloringCandidates.count(&LIS->getInterval(*Next))) + continue; + NewVRegs.push_back(*Next); + } + for (SmallLISet::iterator It = RecoloringCandidates.begin(), EndIt = RecoloringCandidates.end(); It != EndIt; ++It) { @@ -2201,10 +2231,21 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); unsigned PhysReg; PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); - if (PhysReg == ~0u || !PhysReg) + // When splitting happens, the live-range may actually be empty. + // In that case, this is okay to continue the recoloring even + // if we did not find an alternative color for it. Indeed, + // there will not be anything to color for LI in the end. 
+ if (PhysReg == ~0u || (!PhysReg && !LI->empty())) return false; + + if (!PhysReg) { + assert(LI->empty() && "Only empty live-range do not require a register"); + DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded. Empty LI.\n"); + continue; + } DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); + Matrix->assign(*LI, PhysReg); FixedRegisters.insert(LI->reg); } @@ -2519,7 +2560,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return PhysReg; } - assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); + assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs"); // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. @@ -2531,17 +2572,20 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return 0; } + if (Stage < RS_Spill) { + // Try splitting VirtReg or interferences. + unsigned NewVRegSizeBefore = NewVRegs.size(); + unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); + if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) + return PhysReg; + } + // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class wil report it. if (Stage >= RS_Done || !VirtReg.isSpillable()) return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, Depth); - // Try splitting VirtReg or interferences. - unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); - if (PhysReg || !NewVRegs.empty()) - return PhysReg; - // Finally spill VirtReg itself. if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) { // TODO: This is experimental and in particular, we do not model @@ -2552,7 +2596,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, DEBUG(dbgs() << "Do as if this register is in memory\n"); NewVRegs.push_back(VirtReg.reg); } else { - NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + NamedRegionTimer T("spill", "Spiller", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index d1221ec..101b30b 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -99,9 +99,7 @@ public: } /// Return the pass name. - const char* getPassName() const override { - return "PBQP Register Allocator"; - } + StringRef getPassName() const override { return "PBQP Register Allocator"; } /// PBQP analysis usage. 
void getAnalysisUsage(AnalysisUsage &au) const override; @@ -109,6 +107,11 @@ public: /// Perform register allocation bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } + private: typedef std::map<const LiveInterval*, unsigned> LI2NodeMap; diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index 50b8854..ece44c2 100644 --- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -48,7 +48,7 @@ public: initializeRegUsageInfoCollectorPass(Registry); } - const char *getPassName() const override { + StringRef getPassName() const override { return "Register Usage Information Collector Pass"; } @@ -57,10 +57,6 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; static char ID; - -private: - void markRegClobbered(const TargetRegisterInfo *TRI, uint32_t *RegMask, - unsigned PReg); }; } // end of anonymous namespace @@ -76,13 +72,6 @@ FunctionPass *llvm::createRegUsageInfoCollector() { return new RegUsageInfoCollector(); } -void RegUsageInfoCollector::markRegClobbered(const TargetRegisterInfo *TRI, - uint32_t *RegMask, unsigned PReg) { - // If PReg is clobbered then all of its alias are also clobbered. - for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI) - RegMask[*AI / 32] &= ~(1u << *AI % 32); -} - void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<PhysicalRegisterUsageInfo>(); AU.setPreservesAll(); @@ -116,7 +105,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) if (MRI->isPhysRegModified(PReg, true)) - markRegClobbered(TRI, &RegMask[0], PReg); + RegMask[PReg / 32] &= ~(1u << PReg % 32); if (!TargetFrameLowering::isSafeForNoCSROpt(F)) { const uint32_t *CallPreservedMask = diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp index 7595661..5cc35bf 100644 --- a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -52,7 +52,7 @@ public: initializeRegUsageInfoPropagationPassPass(Registry); } - const char *getPassName() const override { return RUIP_NAME; } + StringRef getPassName() const override { return RUIP_NAME; } bool runOnMachineFunction(MachineFunction &MF) override; diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index 617ece9..4bb3c22 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -815,14 +815,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, for (LiveInterval::SubRange &SB : IntB.subranges()) { LaneBitmask BMask = SB.LaneMask; LaneBitmask Common = BMask & AMask; - if (Common == 0) + if (Common.none()) continue; DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask) << " into " << PrintLaneMask(Common) << '\n'); LaneBitmask BRest = BMask & ~AMask; LiveInterval::SubRange *CommonRange; - if (BRest != 0) { + if (BRest.any()) { SB.LaneMask = BRest; DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest) << '\n'); @@ -841,7 +841,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, addSegmentsWithValNo(*CommonRange, BSubValNo, SA, 
ASubValNo); AMask &= ~BMask; } - if (AMask != 0) { + if (AMask.any()) { DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n'); LiveRange *NewRange = IntB.createSubRange(Allocator, AMask); VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); @@ -975,6 +975,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, NewRC = CommonRC; DstIdx = 0; DefMO.setSubReg(0); + DefMO.setIsUndef(false); // Only subregs can have def+undef. } } } @@ -1060,7 +1061,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SR.createDeadDef(DefIndex, Alloc); MaxMask &= ~SR.LaneMask; } - if (MaxMask != 0) { + if (MaxMask.any()) { LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask); SR->createDeadDef(DefIndex, Alloc); } @@ -1153,7 +1154,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { - if ((SR.LaneMask & SrcMask) == 0) + if ((SR.LaneMask & SrcMask).none()) continue; if (SR.liveAt(Idx)) return false; @@ -1174,7 +1175,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { // The affected subregister segments can be removed. LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); for (LiveInterval::SubRange &SR : DstLI.subranges()) { - if ((SR.LaneMask & DstMask) == 0) + if ((SR.LaneMask & DstMask).none()) continue; VNInfo *SVNI = SR.getVNInfoAt(RegIndex); @@ -1193,10 +1194,10 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { SlotIndex UseIdx = LIS->getInstructionIndex(MI); LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); bool isLive; - if (UseMask != ~0u && DstLI.hasSubRanges()) { + if (!UseMask.all() && DstLI.hasSubRanges()) { isLive = false; for (const LiveInterval::SubRange &SR : DstLI.subranges()) { - if ((SR.LaneMask & UseMask) == 0) + if ((SR.LaneMask & UseMask).none()) continue; if (SR.liveAt(UseIdx)) { isLive = true; @@ -1210,6 +1211,17 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { MO.setIsUndef(true); DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); } + + // A def of a subregister may be a use of the other subregisters, so + // deleting a def of a subregister may also remove uses. Since CopyMI + // is still part of the function (but about to be erased), mark all + // defs of DstReg in it as <undef>, so that shrinkToUses would + // ignore them. + for (MachineOperand &MO : CopyMI->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) + MO.setIsUndef(true); + LIS->shrinkToUses(&DstLI); + return true; } @@ -1220,7 +1232,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, Mask = ~Mask; bool IsUndef = true; for (const LiveInterval::SubRange &S : Int.subranges()) { - if ((S.LaneMask & Mask) == 0) + if ((S.LaneMask & Mask).none()) continue; if (S.liveAt(UseIdx)) { IsUndef = false; @@ -1446,7 +1458,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); } - ShrinkMask = 0; + ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; // Okay, attempt to join these two intervals. On failure, this returns false. @@ -1504,10 +1516,10 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); // Shrink subregister ranges if necessary. 
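Much of the RegisterCoalescer churn in these hunks is the mechanical move from plain unsigned lane masks, compared with == 0 / != 0 / == ~0u, to a LaneBitmask type queried through none()/any()/all(). The stand-in below is written only from the usage visible here, not copied from the real llvm/MC/LaneBitmask.h, to show why each comparison had to be rewritten once the mask stopped converting to an integer:

    #include <cstdint>
    #include <iostream>

    // Simplified stand-in for llvm::LaneBitmask; the real type has more API.
    struct LaneBitmask {
      using Type = uint32_t;
      constexpr explicit LaneBitmask(Type V = 0) : Mask(V) {}
      constexpr bool none() const { return Mask == 0; }   // replaces: M == 0
      constexpr bool any()  const { return Mask != 0; }   // replaces: M != 0
      constexpr bool all()  const { return ~Mask == 0; }  // replaces: M == ~0u
      constexpr LaneBitmask operator&(LaneBitmask O) const {
        return LaneBitmask(Mask & O.Mask);
      }
      constexpr LaneBitmask operator~() const { return LaneBitmask(~Mask); }
      static constexpr LaneBitmask getNone() { return LaneBitmask(0); }
      static constexpr LaneBitmask getAll()  { return LaneBitmask(~Type(0)); }
      Type Mask;
    };

    int main() {
      LaneBitmask SubRangeMask(0x3), ShrinkMask(0x4);
      // Post-change idiom: overlap tests go through none()/any(), with no
      // implicit conversion of the mask back to an integer.
      if ((SubRangeMask & ShrinkMask).none())
        std::cout << "lanes do not overlap\n";
      return 0;
    }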
- if (ShrinkMask != 0) { + if (ShrinkMask.any()) { LiveInterval &LI = LIS->getInterval(CP.getDstReg()); for (LiveInterval::SubRange &S : LI.subranges()) { - if ((S.LaneMask & ShrinkMask) == 0) + if ((S.LaneMask & ShrinkMask).none()) continue; DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) << ")\n"); @@ -1544,9 +1556,10 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { unsigned DstReg = CP.getDstReg(); + unsigned SrcReg = CP.getSrcReg(); assert(CP.isPhys() && "Must be a physreg copy"); assert(MRI->isReserved(DstReg) && "Not a reserved register"); - LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + LiveInterval &RHS = LIS->getInterval(SrcReg); DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); assert(RHS.containsOneValue() && "Invalid join with reserved register"); @@ -1558,11 +1571,19 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // Deny any overlapping intervals. This depends on all the reserved // register live ranges to look like dead defs. - for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) - if (RHS.overlaps(LIS->getRegUnit(*UI))) { - DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); - return false; + if (!MRI->isConstantPhysReg(DstReg)) { + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { + // Abort if not all the regunits are reserved. + for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { + if (!MRI->isReserved(*RI)) + return false; + } + if (RHS.overlaps(LIS->getRegUnit(*UI))) { + DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); + return false; + } } + } // Skip any value computations, we are not adding new values to the // reserved register. Also skip merging the live ranges, the reserved @@ -1572,43 +1593,64 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // Delete the identity copy. MachineInstr *CopyMI; if (CP.isFlipped()) { - CopyMI = MRI->getVRegDef(RHS.reg); + // Physreg is copied into vreg + // %vregY = COPY %X + // ... //< no other def of %X here + // use %vregY + // => + // ... + // use %X + CopyMI = MRI->getVRegDef(SrcReg); } else { - if (!MRI->hasOneNonDBGUse(RHS.reg)) { + // VReg is copied into physreg: + // %vregX = def + // ... //< no other def or use of %Y here + // %Y = COPY %vregX + // => + // %Y = def + // ... + if (!MRI->hasOneNonDBGUse(SrcReg)) { DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); return false; } - MachineInstr *DestMI = MRI->getVRegDef(RHS.reg); - CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg); - const SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); - const SlotIndex DestRegIdx = LIS->getInstructionIndex(*DestMI).getRegSlot(); - - // We checked above that there are no interfering defs of the physical - // register. However, for this case, where we intent to move up the def of - // the physical register, we also need to check for interfering uses. - SlotIndexes *Indexes = LIS->getSlotIndexes(); - for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); - SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { - MachineInstr *MI = LIS->getInstructionFromIndex(SI); - if (MI->readsRegister(DstReg, TRI)) { - DEBUG(dbgs() << "\t\tInterference (read): " << *MI); - return false; - } + if (!LIS->intervalIsInOneMBB(RHS)) { + DEBUG(dbgs() << "\t\tComplex control flow!\n"); + return false; + } - // We must also check for clobbers caused by regmasks. 
- for (const auto &MO : MI->operands()) { - if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) { - DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI); + MachineInstr &DestMI = *MRI->getVRegDef(SrcReg); + CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg); + SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); + SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + + if (!MRI->isConstantPhysReg(DstReg)) { + // We checked above that there are no interfering defs of the physical + // register. However, for this case, where we intent to move up the def of + // the physical register, we also need to check for interfering uses. + SlotIndexes *Indexes = LIS->getSlotIndexes(); + for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); + SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { + MachineInstr *MI = LIS->getInstructionFromIndex(SI); + if (MI->readsRegister(DstReg, TRI)) { + DEBUG(dbgs() << "\t\tInterference (read): " << *MI); return false; } + + // We must also check for clobbers caused by regmasks. + for (const auto &MO : MI->operands()) { + if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) { + DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI); + return false; + } + } } } // We're going to remove the copy which defines a physical reserved // register, so remove its valno, etc. - DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at " - << CopyRegIdx << "\n"); + DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI) + << " at " << CopyRegIdx << "\n"); LIS->removePhysRegDefAt(DstReg, CopyRegIdx); // Create a new dead def at the new def location. @@ -1795,11 +1837,11 @@ class JoinVals { /// True once Pruned above has been computed. bool PrunedComputed; - Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), + Val() : Resolution(CR_Keep), WriteLanes(), ValidLanes(), RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false), Pruned(false), PrunedComputed(false) {} - bool isAnalyzed() const { return WriteLanes != 0; } + bool isAnalyzed() const { return WriteLanes.any(); } }; /// One entry per value number in LI. @@ -1889,12 +1931,22 @@ public: /// no useful information and can be removed. void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask); + /// Pruning values in subranges can lead to removing segments in these + /// subranges started by IMPLICIT_DEFs. The corresponding segments in + /// the main range also need to be removed. This function will mark + /// the corresponding values in the main range as pruned, so that + /// eraseInstrs can do the final cleanup. + /// The parameter @p LI must be the interval whose main range is the + /// live range LR. + void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange); + /// Erase any machine instructions that have been coalesced away. /// Add erased instructions to ErasedInstrs. /// Add foreign virtual registers to ShrinkRegs if their live range ended at /// the erased instrs. void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, - SmallVectorImpl<unsigned> &ShrinkRegs); + SmallVectorImpl<unsigned> &ShrinkRegs, + LiveInterval *LI = nullptr); /// Remove liverange defs at places where implicit defs will be removed. 
void removeImplicitDefs(); @@ -1906,7 +1958,7 @@ public: LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const { - LaneBitmask L = 0; + LaneBitmask L; for (const MachineOperand &MO : DefMI->operands()) { if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef()) continue; @@ -1944,7 +1996,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( for (const LiveInterval::SubRange &S : LI.subranges()) { // Transform lanemask to a mask in the joined live interval. LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); - if ((SMask & LaneMask) == 0) + if ((SMask & LaneMask).none()) continue; LiveQueryResult LRQ = S.Query(Def); ValueIn = LRQ.valueIn(); @@ -1984,7 +2036,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { assert(!V.isAnalyzed() && "Value has already been analyzed!"); VNInfo *VNI = LR.getValNumInfo(ValNo); if (VNI->isUnused()) { - V.WriteLanes = ~0u; + V.WriteLanes = LaneBitmask::getAll(); return CR_Keep; } @@ -1992,16 +2044,17 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { const MachineInstr *DefMI = nullptr; if (VNI->isPHIDef()) { // Conservatively assume that all lanes in a PHI are valid. - LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask Lanes = SubRangeJoin ? LaneBitmask(1) + : TRI->getSubRegIndexLaneMask(SubIdx); V.ValidLanes = V.WriteLanes = Lanes; } else { DefMI = Indexes->getInstructionFromIndex(VNI->def); assert(DefMI != nullptr); if (SubRangeJoin) { // We don't care about the lanes when joining subregister ranges. - V.WriteLanes = V.ValidLanes = 1; + V.WriteLanes = V.ValidLanes = LaneBitmask(1); if (DefMI->isImplicitDef()) { - V.ValidLanes = 0; + V.ValidLanes = LaneBitmask::getNone(); V.ErasableImplicitDef = true; } } else { @@ -2074,7 +2127,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // predecessor, the PHI itself can't introduce any conflicts. if (VNI->isPHIDef()) return CR_Merge; - if (V.ValidLanes & OtherV.ValidLanes) + if ((V.ValidLanes & OtherV.ValidLanes).any()) // Overlapping lanes can't be resolved. return CR_Impossible; else @@ -2119,7 +2172,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // We need the def for the subregister if there is nothing else live at the // subrange at this point. if (TrackSubRegLiveness - && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)) == 0) + && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none()) return CR_Replace; return CR_Erase; } @@ -2159,7 +2212,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace // handles this complex value mapping. - if ((V.WriteLanes & OtherV.ValidLanes) == 0) + if ((V.WriteLanes & OtherV.ValidLanes).none()) return CR_Replace; // If the other live range is killed by DefMI and the live ranges are still @@ -2180,7 +2233,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // possibility that no instructions actually read the clobbered lanes. // If we're clobbering all the lanes in OtherVNI, at least one must be read. // Otherwise Other.RI wouldn't be live here. - if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) + if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none()) return CR_Impossible; // We need to verify that no instructions are reading the clobbered lanes. 
To @@ -2228,11 +2281,11 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { Val &OtherV = Other.Vals[V.OtherVNI->id]; // We cannot erase an IMPLICIT_DEF if we don't have valid values for all // its lanes. - if ((OtherV.WriteLanes & ~V.ValidLanes) != 0 && TrackSubRegLiveness) + if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness) OtherV.ErasableImplicitDef = false; OtherV.Pruned = true; + LLVM_FALLTHROUGH; } - // Fall through. default: // This value number needs to go in the final joined live range. Assignments[ValNo] = NewVNInfo.size(); @@ -2289,7 +2342,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, TaintedLanes &= ~OV.WriteLanes; if (!OV.RedefVNI) break; - } while (TaintedLanes); + } while (TaintedLanes.any()); return true; } @@ -2302,8 +2355,8 @@ bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx, continue; if (!MO.readsReg()) continue; - if (Lanes & TRI->getSubRegIndexLaneMask( - TRI->composeSubRegIndices(SubIdx, MO.getSubReg()))) + unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg()); + if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any()) return true; } return false; @@ -2350,7 +2403,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { Indexes->getInstructionFromIndex(TaintExtent.front().first); assert(LastMI && "Range must end at a proper instruction"); unsigned TaintNum = 0; - for(;;) { + for (;;) { assert(MI != MBB->end() && "Bad LastMI"); if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) { DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); @@ -2415,7 +2468,8 @@ void JoinVals::pruneValues(JoinVals &Other, for (MachineOperand &MO : Indexes->getInstructionFromIndex(Def)->operands()) { if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { - MO.setIsUndef(EraseImpDef); + if (MO.getSubReg() != 0) + MO.setIsUndef(EraseImpDef); MO.setIsDead(false); } } @@ -2448,8 +2502,7 @@ void JoinVals::pruneValues(JoinVals &Other, } } -void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) -{ +void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Look for values being erased. bool DidPrune = false; for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { @@ -2486,6 +2539,30 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) LI.removeEmptySubRanges(); } +/// Check if any of the subranges of @p LI contain a definition at @p Def. 
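One small change in the computeAssignment hunk above: the bare "// Fall through." comment becomes the LLVM_FALLTHROUGH macro from llvm/Support/Compiler.h, which expands to an explicit fall-through attribute when the host compiler offers one and to nothing otherwise. A toy switch (invented for illustration) showing the idiom:

    #include "llvm/Support/Compiler.h"

    // Invented classifier: Kind 0 does extra bookkeeping, then deliberately
    // falls into the Kind 1 handling.
    int classify(int Kind, int &Bookkeeping) {
      switch (Kind) {
      case 0:
        ++Bookkeeping;
        LLVM_FALLTHROUGH;   // silences implicit-fallthrough warnings
      case 1:
        return Bookkeeping;
      default:
        return -1;
      }
    }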
+static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) { + for (LiveInterval::SubRange &SR : LI.subranges()) { + if (VNInfo *VNI = SR.Query(Def).valueOutOrDead()) + if (VNI->def == Def) + return true; + } + return false; +} + +void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) { + assert(&static_cast<LiveRange&>(LI) == &LR); + + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + if (Vals[i].Resolution != CR_Keep) + continue; + VNInfo *VNI = LR.getValNumInfo(i); + if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def)) + continue; + Vals[i].Pruned = true; + ShrinkMainRange = true; + } +} + void JoinVals::removeImplicitDefs() { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { Val &V = Vals[i]; @@ -2499,7 +2576,8 @@ void JoinVals::removeImplicitDefs() { } void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, - SmallVectorImpl<unsigned> &ShrinkRegs) { + SmallVectorImpl<unsigned> &ShrinkRegs, + LiveInterval *LI) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { // Get the def location before markUnused() below invalidates it. SlotIndex Def = LR.getValNumInfo(i)->def; @@ -2511,13 +2589,65 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) break; // Remove value number i from LR. + // For intervals with subranges, removing a segment from the main range + // may require extending the previous segment: for each definition of + // a subregister, there will be a corresponding def in the main range. + // That def may fall in the middle of a segment from another subrange. + // In such cases, removing this def from the main range must be + // complemented by extending the main range to account for the liveness + // of the other subrange. VNInfo *VNI = LR.getValNumInfo(i); + SlotIndex Def = VNI->def; + // The new end point of the main range segment to be extended. + SlotIndex NewEnd; + if (LI != nullptr) { + LiveRange::iterator I = LR.FindSegmentContaining(Def); + assert(I != LR.end()); + // Do not extend beyond the end of the segment being removed. + // The segment may have been pruned in preparation for joining + // live ranges. + NewEnd = I->end; + } + LR.removeValNo(VNI); // Note that this VNInfo is reused and still referenced in NewVNInfo, // make it appear like an unused value number. VNI->markUnused(); - DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'); - // FALL THROUGH. + + if (LI != nullptr && LI->hasSubRanges()) { + assert(static_cast<LiveRange*>(LI) == &LR); + // Determine the end point based on the subrange information: + // minimum of (earliest def of next segment, + // latest end point of containing segment) + SlotIndex ED, LE; + for (LiveInterval::SubRange &SR : LI->subranges()) { + LiveRange::iterator I = SR.find(Def); + if (I == SR.end()) + continue; + if (I->start > Def) + ED = ED.isValid() ? std::min(ED, I->start) : I->start; + else + LE = LE.isValid() ? std::max(LE, I->end) : I->end; + } + if (LE.isValid()) + NewEnd = std::min(NewEnd, LE); + if (ED.isValid()) + NewEnd = std::min(NewEnd, ED); + + // We only want to do the extension if there was a subrange that + // was live across Def. 
+ if (LE.isValid()) { + LiveRange::iterator S = LR.find(Def); + if (S != LR.begin()) + std::prev(S)->end = NewEnd; + } + } + DEBUG({ + dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'; + if (LI != nullptr) + dbgs() << "\t\t LHS = " << *LI << '\n'; + }); + LLVM_FALLTHROUGH; } case CR_Erase: { @@ -2591,8 +2721,15 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. - DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() - << " points: " << LRange << '\n'); + DEBUG({ + dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; + for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { + dbgs() << EndPoints[i]; + if (i != n-1) + dbgs() << ','; + } + dbgs() << ": " << LRange << '\n'; + }); LIS->extendToIndices(LRange, EndPoints); } @@ -2606,7 +2743,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, // LaneMask of subregisters common to subrange R and ToMerge. LaneBitmask Common = RMask & LaneMask; // There is nothing to do without common subregs. - if (Common == 0) + if (Common.none()) continue; DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into " @@ -2615,7 +2752,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, // they have to split into their own subrange. LaneBitmask LRest = RMask & ~LaneMask; LiveInterval::SubRange *CommonRange; - if (LRest != 0) { + if (LRest.any()) { R.LaneMask = LRest; DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n'); // Duplicate SubRange for newly merged common stuff. @@ -2630,7 +2767,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, LaneMask &= ~RMask; } - if (LaneMask != 0) { + if (LaneMask.any()) { DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n'); LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); } @@ -2641,10 +2778,10 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC()); - JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS, - TRI, false, TrackSubRegLiveness); - JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS, - TRI, false, TrackSubRegLiveness); + JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(), + NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); + JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(), + NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS @@ -2670,7 +2807,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() : TRI->getSubRegIndexLaneMask(DstIdx); // LHS must support subregs or we wouldn't be in this codepath. - assert(Mask != 0); + assert(Mask.any()); LHS.createSubRangeFrom(Allocator, Mask, LHS); } else if (DstIdx != 0) { // Transform LHS lanemasks to new register class if necessary. @@ -2697,6 +2834,10 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { } DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + // Pruning implicit defs from subranges may result in the main range + // having stale segments. 
+ LHSVals.pruneMainSegments(LHS, ShrinkMainRange); + LHSVals.pruneSubRegValues(LHS, ShrinkMask); RHSVals.pruneSubRegValues(LHS, ShrinkMask); } @@ -2712,7 +2853,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Erase COPY and IMPLICIT_DEF instructions. This may cause some external // registers to require trimming. SmallVector<unsigned, 8> ShrinkRegs; - LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS); RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); while (!ShrinkRegs.empty()) shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); @@ -2729,8 +2870,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { if (!EndPoints.empty()) { // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. - DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() - << " points: " << LHS << '\n'); + DEBUG({ + dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; + for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { + dbgs() << EndPoints[i]; + if (i != n-1) + dbgs() << ','; + } + dbgs() << ": " << LHS << '\n'; + }); LIS->extendToIndices((LiveRange&)LHS, EndPoints); } @@ -3039,7 +3187,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // If subranges are still supported, then the same subregs // should still be supported. for (LiveInterval::SubRange &S : LI.subranges()) { - assert((S.LaneMask & ~MaxMask) == 0); + assert((S.LaneMask & ~MaxMask).none()); } #endif } diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index a21d6c1..fc84aeb 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -26,8 +26,8 @@ using namespace llvm; static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, const MachineRegisterInfo &MRI, unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask) { - assert((PrevMask & ~NewMask) == 0 && "Must not remove bits"); - if (PrevMask != 0 || NewMask == 0) + assert((PrevMask & ~NewMask).none() && "Must not remove bits"); + if (PrevMask.any() || NewMask.none()) return; PSetIterator PSetI = MRI.getPressureSets(Reg); @@ -40,8 +40,8 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, const MachineRegisterInfo &MRI, unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask) { - assert((NewMask & !PrevMask) == 0 && "Must not add bits"); - if (NewMask != 0 || PrevMask == 0) + //assert((NewMask & !PrevMask) == 0 && "Must not add bits"); + if (NewMask.any() || PrevMask.none()) return; PSetIterator PSetI = MRI.getPressureSets(Reg); @@ -73,7 +73,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Live In: "; for (const RegisterMaskPair &P : LiveInRegs) { dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); - if (P.LaneMask != ~0u) + if (!P.LaneMask.all()) dbgs() << ':' << PrintLaneMask(P.LaneMask); dbgs() << ' '; } @@ -81,7 +81,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Live Out: "; for (const RegisterMaskPair &P : LiveOutRegs) { dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); - if (P.LaneMask != ~0u) + if (!P.LaneMask.all()) dbgs() << ':' << PrintLaneMask(P.LaneMask); dbgs() << ' '; } @@ -112,7 +112,7 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const { void RegPressureTracker::increaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, LaneBitmask 
NewMask) { - if (PreviousMask != 0 || NewMask == 0) + if (PreviousMask.any() || NewMask.none()) return; PSetIterator PSetI = MRI->getPressureSets(RegUnit); @@ -266,9 +266,8 @@ bool RegPressureTracker::isBottomClosed() const { SlotIndex RegPressureTracker::getCurrSlot() const { - MachineBasicBlock::const_iterator IdxPos = CurrPos; - while (IdxPos != MBB->end() && IdxPos->isDebugValue()) - ++IdxPos; + MachineBasicBlock::const_iterator IdxPos = + skipDebugInstructionsForward(CurrPos, MBB->end()); if (IdxPos == MBB->end()) return LIS->getMBBEndIdx(MBB); return LIS->getInstructionIndex(*IdxPos).getRegSlot(); @@ -322,29 +321,28 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { unsigned RegUnit = Pair.RegUnit; if (TargetRegisterInfo::isVirtualRegister(RegUnit) && !RPTracker.hasUntiedDef(RegUnit)) - increaseSetPressure(LiveThruPressure, *MRI, RegUnit, 0, Pair.LaneMask); + increaseSetPressure(LiveThruPressure, *MRI, RegUnit, + LaneBitmask::getNone(), Pair.LaneMask); } } static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits, unsigned RegUnit) { - auto I = std::find_if(RegUnits.begin(), RegUnits.end(), - [RegUnit](const RegisterMaskPair Other) { - return Other.RegUnit == RegUnit; - }); + auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); if (I == RegUnits.end()) - return 0; + return LaneBitmask::getNone(); return I->LaneMask; } static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, RegisterMaskPair Pair) { unsigned RegUnit = Pair.RegUnit; - assert(Pair.LaneMask != 0); - auto I = std::find_if(RegUnits.begin(), RegUnits.end(), - [RegUnit](const RegisterMaskPair Other) { - return Other.RegUnit == RegUnit; - }); + assert(Pair.LaneMask.any()); + auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); if (I == RegUnits.end()) { RegUnits.push_back(Pair); } else { @@ -354,28 +352,26 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits, unsigned RegUnit) { - auto I = std::find_if(RegUnits.begin(), RegUnits.end(), - [RegUnit](const RegisterMaskPair Other) { - return Other.RegUnit == RegUnit; - }); + auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); if (I == RegUnits.end()) { - RegUnits.push_back(RegisterMaskPair(RegUnit, 0)); + RegUnits.push_back(RegisterMaskPair(RegUnit, LaneBitmask::getNone())); } else { - I->LaneMask = 0; + I->LaneMask = LaneBitmask::getNone(); } } static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, RegisterMaskPair Pair) { unsigned RegUnit = Pair.RegUnit; - assert(Pair.LaneMask != 0); - auto I = std::find_if(RegUnits.begin(), RegUnits.end(), - [RegUnit](const RegisterMaskPair Other) { - return Other.RegUnit == RegUnit; - }); + assert(Pair.LaneMask.any()); + auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); if (I != RegUnits.end()) { I->LaneMask &= ~Pair.LaneMask; - if (I->LaneMask == 0) + if (I->LaneMask.none()) RegUnits.erase(I); } } @@ -386,14 +382,15 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, bool(*Property)(const LiveRange &LR, SlotIndex Pos)) { if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { const LiveInterval &LI = LIS.getInterval(RegUnit); - LaneBitmask Result = 0; + LaneBitmask Result; if (TrackLaneMasks && LI.hasSubRanges()) { for (const LiveInterval::SubRange 
&SR : LI.subranges()) { if (Property(SR, Pos)) Result |= SR.LaneMask; } } else if (Property(LI, Pos)) { - Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit) : ~0u; + Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit) + : LaneBitmask::getAll(); } return Result; @@ -403,7 +400,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, // for physical registers on targets with many registers (GPUs). if (LR == nullptr) return SafeDefault; - return Property(*LR, Pos) ? ~0u : 0; + return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone(); } } @@ -411,7 +408,8 @@ static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit, SlotIndex Pos) { - return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, ~0u, + return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, + LaneBitmask::getAll(), [](const LiveRange &LR, SlotIndex Pos) { return LR.liveAt(Pos); }); @@ -478,10 +476,10 @@ class RegisterOperandsCollector { void pushReg(unsigned Reg, SmallVectorImpl<RegisterMaskPair> &RegUnits) const { if (TargetRegisterInfo::isVirtualRegister(Reg)) { - addRegLanes(RegUnits, RegisterMaskPair(Reg, ~0u)); + addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll())); } else if (MRI.isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u)); + addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll())); } } @@ -516,7 +514,7 @@ class RegisterOperandsCollector { addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask)); } else if (MRI.isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u)); + addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll())); } } @@ -567,11 +565,11 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, // of a subregister def we need a read-undef flag. 
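The RegisterPressure.cpp hunks just above and below also swap std::find_if(R.begin(), R.end(), Pred) for the range form find_if(R, Pred) from llvm/ADT/STLExtras.h. A small sketch of that helper on the same kind of (register unit, lane mask) list; RegisterMaskPair here is a local stand-in rather than the real CodeGen struct:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/STLExtras.h"
    #include <cstdint>

    struct RegisterMaskPair {   // stand-in for the struct used by the tracker
      unsigned RegUnit;
      uint32_t LaneMask;
    };

    uint32_t getLanes(llvm::ArrayRef<RegisterMaskPair> RegUnits,
                      unsigned RegUnit) {
      // Range-based find_if: the container is passed once, no begin()/end()
      // boilerplate at the call site.
      auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair &P) {
        return P.RegUnit == RegUnit;
      });
      return I == RegUnits.end() ? 0 : I->LaneMask;
    }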
unsigned RegUnit = I->RegUnit; if (TargetRegisterInfo::isVirtualRegister(RegUnit) && - AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask) == 0) + AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none()) AddFlagsMI->setRegisterDefReadUndef(RegUnit); LaneBitmask ActualDef = I->LaneMask & LiveAfter; - if (ActualDef == 0) { + if (ActualDef.none()) { I = Defs.erase(I); } else { I->LaneMask = ActualDef; @@ -582,7 +580,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getBaseIndex()); LaneBitmask LaneMask = I->LaneMask & LiveBefore; - if (LaneMask == 0) { + if (LaneMask.none()) { I = Uses.erase(I); } else { I->LaneMask = LaneMask; @@ -596,7 +594,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, continue; LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit, Pos.getDeadSlot()); - if (LiveAfter == 0) + if (LiveAfter.none()) AddFlagsMI->setRegisterDefReadUndef(RegUnit); } } @@ -673,17 +671,16 @@ void RegPressureTracker::addLiveRegs(ArrayRef<RegisterMaskPair> Regs) { void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair, SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) { - assert(Pair.LaneMask != 0); + assert(Pair.LaneMask.any()); unsigned RegUnit = Pair.RegUnit; - auto I = std::find_if(LiveInOrOut.begin(), LiveInOrOut.end(), - [RegUnit](const RegisterMaskPair &Other) { - return Other.RegUnit == RegUnit; - }); + auto I = find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) { + return Other.RegUnit == RegUnit; + }); LaneBitmask PrevMask; LaneBitmask NewMask; if (I == LiveInOrOut.end()) { - PrevMask = 0; + PrevMask = LaneBitmask::getNone(); NewMask = Pair.LaneMask; LiveInOrOut.push_back(Pair); } else { @@ -738,14 +735,15 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, LaneBitmask NewMask = PreviousMask & ~Def.LaneMask; LaneBitmask LiveOut = Def.LaneMask & ~PreviousMask; - if (LiveOut != 0) { + if (LiveOut.any()) { discoverLiveOut(RegisterMaskPair(Reg, LiveOut)); // Retroactively model effects on pressure of the live out lanes. - increaseSetPressure(CurrSetPressure, *MRI, Reg, 0, LiveOut); + increaseSetPressure(CurrSetPressure, *MRI, Reg, LaneBitmask::getNone(), + LiveOut); PreviousMask = LiveOut; } - if (NewMask == 0) { + if (NewMask.none()) { // Add a 0 entry to LiveUses as a marker that the complete vreg has become // dead. if (TrackLaneMasks && LiveUses != nullptr) @@ -762,26 +760,25 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, // Generate liveness for uses. for (const RegisterMaskPair &Use : RegOpers.Uses) { unsigned Reg = Use.RegUnit; - assert(Use.LaneMask != 0); + assert(Use.LaneMask.any()); LaneBitmask PreviousMask = LiveRegs.insert(Use); LaneBitmask NewMask = PreviousMask | Use.LaneMask; if (NewMask == PreviousMask) continue; // Did the register just become live? - if (PreviousMask == 0) { + if (PreviousMask.none()) { if (LiveUses != nullptr) { if (!TrackLaneMasks) { addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); } else { - auto I = std::find_if(LiveUses->begin(), LiveUses->end(), - [Reg](const RegisterMaskPair Other) { - return Other.RegUnit == Reg; - }); + auto I = find_if(*LiveUses, [Reg](const RegisterMaskPair Other) { + return Other.RegUnit == Reg; + }); bool IsRedef = I != LiveUses->end(); if (IsRedef) { // ignore re-defs here... 
- assert(I->LaneMask == 0); + assert(I->LaneMask.none()); removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); } else { addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); @@ -792,7 +789,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, // Discover live outs if this may be the first occurance of this register. if (RequireIntervals) { LaneBitmask LiveOut = getLiveThroughAt(Reg, SlotIdx); - if (LiveOut != 0) + if (LiveOut.any()) discoverLiveOut(RegisterMaskPair(Reg, LiveOut)); } } @@ -803,7 +800,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, for (const RegisterMaskPair &Def : RegOpers.Defs) { unsigned RegUnit = Def.RegUnit; if (TargetRegisterInfo::isVirtualRegister(RegUnit) && - (LiveRegs.contains(RegUnit) & Def.LaneMask) == 0) + (LiveRegs.contains(RegUnit) & Def.LaneMask).none()) UntiedDefs.insert(RegUnit); } } @@ -819,9 +816,7 @@ void RegPressureTracker::recedeSkipDebugValues() { static_cast<RegionPressure&>(P).openTop(CurrPos); // Find the previous instruction. - do - --CurrPos; - while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin()); SlotIndex SlotIdx; if (RequireIntervals) @@ -871,7 +866,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) { unsigned Reg = Use.RegUnit; LaneBitmask LiveMask = LiveRegs.contains(Reg); LaneBitmask LiveIn = Use.LaneMask & ~LiveMask; - if (LiveIn != 0) { + if (LiveIn.any()) { discoverLiveIn(RegisterMaskPair(Reg, LiveIn)); increaseRegPressure(Reg, LiveMask, LiveMask | LiveIn); LiveRegs.insert(RegisterMaskPair(Reg, LiveIn)); @@ -879,7 +874,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) { // Kill liveness at last uses. if (RequireIntervals) { LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx); - if (LastUseMask != 0) { + if (LastUseMask.any()) { LiveRegs.erase(RegisterMaskPair(Reg, LastUseMask)); decreaseRegPressure(Reg, LiveMask, LiveMask & ~LastUseMask); } @@ -897,9 +892,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) { bumpDeadDefs(RegOpers.DeadDefs); // Find the next instruction. 
- do - ++CurrPos; - while (CurrPos != MBB->end() && CurrPos->isDebugValue()); + CurrPos = skipDebugInstructionsForward(std::next(CurrPos), MBB->end()); } void RegPressureTracker::advance() { @@ -1192,8 +1185,8 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask, unsigned SubRegIdx = MO.getSubReg(); LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx); LastUseMask &= ~UseMask; - if (LastUseMask == 0) - return 0; + if (LastUseMask.none()) + return LaneBitmask::getNone(); } } return LastUseMask; @@ -1202,7 +1195,8 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask, LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit, SlotIndex Pos) const { assert(RequireIntervals); - return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, ~0u, + return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, + LaneBitmask::getAll(), [](const LiveRange &LR, SlotIndex Pos) { return LR.liveAt(Pos); }); @@ -1212,7 +1206,7 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit, SlotIndex Pos) const { assert(RequireIntervals); return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, - Pos.getBaseIndex(), 0, + Pos.getBaseIndex(), LaneBitmask::getNone(), [](const LiveRange &LR, SlotIndex Pos) { const LiveRange::Segment *S = LR.getSegmentContaining(Pos); return S != nullptr && S->end == Pos.getRegSlot(); @@ -1222,7 +1216,8 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit, LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit, SlotIndex Pos) const { assert(RequireIntervals); - return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, 0u, + return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, + LaneBitmask::getNone(), [](const LiveRange &LR, SlotIndex Pos) { const LiveRange::Segment *S = LR.getSegmentContaining(Pos); return S != nullptr && S->start < Pos.getRegSlot(true) && @@ -1253,7 +1248,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { for (const RegisterMaskPair &Use : RegOpers.Uses) { unsigned Reg = Use.RegUnit; LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx); - if (LastUseMask == 0) + if (LastUseMask.none()) continue; // The LastUseMask is queried from the liveness information of instruction // which may be further down the schedule. Some lanes may actually not be @@ -1263,7 +1258,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { SlotIndex CurrIdx = getCurrSlot(); LastUseMask = findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, LIS); - if (LastUseMask == 0) + if (LastUseMask.none()) continue; LaneBitmask LiveMask = LiveRegs.contains(Reg); diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 6b80179..fdf741f 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -34,33 +34,12 @@ using namespace llvm; void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { LaneBitmask UnitMask = (*RUI).second; - if (UnitMask == 0 || (LaneMask & UnitMask) != 0) + if (UnitMask.none() || (LaneMask & UnitMask).any()) RegUnitsAvailable.reset((*RUI).first); } } -void RegScavenger::initRegState() { - for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) { - I->Reg = 0; - I->Restore = nullptr; - } - - // All register units start out unused. 
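The tracker changes above (getCurrSlot, recedeSkipDebugValues, advance) replace hand-rolled loops over debug instructions with skipDebugInstructionsForward and skipDebugInstructionsBackward. A generic restatement of what those helpers do, using arbitrary iterators and a predicate instead of the real MachineBasicBlock declarations, so this conveys the idea rather than the actual API:

    // Generic restatement (not the real declarations): move forward past
    // elements matching IsDebug, but never past End.
    template <typename IterT, typename PredT>
    IterT skipForwardWhile(IterT It, IterT End, PredT IsDebug) {
      while (It != End && IsDebug(*It))
        ++It;
      return It;
    }

    // Backward variant: move toward Begin past matching elements; like the
    // original do/while loops, it stops at Begin even if Begin itself matches.
    template <typename IterT, typename PredT>
    IterT skipBackwardWhile(IterT It, IterT Begin, PredT IsDebug) {
      while (It != Begin && IsDebug(*It))
        --It;
      return It;
    }

In the tracker code the predicate role is played by MachineInstr::isDebugValue() and the iterators walk a MachineBasicBlock.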
- RegUnitsAvailable.set(); - - // Live-in registers are in use. - for (const auto &LI : MBB->liveins()) - setRegUsed(LI.PhysReg, LI.LaneMask); - - // Pristine CSRs are also unavailable. - const MachineFunction &MF = *MBB->getParent(); - BitVector PR = MF.getFrameInfo()->getPristineRegs(MF); - for (int I = PR.find_first(); I>0; I = PR.find_next(I)) - setRegUsed(I); -} - -void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) { +void RegScavenger::init(MachineBasicBlock &MBB) { MachineFunction &MF = *MBB.getParent(); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); @@ -69,11 +48,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) { assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) && "Target changed?"); - // It is not possible to use the register scavenger after late optimization - // passes that don't preserve accurate liveness information. - assert(MRI->tracksLiveness() && - "Cannot use register scavenger with inaccurate liveness"); - // Self-initialize. if (!this->MBB) { NumRegUnits = TRI->getNumRegUnits(); @@ -84,16 +58,56 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) { } this->MBB = &MBB; - initRegState(); + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + I->Reg = 0; + I->Restore = nullptr; + } + + // All register units start out unused. + RegUnitsAvailable.set(); + + // Pristine CSRs are not available. + BitVector PR = MF.getFrameInfo().getPristineRegs(MF); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) + setRegUsed(I); Tracking = false; } +void RegScavenger::setLiveInsUsed(const MachineBasicBlock &MBB) { + for (const auto &LI : MBB.liveins()) + setRegUsed(LI.PhysReg, LI.LaneMask); +} + +void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) { + init(MBB); + setLiveInsUsed(MBB); +} + +void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) { + init(MBB); + // Merge live-ins of successors to get live-outs. + for (const MachineBasicBlock *Succ : MBB.successors()) + setLiveInsUsed(*Succ); + + // Move internal iterator at the last instruction of the block. + if (MBB.begin() != MBB.end()) { + MBBI = std::prev(MBB.end()); + Tracking = true; + } +} + void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.set(*RUI); } +void RegScavenger::removeRegUnits(BitVector &BV, unsigned Reg) { + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + BV.reset(*RUI); +} + void RegScavenger::determineKillsAndDefs() { assert(Tracking && "Must be tracking to determine kills and defs"); @@ -245,6 +259,48 @@ void RegScavenger::forward() { setUsed(DefRegUnits); } +void RegScavenger::backward() { + assert(Tracking && "Must be tracking to determine kills and defs"); + + const MachineInstr &MI = *MBBI; + // Defined or clobbered registers are available now. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; + ++RU) { + for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) { + if (MO.clobbersPhysReg(*RURI)) { + RegUnitsAvailable.set(RU); + break; + } + } + } + } else if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + isReserved(Reg)) + continue; + addRegUnits(RegUnitsAvailable, Reg); + } + } + // Mark read registers as unavailable. 
+ for (const MachineOperand &MO : MI.uses()) { + if (MO.isReg() && MO.readsReg()) { + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + isReserved(Reg)) + continue; + removeRegUnits(RegUnitsAvailable, Reg); + } + } + + if (MBBI == MBB->begin()) { + MBBI = MachineBasicBlock::iterator(nullptr); + Tracking = false; + } else + --MBBI; +} + bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { if (includeReserved && isReserved(Reg)) return true; @@ -358,7 +414,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - Candidates.reset(MO.getReg()); + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); } // Try to find a register that's unused if there is one, as then we won't @@ -380,7 +437,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Find an available scavenging slot with size and alignment matching // the requirements of the class RC. - const MachineFrameInfo &MFI = *MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned NeedSize = RC->getSize(); unsigned NeedAlign = RC->getAlignment(); diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp index 5cf3e57..66f1966 100644 --- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp @@ -22,7 +22,7 @@ using namespace llvm; #define DEBUG_TYPE "ip-regalloc" -cl::opt<bool> DumpRegUsage( +static cl::opt<bool> DumpRegUsage( "print-regusage", cl::init(false), cl::Hidden, cl::desc("print register usage details collected for analysis.")); diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index ea952d9..2f7ee8b 100644 --- a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -48,7 +48,7 @@ public: static char ID; RenameIndependentSubregs() : MachineFunctionPass(ID) {} - const char *getPassName() const override { + StringRef getPassName() const override { return "Rename Disconnected Subregister Components"; } @@ -184,7 +184,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes, unsigned MergedID = ~0u; for (RenameIndependentSubregs::SubRangeInfo &SRInfo : SubRangeInfos) { const LiveInterval::SubRange &SR = *SRInfo.SR; - if ((SR.LaneMask & LaneMask) == 0) + if ((SR.LaneMask & LaneMask).none()) continue; SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()); Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber()) @@ -219,24 +219,23 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, if (!MO.isDef() && !MO.readsReg()) continue; - MachineInstr &MI = *MO.getParent(); - - SlotIndex Pos = LIS->getInstructionIndex(MI); + SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()); + Pos = MO.isDef() ? 
Pos.getRegSlot(MO.isEarlyClobber()) + : Pos.getBaseIndex(); unsigned SubRegIdx = MO.getSubReg(); LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx); unsigned ID = ~0u; for (const SubRangeInfo &SRInfo : SubRangeInfos) { const LiveInterval::SubRange &SR = *SRInfo.SR; - if ((SR.LaneMask & LaneMask) == 0) + if ((SR.LaneMask & LaneMask).none()) continue; - LiveRange::const_iterator I = SR.find(Pos); - if (I == SR.end()) + const VNInfo *VNI = SR.getVNInfoAt(Pos); + if (VNI == nullptr) continue; - const VNInfo &VNI = *I->valno; // Map to local representant ID. - unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI); + unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI); // Global ID ID = Classes[LocalID + SRInfo.Index]; break; @@ -354,19 +353,24 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( if (I == 0) LI.clear(); LIS->constructMainRangeFromSubranges(LI); + // A def of a subregister may be a use of other register lanes. Replacing + // such a def with a def of a different register will eliminate the use, + // and may cause the recorded live range to be larger than the actual + // liveness in the program IR. + LIS->shrinkToUses(&LI); } } bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) { // Skip renaming if liveness of subregister is not tracked. - if (!MF.getSubtarget().enableSubRegLiveness()) + MRI = &MF.getRegInfo(); + if (!MRI->subRegLivenessEnabled()) return false; DEBUG(dbgs() << "Renaming independent subregister live ranges in " << MF.getName() << '\n'); LIS = &getAnalysis<LiveIntervals>(); - MRI = &MF.getRegInfo(); TII = MF.getSubtarget().getInstrInfo(); // Iterate over all vregs. Note that we query getNumVirtRegs() the newly diff --git a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp new file mode 100644 index 0000000..4519641 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -0,0 +1,67 @@ +//===-- ResetMachineFunctionPass.cpp - Reset Machine Function ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a pass that will conditionally reset a machine +/// function as if it was just created. This is used to provide a fallback +/// mechanism when GlobalISel fails, thus the condition for the reset to +/// happen is that the MachineFunction has the FailedISel property. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +#define DEBUG_TYPE "reset-machine-function" + +STATISTIC(NumFunctionsReset, "Number of functions reset"); + +namespace { + class ResetMachineFunction : public MachineFunctionPass { + /// Tells whether or not this pass should emit a fallback + /// diagnostic when it resets a function. 
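The new ResetMachineFunctionPass above defines the pass and its factory but not where a backend schedules it. A hedged sketch of the intended shape of the call site, assuming a TargetPassConfig-style instruction-selection pipeline; everything except createResetMachineFunctionPass is an invented name:

    // Hypothetical hook in some target's TargetPassConfig subclass. The
    // GlobalISel selector may mark the function FailedISel; the reset pass
    // then wipes the MachineFunction so the fallback selector can redo it.
    void MyPassConfig::addMyInstructionSelection() {
      addPass(createMyGlobalISelPass());                          // invented
      addPass(createResetMachineFunctionPass(/*EmitFallbackDiag=*/true));
      addPass(createMyFallbackSelectionDAGPass());                // invented
    }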
+ bool EmitFallbackDiag; + + public: + static char ID; // Pass identification, replacement for typeid + ResetMachineFunction(bool EmitFallbackDiag = false) + : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag) {} + + StringRef getPassName() const override { return "ResetMachineFunction"; } + + bool runOnMachineFunction(MachineFunction &MF) override { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) { + DEBUG(dbgs() << "Reseting: " << MF.getName() << '\n'); + ++NumFunctionsReset; + MF.reset(); + if (EmitFallbackDiag) { + const Function &F = *MF.getFunction(); + DiagnosticInfoISelFallback DiagFallback(F); + F.getContext().diagnose(DiagFallback); + } + return true; + } + return false; + } + + }; +} // end anonymous namespace + +char ResetMachineFunction::ID = 0; +INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE, + "reset machine function if ISel failed", false, false) + +MachineFunctionPass * +llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false) { + return new ResetMachineFunction(EmitFallbackDiag); +} diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp index 4a1b995..2b82df2 100644 --- a/contrib/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp @@ -52,17 +52,6 @@ using namespace llvm::safestack; #define DEBUG_TYPE "safestack" -enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP }; - -static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage", - cl::Hidden, cl::init(ThreadLocalUSP), - cl::desc("Type of storage for the unsafe stack pointer"), - cl::values(clEnumValN(ThreadLocalUSP, "thread-local", - "Thread-local storage"), - clEnumValN(SingleThreadUSP, "single-thread", - "Non-thread-local storage"), - clEnumValEnd)); - namespace llvm { STATISTIC(NumFunctions, "Total number of functions"); @@ -124,9 +113,6 @@ class SafeStack : public FunctionPass { /// might expect to appear on the stack on most common targets. enum { StackAlignment = 16 }; - /// \brief Build a value representing a pointer to the unsafe stack pointer. - Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F); - /// \brief Return the value of the stack canary. Value *getStackGuard(IRBuilder<> &IRB, Function &F); @@ -356,46 +342,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { return true; } -Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) { - // Check if there is a target-specific location for the unsafe stack pointer. - if (TL) - if (Value *V = TL->getSafeStackPointerLocation(IRB)) - return V; - - // Otherwise, assume the target links with compiler-rt, which provides a - // thread-local variable with a magic name. - Module &M = *F.getParent(); - const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr"; - auto UnsafeStackPtr = - dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar)); - - bool UseTLS = USPStorage == ThreadLocalUSP; - - if (!UnsafeStackPtr) { - auto TLSModel = UseTLS ? - GlobalValue::InitialExecTLSModel : - GlobalValue::NotThreadLocal; - // The global variable is not defined yet, define it ourselves. - // We use the initial-exec TLS model because we do not support the - // variable living anywhere other than in the main executable. - UnsafeStackPtr = new GlobalVariable( - M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, - UnsafeStackPtrVar, nullptr, TLSModel); - } else { - // The variable exists, check its type and attributes. 
- if (UnsafeStackPtr->getValueType() != StackPtrTy) - report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type"); - if (UseTLS != UnsafeStackPtr->isThreadLocal()) - report_fatal_error(Twine(UnsafeStackPtrVar) + " must " + - (UseTLS ? "" : "not ") + "be thread-local"); - } - return UnsafeStackPtr; -} - Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) { - Value *StackGuardVar = nullptr; - if (TL) - StackGuardVar = TL->getIRStackGuard(IRB); + Value *StackGuardVar = TL->getIRStackGuard(IRB); if (!StackGuardVar) StackGuardVar = F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy); @@ -752,7 +700,9 @@ bool SafeStack::runOnFunction(Function &F) { return false; } - TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr; + if (!TM) + report_fatal_error("Target machine is required"); + TL = TM->getSubtargetImpl(F)->getTargetLowering(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); ++NumFunctions; @@ -764,7 +714,7 @@ bool SafeStack::runOnFunction(Function &F) { // Collect all points where stack gets unwound and needs to be restored // This is only necessary because the runtime (setjmp and unwind code) is - // not aware of the unsafe stack and won't unwind/restore it prorerly. + // not aware of the unsafe stack and won't unwind/restore it properly. // To work around this problem without changing the runtime, we insert // instrumentation to restore the unsafe stack pointer when necessary. SmallVector<Instruction *, 4> StackRestorePoints; @@ -786,7 +736,7 @@ bool SafeStack::runOnFunction(Function &F) { ++NumUnsafeStackRestorePointsFunctions; IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); - UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F); + UnsafeStackPtr = TL->getSafeStackPointerLocation(IRB); // Load the current stack pointer (we'll also use it as a base pointer). // FIXME: use a dedicated register for it ? diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp index 795eb8d..7fbeadd 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp @@ -214,10 +214,12 @@ void StackColoring::calculateLiveIntervals() { unsigned AllocaNo = It.second.AllocaNo; if (IsStart) { - assert(!Started.test(AllocaNo)); - Started.set(AllocaNo); - Ended.reset(AllocaNo); - Start[AllocaNo] = InstNo; + assert(!Started.test(AllocaNo) || Start[AllocaNo] == BBStart); + if (!Started.test(AllocaNo)) { + Started.set(AllocaNo); + Ended.reset(AllocaNo); + Start[AllocaNo] = InstNo; + } } else { assert(!Ended.test(AllocaNo)); if (Started.test(AllocaNo)) { diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp index fb433c1..7d4dbd1 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -132,6 +132,14 @@ void StackLayout::computeLayout() { // If this is replaced with something smarter, it must preserve the property // that the first object is always at the offset 0 in the stack frame (for // StackProtectorSlot), or handle stack protector in some other way. + + // Sort objects by size (largest first) to reduce fragmentation. 
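The SafeStackLayout hunk that continues below implements exactly the rule stated in the comment above: leave the first object (reserved for the stack protector slot) where it is and place the remaining objects largest-first so small objects fill alignment gaps. The same rule in plain C++, with a simplified Obj type standing in for the layout's internal stack objects:

#include <algorithm>
#include <cstdint>
#include <vector>

struct Obj { uint64_t Size; unsigned Alignment; };

// Keep element 0 in place, then order the rest by decreasing size. A stable
// sort preserves the original order among equally sized objects.
void sortForLayout(std::vector<Obj> &Objects) {
  if (Objects.size() > 2)
    std::stable_sort(Objects.begin() + 1, Objects.end(),
                     [](const Obj &A, const Obj &B) { return A.Size > B.Size; });
}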
+ if (StackObjects.size() > 2) + std::stable_sort(StackObjects.begin() + 1, StackObjects.end(), + [](const StackObject &a, const StackObject &b) { + return a.Size > b.Size; + }); + for (auto &Obj : StackObjects) layoutObject(Obj); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index efde61e..427d952 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -139,8 +139,7 @@ void SUnit::removePred(const SDep &D) { SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); - SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(), - N->Succs.end(), P); + SmallVectorImpl<SDep>::iterator Succ = find(N->Succs, P); assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); N->Succs.erase(Succ); Preds.erase(I); @@ -311,10 +310,20 @@ void SUnit::biasCriticalPath() { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const { + if (this == &DAG->ExitSU) + OS << "ExitSU"; + else if (this == &DAG->EntrySU) + OS << "EntrySU"; + else + OS << "SU(" << NodeNum << ")"; +} + /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. void SUnit::dump(const ScheduleDAG *G) const { - dbgs() << "SU(" << NodeNum << "): "; + print(dbgs(), G); + dbgs() << ": "; G->dumpNode(this); } @@ -338,12 +347,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { I != E; ++I) { dbgs() << " "; switch (I->getKind()) { - case SDep::Data: dbgs() << "val "; break; - case SDep::Anti: dbgs() << "anti"; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ch "; break; + case SDep::Data: dbgs() << "data "; break; + case SDep::Anti: dbgs() << "anti "; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ord "; break; } - dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; + I->getSUnit()->print(dbgs(), G); if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); @@ -358,12 +367,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { I != E; ++I) { dbgs() << " "; switch (I->getKind()) { - case SDep::Data: dbgs() << "val "; break; - case SDep::Anti: dbgs() << "anti"; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ch "; break; + case SDep::Data: dbgs() << "data "; break; + case SDep::Anti: dbgs() << "anti "; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ord "; break; } - dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; + I->getSUnit()->print(dbgs(), G); if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 22bfd4d..611c5a7 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -77,7 +77,7 @@ static unsigned getReductionSize() { static void dumpSUList(ScheduleDAGInstrs::SUList &L) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << "{ "; - for (auto *su : L) { + for (const SUnit *su : L) { dbgs() << "SU(" << su->NodeNum << ")"; if (su != L.back()) dbgs() << ", "; @@ -142,9 +142,7 @@ static void getUnderlyingObjects(const Value *V, SmallVector<Value *, 4> Objs; GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); - for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); - I != IE; ++I) { - V = *I; + for (Value *V : Objs) { 
if (!Visited.insert(V).second) continue; if (Operator::getOpcode(V) == Instruction::IntToPtr) { @@ -164,7 +162,7 @@ static void getUnderlyingObjects(const Value *V, /// information and it can be tracked to a normal reference to a known /// object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, - const MachineFrameInfo *MFI, + const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL) { auto allMMOsOkay = [&]() { @@ -178,16 +176,16 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, // overlapping locations. The client code calling this function assumes // this is not the case. So return a conservative answer of no known // object. - if (MFI->hasTailCall()) + if (MFI.hasTailCall()) return false; // For now, ignore PseudoSourceValues which may alias LLVM IR values // because the code that uses this function has no way to cope with // such aliases. - if (PSV->isAliased(MFI)) + if (PSV->isAliased(&MFI)) return false; - bool MayAlias = PSV->mayAlias(MFI); + bool MayAlias = PSV->mayAlias(&MFI); Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); } else if (const Value *V = MMO->getValue()) { SmallVector<Value *, 4> Objs; @@ -249,32 +247,27 @@ void ScheduleDAGInstrs::exitRegion() { void ScheduleDAGInstrs::addSchedBarrierDeps() { MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr; ExitSU.setInstr(ExitMI); - bool AllDepKnown = ExitMI && - (ExitMI->isCall() || ExitMI->isBarrier()); - if (ExitMI && AllDepKnown) { - // If it's a call or a barrier, add dependencies on the defs and uses of - // instruction. - for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = ExitMI->getOperand(i); + // Add dependencies on the defs and uses of the instruction. + if (ExitMI) { + for (const MachineOperand &MO : ExitMI->operands()) { if (!MO.isReg() || MO.isDef()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - - if (TRI->isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); - else if (MO.readsReg()) // ignore undef operands - addVRegUseDeps(&ExitSU, i); + } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { + addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO)); + } } - } else { + } + if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) { // For others, e.g. fallthrough, conditional branch, assume the exit // uses all the registers that are livein to the successor blocks. - assert(Uses.empty() && "Uses in set before adding deps?"); - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - for (const auto &LI : (*SI)->liveins()) { + for (const MachineBasicBlock *Succ : BB->successors()) { + for (const auto &LI : Succ->liveins()) { if (!Uses.contains(LI.PhysReg)) Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg)); } + } } } @@ -326,6 +319,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + // We do not need to track any dependencies for constant registers. + if (MRI.isConstantPhysReg(Reg)) + return; // Optionally add output and anti dependencies. 
For anti // dependencies we use a latency of 0 because for a multi-issue @@ -334,8 +331,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // TODO: Using a latency of 1 here for output dependencies assumes // there's no cost for reusing registers. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); - Alias.isValid(); ++Alias) { + for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { @@ -362,13 +358,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); + Uses.insert(PhysRegSUOper(SU, OperIdx, Reg)); if (RemoveKillFlags) MO.setIsKill(false); - } - else { + } else { addPhysRegDataDeps(SU, OperIdx); - unsigned Reg = MO.getReg(); // clear this register's use list if (Uses.contains(Reg)) @@ -404,7 +398,7 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const // No point in tracking lanemasks if we don't have interesting subregisters. const TargetRegisterClass &RC = *MRI.getRegClass(Reg); if (!RC.HasDisjunctSubRegs) - return ~0u; + return LaneBitmask::getAll(); unsigned SubReg = MO.getSubReg(); if (SubReg == 0) @@ -430,14 +424,14 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { DefLaneMask = getLaneMaskForMO(MO); // If we have a <read-undef> flag, none of the lane values comes from an // earlier instruction. - KillLaneMask = IsKill ? ~0u : DefLaneMask; + KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask; // Clear undef flag, we'll re-add it later once we know which subregister // Def is first. MO.setIsUndef(false); } else { - DefLaneMask = ~0u; - KillLaneMask = ~0u; + DefLaneMask = LaneBitmask::getAll(); + KillLaneMask = LaneBitmask::getAll(); } if (MO.isDead()) { @@ -450,12 +444,12 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { E = CurrentVRegUses.end(); I != E; /*empty*/) { LaneBitmask LaneMask = I->LaneMask; // Ignore uses of other lanes. - if ((LaneMask & KillLaneMask) == 0) { + if ((LaneMask & KillLaneMask).none()) { ++I; continue; } - if ((LaneMask & DefLaneMask) != 0) { + if ((LaneMask & DefLaneMask).any()) { SUnit *UseSU = I->SU; MachineInstr *Use = UseSU->getInstr(); SDep Dep(SU, SDep::Data, Reg); @@ -467,7 +461,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { LaneMask &= ~KillLaneMask; // If we found a Def for all lanes of this use, remove it from the list. - if (LaneMask != 0) { + if (LaneMask.any()) { I->LaneMask = LaneMask; ++I; } else @@ -490,7 +484,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), CurrentVRegDefs.end())) { // Ignore defs for other lanes. - if ((V2SU.LaneMask & LaneMask) == 0) + if ((V2SU.LaneMask & LaneMask).none()) continue; // Add an output dependence. 
SUnit *DefSU = V2SU.SU; @@ -513,11 +507,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; V2SU.SU = SU; V2SU.LaneMask = OverlapMask; - if (NonOverlapMask != 0) + if (NonOverlapMask.any()) CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU)); } // If there was no CurrentVRegDefs entry for some lanes yet, create one. - if (LaneMask != 0) + if (LaneMask.any()) CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); } @@ -533,7 +527,8 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { unsigned Reg = MO.getReg(); // Remember the use. Data dependencies will be added when we find the def. - LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; + LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) + : LaneBitmask::getAll(); CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); // Add antidependences to the following defs of the vreg. @@ -541,7 +536,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { CurrentVRegDefs.end())) { // Ignore defs for unrelated lanes. LaneBitmask PrevDefLaneMask = V2SU.LaneMask; - if ((PrevDefLaneMask & LaneMask) == 0) + if ((PrevDefLaneMask & LaneMask).none()) continue; if (V2SU.SU == SU) continue; @@ -554,7 +549,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// (like a call or something with unmodeled side effects). static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasOrderedMemoryRef() && !MI->isInvariantLoad(AA)); + (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA)); } /// This returns true if the two MIs need a chain edge between them. @@ -621,8 +616,8 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// Check whether two objects need a chain edge and add it if needed. void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb, unsigned Latency) { - if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(), - SUb->getInstr())) { + if (MIsNeedChainEdge(AAForDep, &MFI, MF.getDataLayout(), SUa->getInstr(), + SUb->getInstr())) { SDep Dep(SUa, SDep::MayAliasMem); Dep.setLatency(Latency); SUb->addPred(Dep); @@ -668,10 +663,10 @@ void ScheduleDAGInstrs::initSUnits() { // within an out-of-order core. These are identified by BufferSize=1. if (SchedModel.hasInstrSchedModel()) { const MCSchedClassDesc *SC = getSchedClass(SU); - for (TargetSchedModel::ProcResIter - PI = SchedModel.getWriteProcResBegin(SC), - PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { - switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) { + for (const MCWriteProcResEntry &PRE : + make_range(SchedModel.getWriteProcResBegin(SC), + SchedModel.getWriteProcResEnd(SC))) { + switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) { case 0: SU->hasReservedResource = true; break; @@ -686,44 +681,6 @@ void ScheduleDAGInstrs::initSUnits() { } } -void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { - const MachineInstr *MI = SU->getInstr(); - for (const MachineOperand &MO : MI->operands()) { - if (!MO.isReg()) - continue; - if (!MO.readsReg()) - continue; - if (TrackLaneMasks && !MO.isUse()) - continue; - - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - // Ignore re-defs. 
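Several hunks in ScheduleDAGInstrs.cpp (and the RenameIndependentSubregs one earlier) replace raw unsigned lane masks, compared against 0 and ~0u, with a LaneBitmask type queried through none(), any() and getAll(). A reduced model of such a wrapper, shown only to make the new spelling concrete; the real LLVM class differs in detail:

#include <cstdint>

// Explicit queries read better than comparing a raw integer against 0 or ~0u,
// and they keep the underlying width a private detail.
struct LaneMask {
  uint32_t Bits;
  static LaneMask getAll()  { return {~uint32_t(0)}; }
  static LaneMask getNone() { return {0u}; }
  bool none() const { return Bits == 0; }
  bool any()  const { return Bits != 0; }
  LaneMask operator&(LaneMask O) const { return {Bits & O.Bits}; }
  LaneMask operator~() const { return {~Bits}; }
};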
- if (TrackLaneMasks) { - bool FoundDef = false; - for (const MachineOperand &MO2 : MI->operands()) { - if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) { - FoundDef = true; - break; - } - } - if (FoundDef) - continue; - } - - // Record this local VReg use. - VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); - for (; UI != VRegUses.end(); ++UI) { - if (UI->SU == SU) - break; - } - if (UI == VRegUses.end()) - VRegUses.insert(VReg2SUnit(Reg, 0, SU)); - } -} - class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { /// Current total number of SUs in map. @@ -901,9 +858,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, CurrentVRegDefs.setUniverse(NumVirtRegs); CurrentVRegUses.setUniverse(NumVirtRegs); - VRegUses.clear(); - VRegUses.setUniverse(NumVirtRegs); - // Model data dependencies between instructions being scheduled and the // ExitSU. addSchedBarrierDeps(); @@ -926,8 +880,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(SU && "No SUnit mapped to this MI"); if (RPTracker) { - collectVRegUses(SU); - RegisterOperands RegOpers; RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false); if (TrackLaneMasks) { @@ -957,12 +909,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - if (TRI->isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - else { + } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { HasVRegDef = true; addVRegDefDeps(SU, j); } @@ -977,13 +926,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - if (TRI->isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - else if (MO.readsReg()) // ignore undef operands + } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { addVRegUseDeps(SU, j); + } } // If we haven't seen any uses in this scheduling region, create a @@ -1023,7 +970,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } // If it's not a store or a variant load, we're done. - if (!MI.mayStore() && !(MI.mayLoad() && !MI.isInvariantLoad(AA))) + if (!MI.mayStore() && + !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))) continue; // Always add dependecy edge to BarrierChain if present. @@ -1200,9 +1148,8 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { LiveRegs.reset(); // Examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - for (const auto &LI : (*SI)->liveins()) { + for (const MachineBasicBlock *Succ : BB->successors()) { + for (const auto &LI : Succ->liveins()) { // Repeat, for reg and all subregs. for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) @@ -1225,7 +1172,7 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, // might set it on too many operands. We will clear as many flags as we // can though. 
MachineBasicBlock::instr_iterator Begin = MI->getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(*MI); + MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); while (Begin != End) { if (NewKillState) { if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) @@ -1312,6 +1259,11 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { // register is used multiple times we only set the kill flag on // the first use. Don't set kill flags on undef operands. killedRegs.reset(); + + // toggleKillFlag can append new operands (implicit defs), so using + // a range-based loop is not safe. The new operands will be appended + // at the end of the operand list and they don't need to be visited, + // so iterating until the currently last operand is ok. for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; @@ -1337,13 +1289,12 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { if (MO.isKill() != kill) { DEBUG(dbgs() << "Fixing " << MO << " in "); - // Warning: toggleKillFlag may invalidate MO. toggleKillFlag(&MI, MO); DEBUG(MI.dump()); DEBUG({ if (MI.getOpcode() == TargetOpcode::BUNDLE) { MachineBasicBlock::instr_iterator Begin = MI.getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); while (++Begin != End) DEBUG(Begin->dump()); } @@ -1355,8 +1306,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { // Mark any used register (that is not using undef) and subregs as // now live... - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if ((Reg == 0) || MRI.isReserved(Reg)) continue; @@ -1457,13 +1407,12 @@ public: // the subtree limit, then try to join it now since splitting subtrees is // only useful if multiple high-pressure paths are possible. unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; - for (SUnit::const_pred_iterator - PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { - if (PI->getKind() != SDep::Data) + for (const SDep &PredDep : SU->Preds) { + if (PredDep.getKind() != SDep::Data) continue; - unsigned PredNum = PI->getSUnit()->NodeNum; + unsigned PredNum = PredDep.getSUnit()->NodeNum; if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) - joinPredSubtree(*PI, SU, /*CheckLimit=*/false); + joinPredSubtree(PredDep, SU, /*CheckLimit=*/false); // Either link or merge the TreeData entry from the child to the parent. 
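The comment added to fixupKills above states a general rule worth remembering: when the loop body may append to the container being walked (toggleKillFlag can add implicit-def operands), iterate by index against a size captured before the loop rather than with a range-based for. A self-contained illustration:

#include <vector>

// Appending inside a range-based for invalidates its iterators once the
// vector reallocates. Indexing against a size captured up front is safe and,
// as in fixupKills, deliberately skips the freshly appended elements.
void appendWhileWalking(std::vector<int> &Work) {
  for (size_t I = 0, E = Work.size(); I != E; ++I)
    if (Work[I] % 2 != 0)
      Work.push_back(Work[I] * 10); // new entries land at indices >= E
}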
if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { @@ -1505,12 +1454,11 @@ public: R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); assert(SubtreeClasses.getNumClasses() == RootSet.size() && "number of roots should match trees"); - for (SparseSet<RootData>::const_iterator - RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { - unsigned TreeID = SubtreeClasses[RI->NodeID]; - if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) - R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; - R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; + for (const RootData &Root : RootSet) { + unsigned TreeID = SubtreeClasses[Root.NodeID]; + if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID) + R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID]; + R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount; // Note that SubInstrCount may be greater than InstrCount if we joined // subtrees across a cross edge. InstrCount will be attributed to the // original parent, while SubInstrCount will be attributed to the joined @@ -1524,14 +1472,12 @@ public: DEBUG(dbgs() << " SU(" << Idx << ") in tree " << R.DFSNodeData[Idx].SubtreeID << '\n'); } - for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator - I = ConnectionPairs.begin(), E = ConnectionPairs.end(); - I != E; ++I) { - unsigned PredTree = SubtreeClasses[I->first->NodeNum]; - unsigned SuccTree = SubtreeClasses[I->second->NodeNum]; + for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) { + unsigned PredTree = SubtreeClasses[P.first->NodeNum]; + unsigned SuccTree = SubtreeClasses[P.second->NodeNum]; if (PredTree == SuccTree) continue; - unsigned Depth = I->first->getDepth(); + unsigned Depth = P.first->getDepth(); addConnection(PredTree, SuccTree, Depth); addConnection(SuccTree, PredTree, Depth); } @@ -1553,9 +1499,8 @@ protected: // Four is the magic number of successors before a node is considered a // pinch point. 
unsigned NumDataSucs = 0; - for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), - SE = PredSU->Succs.end(); SI != SE; ++SI) { - if (SI->getKind() == SDep::Data) { + for (const SDep &SuccDep : PredSU->Succs) { + if (SuccDep.getKind() == SDep::Data) { if (++NumDataSucs >= 4) return false; } @@ -1575,10 +1520,9 @@ protected: do { SmallVectorImpl<SchedDFSResult::Connection> &Connections = R.SubtreeConnections[FromTree]; - for (SmallVectorImpl<SchedDFSResult::Connection>::iterator - I = Connections.begin(), E = Connections.end(); I != E; ++I) { - if (I->TreeID == ToTree) { - I->Level = std::max(I->Level, Depth); + for (SchedDFSResult::Connection &C : Connections) { + if (C.TreeID == ToTree) { + C.Level = std::max(C.Level, Depth); return; } } @@ -1617,9 +1561,9 @@ public: } // anonymous static bool hasDataSucc(const SUnit *SU) { - for (SUnit::const_succ_iterator - SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { - if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) + for (const SDep &SuccDep : SU->Succs) { + if (SuccDep.getKind() == SDep::Data && + !SuccDep.getSUnit()->isBoundaryNode()) return true; } return false; @@ -1632,15 +1576,13 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { llvm_unreachable("Top-down ILP metric is unimplemnted"); SchedDFSImpl Impl(*this); - for (ArrayRef<SUnit>::const_iterator - SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { - const SUnit *SU = &*SI; - if (Impl.isVisited(SU) || hasDataSucc(SU)) + for (const SUnit &SU : SUnits) { + if (Impl.isVisited(&SU) || hasDataSucc(&SU)) continue; SchedDAGReverseDFS DFS; - Impl.visitPreorder(SU); - DFS.follow(SU); + Impl.visitPreorder(&SU); + DFS.follow(&SU); for (;;) { // Traverse the leftmost path as far as possible. while (DFS.getPred() != DFS.getPredEnd()) { @@ -1676,13 +1618,11 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { /// connected to this tree, record the depth of the connection so that the /// nearest connected subtrees can be prioritized. void SchedDFSResult::scheduleTree(unsigned SubtreeID) { - for (SmallVectorImpl<Connection>::const_iterator - I = SubtreeConnections[SubtreeID].begin(), - E = SubtreeConnections[SubtreeID].end(); I != E; ++I) { - SubtreeConnectLevels[I->TreeID] = - std::max(SubtreeConnectLevels[I->TreeID], I->Level); - DEBUG(dbgs() << " Tree: " << I->TreeID - << " @" << SubtreeConnectLevels[I->TreeID] << '\n'); + for (const Connection &C : SubtreeConnections[SubtreeID]) { + SubtreeConnectLevels[C.TreeID] = + std::max(SubtreeConnectLevels[C.TreeID], C.Level); + DEBUG(dbgs() << " Tree: " << C.TreeID + << " @" << SubtreeConnectLevels[C.TreeID] << '\n'); } } diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 69c4870..83bc1ba 100644 --- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -145,7 +145,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[StageCycle]; - // FALLTHROUGH + LLVM_FALLTHROUGH; case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. 
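The ScoreboardHazardRecognizer hunks above and just below replace bare "// FALLTHROUGH" comments with the LLVM_FALLTHROUGH macro so the intent is visible to the compiler and implicit-fallthrough warnings stay quiet. In portable C++17 the same annotation is spelled [[fallthrough]]; a small sketch with an invented Stage enum rather than the real InstrStage type:

// Required stages conflict with both reserved and required units, so the
// Required case deliberately falls through into the Reserved case.
enum class Stage { Required, Reserved };

unsigned maskFreeUnits(Stage S, unsigned Free, unsigned Required,
                       unsigned Reserved) {
  switch (S) {
  case Stage::Required:
    Free &= ~Reserved;   // also conflicts with reserved units
    [[fallthrough]];
  case Stage::Reserved:
    Free &= ~Required;   // conflicts only with required units
    break;
  }
  return Free;
}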
freeUnits &= ~RequiredScoreboard[StageCycle]; @@ -197,7 +197,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[cycle + i]; - // FALLTHROUGH + LLVM_FALLTHROUGH; case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. freeUnits &= ~RequiredScoreboard[cycle + i]; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5ecc6da..2c7bffe 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16,14 +16,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -181,7 +182,7 @@ namespace { /// if things it uses can be simplified by bit propagation. /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { - unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt Demanded = APInt::getAllOnesValue(BitWidth); return SimplifyDemandedBits(Op, Demanded); } @@ -326,7 +327,7 @@ namespace { SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); - SDValue visitFMULForFMACombine(SDNode *N); + SDValue visitFMULForFMADistributiveCombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, @@ -334,12 +335,15 @@ namespace { SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); + SDValue foldSelectOfConstants(SDNode *N); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); + SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, bool foldBooleans = true); @@ -356,6 +360,7 @@ namespace { SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); @@ -374,9 +379,14 @@ namespace { SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue splitMergedValStore(StoreSDNode *ST); SDValue TransformFPLoadStorePair(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); + SDValue reduceBuildVecToShuffle(SDNode *N); + SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, 
ArrayRef<int> VectorMask, + SDValue VecIn1, SDValue VecIn2, + unsigned LeftIdx); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -444,10 +454,11 @@ namespace { /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. - /// \return True if a merged store was created. - bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, - EVT MemVT, unsigned NumStores, - bool IsConstantSrc, bool UseVector); + /// \return number of stores that were merged into a merged store (always + /// a prefix of \p StoreNode). + bool MergeStoresOfConstantsOrVecElts( + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector); /// This is a helper function for MergeConsecutiveStores. /// Stores that may be merged are placed in StoreNodes. @@ -464,8 +475,10 @@ namespace { /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. - /// \return True if some memory operations were changed. - bool MergeConsecutiveStores(StoreSDNode *N); + /// \return number of stores that were merged into a merged store (the + /// affected nodes are stored as a prefix in \p StoreNodes). + bool MergeConsecutiveStores(StoreSDNode *N, + SmallVectorImpl<MemOpLink> &StoreNodes); /// \brief Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) @@ -536,10 +549,6 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { ((DAGCombiner*)DC)->AddToWorklist(N); } -void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { - ((DAGCombiner*)DC)->removeFromWorklist(N); -} - SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); @@ -620,7 +629,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros()) + return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) return 1; @@ -683,9 +693,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, LegalOperations, Depth+1), Op.getOperand(0), Flags); case ISD::FSUB: - // We can't turn -(A-B) into B-A when we honor signed zeros. - assert(Options.UnsafeFPMath); - // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) if (N0CFP->isZero()) @@ -726,6 +733,15 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } +// APInts must be the same size for most operations, this helper +// function zero extends the shorter of the pair so that they match. +// We provide an Offset so that we can create bitwidths that won't overflow. +static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { + unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); + LHS = LHS.zextOrSelf(Bits); + RHS = RHS.zextOrSelf(Bits); +} + // Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. 
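The new zeroExtendToMatch helper above widens a pair of APInts to a common bit width, with optional headroom, before they are combined. The same idea on a toy value/width pair; WideInt is invented for the example and is not the APInt interface:

#include <algorithm>
#include <cstdint>

// Toy stand-in for an arbitrary-precision unsigned integer: a value plus the
// bit width it is considered to occupy.
struct WideInt {
  uint64_t Value;
  unsigned Width;
};

// Widen both operands to a common width, with optional extra headroom so a
// following add or shift cannot overflow the narrower operand's width. Zero
// extension leaves the stored unsigned value unchanged; only the width grows.
void zeroExtendToMatch(WideInt &L, WideInt &R, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(L.Width, R.Width);
  L.Width = Bits;
  R.Width = Bits;
}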
Also, set the incoming LHS, RHS, and CC references to @@ -775,42 +791,61 @@ static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { return nullptr; } -// \brief Returns the SDNode if it is a constant splat BuildVector or constant -// int. -static ConstantSDNode *isConstOrConstSplat(SDValue N) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) - return CN; - - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; - ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); - - // BuildVectors can truncate their operands. Ignore that case here. - // FIXME: We blindly ignore splats which include undef which is overly - // pessimistic. - if (CN && UndefElements.none() && - CN->getValueType(0) == N.getValueType().getScalarType()) - return CN; +// Determines if it is a constant integer or a build vector of constant +// integers (and undefs). +// Do not permit build vector implicit truncation. +static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N)) + return !(Const->isOpaque() && NoOpaques); + if (N.getOpcode() != ISD::BUILD_VECTOR) + return false; + unsigned BitWidth = N.getScalarValueSizeInBits(); + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + continue; + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op); + if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || + (Const->isOpaque() && NoOpaques)) + return false; } - - return nullptr; + return true; } -// \brief Returns the SDNode if it is a constant splat BuildVector or constant -// float. -static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { - if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) - return CN; +// Determines if it is a constant null integer or a splatted vector of a +// constant null integer (with no undefs). +// Build vector implicit truncation is not an issue for null values. +static bool isNullConstantOrNullSplatConstant(SDValue N) { + if (ConstantSDNode *Splat = isConstOrConstSplat(N)) + return Splat->isNullValue(); + return false; +} - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; - ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); +// Determines if it is a constant integer of one or a splatted vector of a +// constant integer of one (with no undefs). +// Do not permit build vector implicit truncation. +static bool isOneConstantOrOneSplatConstant(SDValue N) { + unsigned BitWidth = N.getScalarValueSizeInBits(); + if (ConstantSDNode *Splat = isConstOrConstSplat(N)) + return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth; + return false; +} - if (CN && UndefElements.none()) - return CN; - } +// Determines if it is a constant integer of all ones or a splatted vector of a +// constant integer of all ones (with no undefs). +// Do not permit build vector implicit truncation. +static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) { + unsigned BitWidth = N.getScalarValueSizeInBits(); + if (ConstantSDNode *Splat = isConstOrConstSplat(N)) + return Splat->isAllOnesValue() && + Splat->getAPIntValue().getBitWidth() == BitWidth; + return false; +} - return nullptr; +// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with +// undef's. 
+static bool isAnyConstantBuildVector(const SDNode *N) { + return ISD::isBuildVectorOfConstantSDNodes(N) || + ISD::isBuildVectorOfConstantFPSDNodes(N); } SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, @@ -935,9 +970,9 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { - SDLoc dl(Load); + SDLoc DL(Load); EVT VT = Load->getValueType(0); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); @@ -953,7 +988,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; - SDLoc dl(Op); + SDLoc DL(Op); if (ISD::isUNINDEXEDLoad(Op.getNode())) { LoadSDNode *LD = cast<LoadSDNode>(Op); EVT MemVT = LD->getMemoryVT(); @@ -962,7 +997,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, dl, PVT, + return DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); } @@ -971,30 +1006,30 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { switch (Opc) { default: break; case ISD::AssertSext: - return DAG.getNode(ISD::AssertSext, dl, PVT, + return DAG.getNode(ISD::AssertSext, DL, PVT, SExtPromoteOperand(Op.getOperand(0), PVT), Op.getOperand(1)); case ISD::AssertZext: - return DAG.getNode(ISD::AssertZext, dl, PVT, + return DAG.getNode(ISD::AssertZext, DL, PVT, ZExtPromoteOperand(Op.getOperand(0), PVT), Op.getOperand(1)); case ISD::Constant: { unsigned ExtOpc = Op.getValueType().isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - return DAG.getNode(ExtOpc, dl, PVT, Op); + return DAG.getNode(ExtOpc, DL, PVT, Op); } } if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) return SDValue(); - return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); + return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op); } SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) return SDValue(); EVT OldVT = Op.getValueType(); - SDLoc dl(Op); + SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) @@ -1003,13 +1038,13 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp, DAG.getValueType(OldVT)); } SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { EVT OldVT = Op.getValueType(); - SDLoc dl(Op); + SDLoc DL(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) @@ -1018,7 +1053,7 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); - return DAG.getZeroExtendInReg(NewOp, dl, OldVT); + return DAG.getZeroExtendInReg(NewOp, DL, OldVT); } /// Promote the specified integer binary operation if the target indicates it is @@ -1072,9 +1107,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - SDLoc dl(Op); - return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(Opc, dl, PVT, NN0, NN1)); + SDLoc DL(Op); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(Opc, DL, PVT, NN0, NN1)); } return SDValue(); } @@ -1119,9 +1154,9 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - SDLoc dl(Op); - return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + SDLoc DL(Op); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1))); } return SDValue(); } @@ -1178,7 +1213,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - SDLoc dl(Op); + SDLoc DL(Op); SDNode *N = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); @@ -1186,10 +1221,10 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, LD->getChain(), LD->getBasePtr(), MemVT, LD->getMemOperand()); - SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); @@ -1315,7 +1350,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { continue; assert(N->getOpcode() != ISD::DELETED_NODE && - RV.getNode()->getOpcode() != ISD::DELETED_NODE && + RV.getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); DEBUG(dbgs() << " ... 
into: "; @@ -1562,8 +1597,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { break; case ISD::TokenFactor: - if (Op.hasOneUse() && - std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { + if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) { // Queue up for processing. TFs.push_back(Op.getNode()); // Clean up in case the token factor is removed. @@ -1571,7 +1605,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Changed = true; break; } - // Fall thru + LLVM_FALLTHROUGH; default: // Only add if it isn't already in the list. @@ -1634,6 +1668,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) { @@ -1650,61 +1685,73 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (add x, undef) -> undef if (N0.isUndef()) return N0; + if (N1.isUndef()) return N1; + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // canonicalize constant to RHS if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::ADD, DL, VT, N1, N0); // fold (add c1, c2) -> c1+c2 - return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, - N0.getNode(), N1.getNode()); + return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(), + N1.getNode()); } + // fold (add x, 0) -> x if (isNullConstant(N1)) return N0; + // fold ((c1-A)+c2) -> (c1+c2)-A - if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) { + if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { if (N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { - SDLoc DL(N); + if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), DL, VT), + DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), N0.getOperand(1)); } } + // reassociate add - if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) + if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1)) return RADD; + // fold ((0-A) + B) -> B-A - if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); + if (N0.getOpcode() == ISD::SUB && + isNullConstantOrNullSplatConstant(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B - if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); + if (N1.getOpcode() == ISD::SUB && + isNullConstantOrNullSplatConstant(N1.getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) return N1.getOperand(0); + // fold ((B-A)+A) -> B if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) return N0.getOperand(0); + // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); + // fold (A+(B-(C+A))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(1)) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), 
N1.getOperand(1).getOperand(0)); + // fold (A+((B-A)+or-C)) to (B+or-C) if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && N1.getOperand(0).getOpcode() == ISD::SUB && N0 == N1.getOperand(0).getOperand(1)) - return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, - N1.getOperand(0).getOperand(0), N1.getOperand(1)); + return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), + N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { @@ -1713,52 +1760,50 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N10 = N1.getOperand(0); SDValue N11 = N1.getOperand(1); - if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, + if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10)) + return DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } - if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (a+b) -> (a|b) iff a and b share no bits. if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && - VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, DL, VT, N0, N1); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && - isNullConstant(N1.getOperand(0).getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, - DAG.getNode(ISD::SHL, SDLoc(N), VT, + isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N0, + DAG.getNode(ISD::SHL, DL, VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && - isNullConstant(N0.getOperand(0).getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, - DAG.getNode(ISD::SHL, SDLoc(N), VT, + isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, DL, VT, N1, + DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0).getOperand(1), N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); - unsigned DestBits = VT.getScalarType().getSizeInBits(); + unsigned DestBits = VT.getScalarSizeInBits(); // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. 
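Several of the visitADD folds above are plain modular-arithmetic identities, so they can be sanity-checked directly on fixed-width unsigned values:

#include <cassert>
#include <cstdint>

// Three of the visitADD folds, checked on unsigned (wrap-around) arithmetic.
void checkAddFolds(uint32_t A, uint32_t B, uint32_t C, unsigned N) {
  // fold (a+b) -> (a|b) iff a and b share no bits
  if ((A & B) == 0)
    assert(A + B == (A | B));
  // fold (A+(B-(A+C))) -> (B-C): exact under wrap-around arithmetic
  assert(A + (B - (A + C)) == B - C);
  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N < 32)
    assert(A + ((0u - C) << N) == A - (C << N));
}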
- if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { - SDLoc DL(N); + if (NumSignBits == DestBits && + isOneConstantOrOneSplatConstant(N1->getOperand(1))) return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); - } } // add (sext i1), X -> sub X, (zext i1) if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getValueType() == MVT::i1 && !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { - SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } @@ -1767,7 +1812,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { - SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); @@ -1853,6 +1897,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) { @@ -1867,62 +1912,97 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes); if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // fold (sub c1, c2) -> c1-c2 - return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, - N0.getNode(), N1.getNode()); + return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), + N1.getNode()); } - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + // fold (sub x, c) -> (add x, -c) if (N1C) { - SDLoc DL(N); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); } + + if (isNullConstantOrNullSplatConstant(N0)) { + unsigned BitWidth = VT.getScalarSizeInBits(); + // Right-shifting everything out but the sign bit followed by negation is + // the same as flipping arithmetic/logical shift type without the negation: + // -(X >>u 31) -> (X >>s 31) + // -(X >>s 31) -> (X >>u 31) + if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) { + ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); + if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) { + auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA; + if (!LegalOperations || TLI.isOperationLegal(NewSh, VT)) + return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); + } + } + + // 0 - X --> 0 if the sub is NUW. + if (N->getFlags()->hasNoUnsignedWrap()) + return N0; + + if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) { + // N1 is either 0 or the minimum signed value. If the sub is NSW, then + // N1 must be 0 because negating the minimum signed value is undefined. + if (N->getFlags()->hasNoSignedWrap()) + return N0; + + // 0 - X --> X if X is 0 or the minimum signed value. + return N1; + } + } + // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) - if (isAllOnesConstant(N0)) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); + if (isAllOnesConstantOrAllOnesSplatConstant(N0)) + return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); + // fold (A+B)-A -> B if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) return N0.getOperand(1); + // fold (A+B)-B -> A if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); + // fold C2-(A+C1) -> (C2-C1)-A - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : - dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); - if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { - SDLoc DL(N); - SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), - DL, VT); - return DAG.getNode(ISD::SUB, DL, VT, NewC, - N1.getOperand(0)); + if (N1.getOpcode() == ISD::ADD) { + SDValue N11 = N1.getOperand(1); + if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && + isConstantOrConstantVector(N11, /* NoOpaques */ true)) { + SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); + } } + // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && (N0.getOperand(1).getOpcode() == ISD::SUB || N0.getOperand(1).getOpcode() == ISD::ADD) && N0.getOperand(1).getOperand(0) == N1) - return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1).getOperand(1)); + return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C - if (N0.getOpcode() == ISD::ADD && - N0.getOperand(1).getOpcode() == ISD::ADD && + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1).getOperand(0)); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B - if (N0.getOpcode() == ISD::SUB && - N0.getOperand(1).getOpcode() == ISD::SUB && + if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1).getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), + N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef if (N0.isUndef()) @@ -1937,19 +2017,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1C && GA->getOpcode() == ISD::GlobalAddress) return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() - - (uint64_t)N1C->getSExtValue()); + (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), - SDLoc(N), VT); + DL, VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); if (TN->getVT() == MVT::i1) { - SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); @@ -2048,7 +2127,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // We require a splat of the entire scalar bit width for non-contiguous // bit patterns. 
bool IsFullSplat = - ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); + ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; @@ -2080,28 +2159,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { getShiftAmountTy(N0.getValueType())))); } - APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N1IsConst && N0.getOpcode() == ISD::SHL && - (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) { + if (N0.getOpcode() == ISD::SHL && + isConstantOrConstantVector(N1, /* NoOpaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); - AddToWorklist(C3.getNode()); - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); + if (isConstantOrConstantVector(C3)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { SDValue Sh(nullptr, 0), Y(nullptr, 0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && - (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1))) && + isConstantOrConstantVector(N0.getOperand(1)) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N1.getOperand(1)) && + isConstantOrConstantVector(N1.getOperand(1)) && N1.getNode()->hasOneUse()) { Sh = N1; Y = N0; } @@ -2188,8 +2266,8 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op1 = Node->getOperand(1); SDValue combined; for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + UE = Op0.getNode()->use_end(); UI != UE;) { + SDNode *User = *UI++; if (User == Node || User->use_empty()) continue; // Convert the other matching node(s), too; @@ -2246,10 +2324,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); - } + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives @@ -2302,8 +2378,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return Op; // sdiv, srem -> sdivrem - // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. - // Otherwise, we break the simplification logic in visitREM(). + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is + // true. Otherwise, we break the simplification logic in visitREM(). 
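One small change in useDivRem above moves the increment out of the for-header and into the dereference (SDNode *User = *UI++). A plausible reading is that rewriting a matching user can disturb the use list being walked, so the iterator steps past the current use before anything can invalidate it; the same defensive pattern in ordinary C++:

#include <list>

// Advance the iterator before acting on the element it points at, so the
// action (here, erasing) cannot invalidate the iterator still in use.
void eraseOdd(std::list<int> &Values) {
  for (auto It = Values.begin(), End = Values.end(); It != End;) {
    auto Cur = It++;        // step past the element first
    if (*Cur % 2 != 0)
      Values.erase(Cur);    // safe: It no longer refers to *Cur
  }
}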
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; @@ -2337,25 +2413,33 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; + // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, - getShiftAmountTy(N0.getValueType()))); + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1)) { + SDValue LogBase2 = BuildLogBase2(N1, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, - N1.getOperand(1), - DAG.getConstant(SHC->getAPIntValue() - .logBase2(), - DL, ADDVT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::SRL, DL, VT, N0, Add); - } + SDValue N10 = N1.getOperand(0); + if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N10)) { + SDValue LogBase2 = BuildLogBase2(N10, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); + AddToWorklist(Trunc.getNode()); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } @@ -2366,8 +2450,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return Op; // sdiv, srem -> sdivrem - // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. - // Otherwise, we break the simplification logic in visitREM(). + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is + // true. Otherwise, we break the simplification logic in visitREM(). if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue DivRem = useDivRem(N)) return DivRem; @@ -2401,27 +2485,25 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (isSigned) { // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. 
Handles (X & 0x0FFFFFFF) %s 16 -> X&15 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, DL, VT, N0, N1); - } + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } else { // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - N1C->getAPIntValue().isPowerOf2()) { - return DAG.getNode(ISD::AND, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + if (DAG.isKnownToBeAPowerOfTwo(N1)) { + APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) - if (N1.getOpcode() == ISD::SHL) { - ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0)); - if (SHC && SHC->getAPIntValue().isPowerOf2()) { - APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits()); - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); - } + if (N1.getOpcode() == ISD::SHL && + DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { + APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } @@ -2477,8 +2559,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (isOneConstant(N1)) { SDLoc DL(N); return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, - DAG.getConstant(N0.getValueType().getSizeInBits() - 1, - DL, + DAG.getConstant(N0.getValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); } // fold (mulhs x, undef) -> 0 @@ -2706,7 +2787,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); // Bail early if none of these transforms apply. 
- if (N0.getNode()->getNumOperands() == 0) return SDValue(); + if (N0.getNumOperands() == 0) return SDValue(); // For each of OP in AND/OR/XOR: // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) @@ -2872,25 +2953,34 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, LL.getValueType().isInteger()) { // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) if (isNullConstant(LR) && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } if (isAllOnesConstant(LR)) { // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } } } @@ -2899,14 +2989,17 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, Op0 == Op1 && LL.getValueType().isInteger() && Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || (isAllOnesConstant(LR) && isNullConstant(RR)))) { - SDLoc DL(N0); - SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), - LL, DAG.getConstant(1, DL, - LL.getValueType())); - AddToWorklist(ADDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, - DAG.getConstant(2, DL, LL.getValueType()), - ISD::SETUGE); + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDLoc DL(N0); + SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), + LL, DAG.getConstant(1, DL, + LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, DL, LL.getValueType()), + ISD::SETUGE); + } } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { @@ -2967,6 +3060,11 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, unsigned Size = VT.getSizeInBits(); const APInt &AndMask = CAnd->getAPIntValue(); unsigned ShiftBits = CShift->getZExtValue(); + + // Bail out, this node will probably disappear anyway. 
+ if (ShiftBits == 0) + return SDValue(); + unsigned MaskBits = AndMask.countTrailingOnes(); EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); @@ -2985,7 +3083,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, // extended to handle extensions mixed in. SDValue SL(N0); - assert(ShiftBits != 0 && MaskBits <= Size); + assert(MaskBits <= Size); // Extracting the highest bit of the low half. EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); @@ -3050,6 +3148,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); + // x & x --> x + if (N0 == N1) + return N0; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -3058,16 +3160,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) // do not return N0, because undef node may exist in N0 - return DAG.getConstant( - APInt::getNullValue( - N0.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N0.getValueType()); + return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 - return DAG.getConstant( - APInt::getNullValue( - N1.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N1.getValueType()); + return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), + SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -3078,7 +3176,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and c1, c2) -> c1&c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS @@ -3089,7 +3187,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 - unsigned BitWidth = VT.getScalarType().getSizeInBits(); + unsigned BitWidth = VT.getScalarSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); @@ -3098,14 +3196,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) - if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1))) if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); - Mask = Mask.trunc(N0Op0.getValueSizeInBits()); + Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); @@ -3156,7 +3254,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // that will apply equally to all members of the vector, so AND all the // lanes of the constant together. 
EVT VT = Vector->getValueType(0); - unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + unsigned BitWidth = VT.getScalarSizeInBits(); // If the splat value has been compressed to a bitlength lower // than the size of the vector lane, we need to re-expand it to @@ -3187,8 +3285,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Resize the constant to the same size as the original memory access before // extension. If it is still the AllOnesValue then this AND is completely // unneeded. - Constant = - Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits()); bool B; switch (Load->getExtensionType()) { @@ -3230,9 +3327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) - if (N1C && (N0.getOpcode() == ISD::LOAD || - (N0.getOpcode() == ISD::ANY_EXTEND && - N0.getOperand(0).getOpcode() == ISD::LOAD))) { + if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; LoadSDNode *LN0 = HasAnyExt ? cast<LoadSDNode>(N0.getOperand(0)) @@ -3293,10 +3390,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) return Tmp; + // Masking the negated extension of a boolean is just the zero-extended + // boolean: + // and (sub 0, zext(bool X)), 1 --> zext(bool X) + // and (sub 0, sext(bool X)), 1 --> zext(bool X) + // + // Note: the SimplifyDemandedBits fold below can make an information-losing + // transform, and then we have no way to find this better fold. + if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) { + ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0)); + SDValue SubRHS = N0.getOperand(1); + if (SubLHS && SubLHS->isNullValue()) { + if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && + SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) + return SubRHS; + if (SubRHS.getOpcode() == ISD::SIGN_EXTEND && + SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0)); + } + } + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) + if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (zext_inreg (extload x)) -> (zextload x) @@ -3305,9 +3421,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = N1.getScalarValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && + BitWidth - MemVT.getScalarSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, @@ -3325,9 +3441,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = N1.getScalarValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && + BitWidth - MemVT.getScalarSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, @@ -3391,8 +3507,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); - if (!N0.getNode()->hasOneUse() || - !N1.getNode()->hasOneUse()) + if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -3627,18 +3742,24 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + EVT CCVT = getSetCCResultType(LR.getValueType()); + if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } } } // canonicalize equivalent to ll == rl @@ -3708,6 +3829,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N1.getValueType(); + // x | x --> x + if (N0 == N1) + return N0; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -3723,15 +3848,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (ISD::isBuildVectorAllOnes(N0.getNode())) // do not return N0, because undef node may exist in N0 return DAG.getConstant( - APInt::getAllOnesValue( - 
N0.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N0.getValueType()); + APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N), + N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( - APInt::getAllOnesValue( - N1.getValueType().getScalarType().getSizeInBits()), - SDLoc(N), N1.getValueType()); + APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N), + N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting shuffle is legal. @@ -4122,6 +4245,110 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { return nullptr; } +namespace { +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation. +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +struct BaseIndexOffset { + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) : + Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} + + bool equalBaseIndex(const BaseIndexOffset &Other) { + return Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt; + } + + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, + int64_t PartialOffset = 0) { + bool IsIndexSignExt = false; + + // Split up a folded GlobalAddress+Offset into its component parts. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) + if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { + return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), + SDLoc(GA), + GA->getValueType(0), + /*Offset=*/PartialOffset, + /*isTargetGA=*/false, + GA->getTargetFlags()), + SDValue(), + GA->getOffset(), + IsIndexSignExt); + } + + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD + // instruction, then it could be just the BASE or everything else we don't + // know how to handle. Just use Ptr as BASE and give up. + if (Ptr->getOpcode() != ISD::ADD) + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); + + // We know that we have at least an ADD instruction. Try to pattern match + // the simple case of BASE + OFFSET. + if (isa<ConstantSDNode>(Ptr->getOperand(1))) { + int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); + return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); + } + + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Ptr is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); + + // Look at Base + Index + Offset cases. 
+ SDValue Base = Ptr->getOperand(0); + SDValue IndexOffset = Ptr->getOperand(1); + + // Skip signextends. + if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { + IndexOffset = IndexOffset->getOperand(0); + IsIndexSignExt = true; + } + + // Either the case of Base + Index (no offset) or something else. + if (IndexOffset->getOpcode() != ISD::ADD) + return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); + + // Now we have the case of Base + Index + offset. + SDValue Index = IndexOffset->getOperand(0); + SDValue Offset = IndexOffset->getOperand(1); + + if (!isa<ConstantSDNode>(Offset)) + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); + + // Ignore signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else IsIndexSignExt = false; + + int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); + return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); + } +}; +} // namespace + SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4317,16 +4544,20 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); if (!BinOpCst) return SDValue(); - // FIXME: disable this unless the input to the binop is a shift by a constant. - // If it is not a shift, it pessimizes some common cases like: - // - // void foo(int *X, int i) { X[i & 1235] = 1; } - // int bar(int *X, int i) { return X[i & 255]; } + // FIXME: disable this unless the input to the binop is a shift by a constant + // or is copy/select.Enable this in other cases when figure out it's exactly profitable. SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); - if ((BinOpLHSVal->getOpcode() != ISD::SHL && - BinOpLHSVal->getOpcode() != ISD::SRA && - BinOpLHSVal->getOpcode() != ISD::SRL) || - !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) + bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL || + BinOpLHSVal->getOpcode() == ISD::SRA || + BinOpLHSVal->getOpcode() == ISD::SRL; + bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg || + BinOpLHSVal->getOpcode() == ISD::SELECT; + + if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) && + !isCopyOrSelect) + return SDValue(); + + if (isCopyOrSelect && N->hasOneUse()) return SDValue(); EVT VT = N->getValueType(0); @@ -4366,19 +4597,15 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { SDValue N01 = N->getOperand(0).getOperand(1); - - if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { - if (!N01C->isOpaque()) { - EVT TruncVT = N->getValueType(0); - SDValue N00 = N->getOperand(0).getOperand(0); - APInt TruncC = N01C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); - SDLoc DL(N); - - return DAG.getNode(ISD::AND, DL, TruncVT, - DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), - DAG.getConstant(TruncC, DL, TruncVT)); - } + if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) { + SDLoc DL(N); + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00); + SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01); + AddToWorklist(Trunc00.getNode()); + AddToWorklist(Trunc01.getNode()); + return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01); } } 
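The distributeTruncateThroughAnd rewrite above is justified by truncation distributing over bitwise AND: (truncate (and x, c)) produces the same bits as (and (truncate x), (truncate c)). A minimal standalone sketch of that identity, assuming plain fixed-width integers (an illustrative i32-to-i16 truncation) in place of SDValues; none of these names or widths come from the patch:

    // Checks trunc(X & C) == trunc(X) & trunc(C) for an i32 -> i16 truncation.
    #include <cassert>
    #include <cstdint>

    static uint16_t truncOfAnd(uint32_t X, uint32_t C) {
      return static_cast<uint16_t>(X & C);        // (truncate (and X, C))
    }

    static uint16_t andOfTrunc(uint32_t X, uint32_t C) {
      return static_cast<uint16_t>(X) &
             static_cast<uint16_t>(C);            // (and (trunc X), (trunc C))
    }

    int main() {
      const uint32_t Xs[] = {0u, 1u, 0xFFFFu, 0x12345678u, 0xFFFFFFFFu};
      const uint32_t Cs[] = {0u, 0xFFu, 0xFF00u, 0xDEADBEEFu};
      for (uint32_t X : Xs)
        for (uint32_t C : Cs)
          assert(truncOfAnd(X, C) == andOfTrunc(X, C));
      return 0;
    }

Since the identity holds bit-for-bit, the combiner can emit two TRUNCATE nodes and a narrower AND whether the constant operand is a scalar constant or a constant vector, which is the generalization the new isConstantOrConstantVector check permits.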
@@ -4404,7 +4631,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; @@ -4425,12 +4651,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } - } else { - N1C = isConstOrConstSplat(N1); } } } + ConstantSDNode *N1C = isConstOrConstSplat(N1); + // fold (shl c1, c2) -> c1<<c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) @@ -4464,13 +4690,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SHL) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t c1 = N0C1->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); SDLoc DL(N); - if (c1 + c2 >= OpSizeInBits) + APInt c1 = N0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, DL, N1.getValueType())); + + return DAG.getNode( + ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } @@ -4485,18 +4716,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SHL) { SDValue N0Op0 = N0.getOperand(0); if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { - uint64_t c1 = N0Op0C1->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); + APInt c1 = N0Op0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + EVT InnerShiftVT = N0Op0.getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); - if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c2.uge(OpSizeInBits - InnerShiftSize)) { SDLoc DL(N0); - if (c1 + c2 >= OpSizeInBits) + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::SHL, DL, VT, - DAG.getNode(N0.getOpcode(), DL, VT, - N0Op0->getOperand(0)), - DAG.getConstant(c1 + c2, DL, N1.getValueType())); + + return DAG.getNode( + ISD::SHL, DL, VT, + DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } } @@ -4508,8 +4743,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SRL) { SDValue N0Op0 = N0.getOperand(0); if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { - uint64_t c1 = N0Op0C1->getZExtValue(); - if (c1 < VT.getScalarSizeInBits()) { + if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) { + uint64_t c1 = N0Op0C1->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); if (c1 == c2) { SDValue NewOp0 = N0.getOperand(0); @@ -4569,37 +4804,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } } + // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) - if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) && + isConstantOrConstantVector(N1, /* No Opaques */ true)) { unsigned BitSize = VT.getScalarSizeInBits(); SDLoc DL(N); - SDValue HiBitsMask = - DAG.getConstant(APInt::getHighBitsSet(BitSize, - BitSize - N1C->getZExtValue()), - DL, VT); - return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), - 
HiBitsMask); + SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT); + SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); } // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. - APInt Val; - if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - (isa<ConstantSDNode>(N0.getOperand(1)) || - ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + isConstantOrConstantVector(N1, /* No Opaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + AddToWorklist(Shl0.getNode()); + AddToWorklist(Shl1.getNode()); return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) - if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - if (SDValue Folded = - DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); - } + if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() && + isConstantOrConstantVector(N1, /* No Opaques */ true) && + isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + if (isConstantOrConstantVector(Shl)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); } if (N1C && !N1C->isOpaque()) @@ -4613,16 +4848,18 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + + // Arithmetic shifting an all-sign-bit value is a no-op. 
+ if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) + return N0; // fold vector ops - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (VT.isVector()) { + if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N1C = isConstOrConstSplat(N1); - } + ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (sra c1, c2) -> (sra c1, c2) ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); @@ -4634,8 +4871,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra -1, x) -> -1 if (isAllOnesConstant(N0)) return N0; - // fold (sra x, (setge c, size(x))) -> undef - if (N1C && N1C->getZExtValue() >= OpSizeInBits) + // fold (sra x, c >= size(x)) -> undef + if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -4656,13 +4893,19 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRA) { - if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { - unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= OpSizeInBits) - Sum = OpSizeInBits - 1; + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { SDLoc DL(N); - return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), - DAG.getConstant(Sum, DL, N1.getValueType())); + APInt c1 = N0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) + Sum = APInt(OpSizeInBits, OpSizeInBits - 1); + + return DAG.getNode( + ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } @@ -4759,16 +5002,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (VT.isVector()) { + if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N1C = isConstOrConstSplat(N1); - } + ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (srl c1, c2) -> c1 >>u c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); @@ -4778,7 +5019,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (isNullConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef - if (N1C && N1C->getZExtValue() >= OpSizeInBits) + if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (srl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -4790,14 +5031,19 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL) { - if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t c1 = N01C->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { SDLoc DL(N); - if (c1 + c2 >= OpSizeInBits) + APInt c1 = N0C1->getAPIntValue(); + APInt c2 = N1C->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + + APInt Sum = c1 + c2; + if (Sum.uge(OpSizeInBits)) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, DL, N1.getValueType())); + + return DAG.getNode( + ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); } } @@ -4810,7 +5056,7 @@ SDValue 
DAGCombiner::visitSRL(SDNode *N) { uint64_t c2 = N1C->getZExtValue(); EVT InnerShiftVT = N0.getOperand(0).getValueType(); EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { SDLoc DL(N0); @@ -4825,14 +5071,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl (shl x, c), c) -> (and x, cst2) - if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { - unsigned BitSize = N0.getScalarValueSizeInBits(); - if (BitSize <= 64) { - uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; - SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, DL, VT)); - } + if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && + isConstantOrConstantVector(N1, /* NoOpaques */ true)) { + SDLoc DL(N); + APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits()); + SDValue Mask = + DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1); + AddToWorklist(Mask.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask); } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) @@ -5065,6 +5311,41 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +// TODO: We should handle other cases of selecting between {-1,0,1} here. +SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { + SDValue Cond = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + EVT CondVT = Cond.getValueType(); + SDLoc DL(N); + + // fold (select Cond, 0, 1) -> (xor Cond, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. + if (VT.isInteger() && + (CondVT == MVT::i1 || (CondVT.isInteger() && + TLI.getBooleanContents(false, true) == + TargetLowering::ZeroOrOneBooleanContent && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent)) && + isNullConstant(N1) && isOneConstant(N2)) { + SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond, + DAG.getConstant(1, DL, CondVT)); + if (VT.bitsEq(CondVT)) + return NotCond; + return DAG.getZExtOrTrunc(NotCond, DL, VT); + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5080,39 +5361,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold (select false, X, Y) -> Y return !N0C->isNullValue() ? 
N1 : N2; } - // fold (select C, 1, X) -> (or C, X) - if (VT == MVT::i1 && isOneConstant(N1)) + // fold (select X, X, Y) -> (or X, Y) + // fold (select X, 1, Y) -> (or C, Y) + if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); - // fold (select C, 0, 1) -> (xor C, 1) - // We can't do this reliably if integer based booleans have different contents - // to floating point based booleans. This is because we can't tell whether we - // have an integer-based boolean or a floating-point-based boolean unless we - // can find the SETCC that produced it and inspect its operands. This is - // fairly easy if C is the SETCC node, but it can potentially be - // undiscoverable (or not reasonably discoverable). For example, it could be - // in another basic block or it could require searching a complicated - // expression. - if (VT.isInteger() && - (VT0 == MVT::i1 || (VT0.isInteger() && - TLI.getBooleanContents(false, false) == - TLI.getBooleanContents(false, true) && - TLI.getBooleanContents(false, false) == - TargetLowering::ZeroOrOneBooleanContent)) && - isNullConstant(N1) && isOneConstant(N2)) { - SDValue XORNode; - if (VT == VT0) { - SDLoc DL(N); - return DAG.getNode(ISD::XOR, DL, VT0, - N0, DAG.getConstant(1, DL, VT0)); - } - SDLoc DL0(N0); - XORNode = DAG.getNode(ISD::XOR, DL0, VT0, - N0, DAG.getConstant(1, DL0, VT0)); - AddToWorklist(XORNode.getNode()); - if (VT.bitsGT(VT0)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); - } + + if (SDValue V = foldSelectOfConstants(N)) + return V; + // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); @@ -5125,16 +5381,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } - // fold (select C, X, 0) -> (and C, X) - if (VT == MVT::i1 && isNullConstant(N2)) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); - // fold (select X, X, Y) -> (or X, Y) - // fold (select X, 1, Y) -> (or X, Y) - if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) - if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) + if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. @@ -5145,7 +5394,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // The code in this block deals with the following 2 equivalences: // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) - // The target can specify its prefered form with the + // The target can specify its preferred form with the // shouldNormalizeToSelectSequence() callback. 
However we always transform // to the right anyway if we find the inner select exists in the DAG anyway // and we always transform to the left side if we know that we can further @@ -5214,6 +5463,18 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } + // select (xor Cond, 1), X, Y -> select Cond, Y, X + if (VT0 == MVT::i1) { + if (N0->getOpcode() == ISD::XOR) { + if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { + SDValue Cond0 = N0->getOperand(0); + if (C->isOne()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), + Cond0, N2, N1); + } + } + } + // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { // select x, y (fcmp lt x, y) -> fminnum x, y @@ -5269,7 +5530,7 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { - SDLoc dl(N); + SDLoc DL(N); SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); @@ -5316,7 +5577,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { "One half of the selector was all UNDEFs and the other was all the " "same value. This should have been addressed before this function."); return DAG.getNode( - ISD::CONCAT_VECTORS, dl, VT, + ISD::CONCAT_VECTORS, DL, VT, BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } @@ -5390,6 +5651,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); SDValue Data = MST->getValue(); + EVT VT = Data.getValueType(); SDLoc DL(N); // If the MSTORE data type requires splitting and the mask is provided by a @@ -5399,16 +5661,13 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (Mask.getOpcode() == ISD::SETCC) { // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + if (TLI.getTypeAction(*DAG.getContext(), VT) != TargetLowering::TypeSplitVector) return SDValue(); SDValue MaskLo, MaskHi, Lo, Hi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); - SDValue Chain = MST->getChain(); SDValue Ptr = MST->getBasePtr(); @@ -5418,8 +5677,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = - (Alignment == Data->getValueType(0).getSizeInBits()/8) ? - Alignment/2 : Alignment; + (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment; EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); @@ -5433,11 +5691,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { Alignment, MST->getAAInfo(), MST->getRanges()); Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - MST->isTruncatingStore()); + MST->isTruncatingStore(), + MST->isCompressingStore()); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + MST->isCompressingStore()); MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MST->getPointerInfo(), @@ -5446,7 +5704,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MST->getRanges()); Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - MST->isTruncatingStore()); + MST->isTruncatingStore(), + MST->isCompressingStore()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -5585,11 +5844,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, - ISD::NON_EXTLOAD); + ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + MLD->isExpandingLoad()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -5597,7 +5855,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, - ISD::NON_EXTLOAD); + ISD::NON_EXTLOAD, MLD->isExpandingLoad()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -5625,6 +5883,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N2 = N->getOperand(2); SDLoc DL(N); + // fold (vselect C, X, X) -> X + if (N1 == N2) + return N1; + // Canonicalize integer abs. // vselect (setg[te] X, 0), X, -X -> // vselect (setgt X, -1), X, -X -> @@ -5648,7 +5910,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { EVT VT = LHS.getValueType(); SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); + DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -5803,7 +6065,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, // We can fold this node into a build_vector. unsigned VTBits = SVT.getSizeInBits(); - unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); + unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); SmallVector<SDValue, 8> Elts; unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); @@ -6026,7 +6288,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6038,9 +6300,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. 
SDValue Op = N0.getOperand(0); - unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); - unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); - unsigned DestBits = VT.getScalarType().getSizeInBits(); + unsigned OpBits = Op.getScalarValueSizeInBits(); + unsigned MidBits = N0.getScalarValueSizeInBits(); + unsigned DestBits = VT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op); if (OpBits == DestBits) { @@ -6201,7 +6463,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) // Here, T can be 1 or -1, depending on the type of the setcc and // getBooleanContents(). - unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits(); + unsigned SetCCWidth = N0.getScalarValueSizeInBits(); SDLoc DL(N); // To determine the "true" side of the select, we need to know the high bit @@ -6323,7 +6585,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6338,7 +6600,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode *oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. @@ -6528,7 +6790,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // elements we can use a matching integer vector type and then // truncate/sign extend. EVT MatchingElementType = EVT::getIntegerVT( - *DAG.getContext(), N00VT.getScalarType().getSizeInBits()); + *DAG.getContext(), N00VT.getScalarSizeInBits()); EVT MatchingVectorType = EVT::getVectorVT( *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); SDValue VsetCC = @@ -6558,8 +6820,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue InnerZExt = N0.getOperand(0); // If the original shl may be shifting out bits, do not perform this // transformation. - unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - - InnerZExt.getOperand(0).getValueType().getSizeInBits(); + unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() - + InnerZExt.getOperand(0).getValueSizeInBits(); if (ShAmtVal > KnownZeroBits) return SDValue(); } @@ -6598,7 +6860,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + SDNode *oye = N0.getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. 
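Several of the sign-extension folds above hinge on the observation that a trunc/sext pair is a no-op when the value feeding the truncate already carries enough sign bits, which is what the ComputeNumSignBits check guards. A small sketch of that round-trip property, assuming an illustrative i32 -> i16 -> i32 path and two's-complement wrap on the narrowing cast (neither detail is taken from the patch):

    // (sext (trunc X)) for an illustrative i32 -> i16 -> i32 round trip.
    #include <cassert>
    #include <cstdint>

    static int32_t truncThenSext(int32_t X) {
      return static_cast<int32_t>(static_cast<int16_t>(X));
    }

    int main() {
      // Values already sign-extended from i16: the trunc/sext pair is a no-op.
      const int32_t InRange[] = {0, 1, -1, 32767, -32768};
      for (int32_t X : InRange)
        assert(truncThenSext(X) == X);

      // A value with fewer sign bits is not preserved, so the fold has to be
      // gated on the sign-bit count of the input.
      assert(truncThenSext(70000) != 70000);
      return 0;
    }
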
@@ -6625,15 +6887,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::Constant && !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), N0.getValueType())) { + SDLoc DL(N); SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); + X = DAG.getNode(ISD::TRUNCATE, DL, VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); } @@ -6820,7 +7082,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); // Is the load width a multiple of size of VT? - if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) + if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } @@ -6952,8 +7214,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT EVT = cast<VTSDNode>(N1)->getVT(); - unsigned VTBits = VT.getScalarType().getSizeInBits(); - unsigned EVTBits = EVT.getScalarType().getSizeInBits(); + unsigned VTBits = VT.getScalarSizeInBits(); + unsigned EVTBits = EVT.getScalarSizeInBits(); if (N0.isUndef()) return DAG.getUNDEF(VT); @@ -6977,14 +7239,23 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + if (N00.getScalarValueSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); + } + + // fold (sext_in_reg (zext x)) -> (sext x) + // iff we are extending the source sign bit. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getScalarValueSizeInBits() == EVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) - return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); + return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType()); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. @@ -7111,6 +7382,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // If this is anyext(trunc), don't fold it, allow ourselves to be folded. + if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) + return SDValue(); + // Fold extract-and-trunc into a narrow extract. 
For example: // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) // i32 y = TRUNCATE(i64 x) @@ -7148,7 +7423,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } // trunc (select c, a, b) -> select c, (trunc a), (trunc b) - if (N0.getOpcode() == ISD::SELECT) { + if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) { EVT SrcVT = N0.getValueType(); if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && TLI.isTruncateFree(SrcVT, VT)) { @@ -7160,15 +7435,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } - // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2 + // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && TLI.isTypeDesirableForOp(ISD::SHL, VT)) { if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { uint64_t Amt = CAmt->getZExtValue(); - unsigned Size = VT.getSizeInBits(); + unsigned Size = VT.getScalarSizeInBits(); - if (Amt < Size / 2) { + if (Amt < Size) { SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7525,7 +7800,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { - unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); + unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); @@ -7848,10 +8123,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -7861,8 +8140,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1->getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL && + N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8090,11 +8373,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. 
- if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8106,7 +8393,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1.getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); @@ -8221,8 +8511,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return SDValue(); } -/// Try to perform FMA combining on a given FMUL node. -SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { +/// Try to perform FMA combining on a given FMUL node based on the distributive +/// law x * (y + 1) = x * y + x and variants thereof (commuted versions, +/// subtraction instead of addition). +SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); @@ -8231,17 +8523,23 @@ SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); const TargetOptions &Options = DAG.getTarget().Options; - bool AllowFusion = - (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); - // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + // The transforms below are incorrect when x == 0 and y == inf, because the + // intermediate multiplication produces a nan. + if (!Options.NoInfsFPMath) + return SDValue(); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + // Floating-point multiply-add with intermediate rounding. This can result + // in a less precise result due to the changed rounding order. + bool HasFMAD = Options.UnsafeFPMath && + (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); @@ -8338,17 +8636,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations), Flags); + // FIXME: Auto-upgrade the target/function-level option. + if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { + // fold (fadd A, 0) -> A + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) + if (N1C->isZero()) + return N0; + } + // If 'unsafe math' is enabled, fold lots of things. 
if (Options.UnsafeFPMath) { // No FP constant should be created after legalization as Instruction // Selection pass has a hard time dealing with FP constants. bool AllowNewConst = (Level < AfterLegalizeDAG); - // fold (fadd A, 0) -> A - if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) - if (N1C->isZero()) - return N0; - // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) @@ -8457,7 +8758,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); - SDLoc dl(N); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; @@ -8468,30 +8769,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) - return DAG.getNode(ISD::FADD, dl, VT, N0, + return DAG.getNode(ISD::FADD, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations), Flags); - // If 'unsafe math' is enabled, fold lots of things. - if (Options.UnsafeFPMath) { - // (fsub A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; - + // FIXME: Auto-upgrade the target/function-level option. + if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, dl, VT, N1); + return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } + } + + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // (fsub A, 0) -> A + if (N1CFP && N1CFP->isZero()) + return N0; // (fsub x, x) -> 0.0 if (N0 == N1) - return DAG.getConstantFP(0.0f, dl, VT); + return DAG.getConstantFP(0.0f, DL, VT); // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) @@ -8611,7 +8915,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // FMUL -> FMA combines: - if (SDValue Fused = visitFMULForFMACombine(N)) { + if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8626,14 +8930,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - SDLoc dl(N); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // Constant fold FMA. 
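The (fadd A, 0) and (fsub 0, B) folds relocated above are correct except for signed zeros, which is why they are now also allowed under the per-node NoSignedZeros flag. A quick IEEE-754 check of both corner cases (standalone sketch):

    #include <cstdio>
    int main() {
      float negzero = -0.0f;
      // (fadd A, 0) -> A is wrong for A == -0.0: the add produces +0.0.
      std::printf("%g\n", negzero + 0.0f);  // prints 0, not -0
      // (fsub 0, B) -> (fneg B) is wrong for B == +0.0:
      // 0.0 - 0.0 is +0.0, while fneg(+0.0) is -0.0.
      std::printf("%g\n", 0.0f - 0.0f);     // prints 0
      std::printf("%g\n", -(0.0f));         // prints -0
    }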
if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) && isa<ConstantFPSDNode>(N2)) { - return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2); + return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } if (Options.UnsafeFPMath) { @@ -8663,8 +8967,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && isConstantFPBuildVectorOrConstantFP(N1) && isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), &Flags), &Flags); } @@ -8672,9 +8976,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N0.getOpcode() == ISD::FMUL && isConstantFPBuildVectorOrConstantFP(N1) && isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { - return DAG.getNode(ISD::FMA, dl, VT, + return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), &Flags), N2); } @@ -8685,32 +8989,32 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) // TODO: The FMA node should have flags that propagate to this node. - return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + return DAG.getNode(ISD::FADD, DL, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { - SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); AddToWorklist(RHSNeg.getNode()); // TODO: The FMA node should have flags that propagate to this node. - return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); } } if (Options.UnsafeFPMath) { // (fma x, c, x) -> (fmul x, (c+1)) if (N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, dl, VT), - &Flags), &Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, + DAG.getConstantFP(1.0, DL, VT), &Flags), + &Flags); } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, dl, VT), - &Flags), &Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getNode(ISD::FADD, DL, VT, N1, + DAG.getConstantFP(-1.0, DL, VT), &Flags), + &Flags); } } @@ -8720,7 +9024,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) -// Notice that this is not always beneficial. One reason is different target +// Notice that this is not always beneficial. One reason is different targets // may have different costs for FDIV and FMUL, so sometimes the cost of two // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". 
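The reciprocal comment above corresponds to this source-level shape (sketch; needs unsafe/fast FP math since a * (1/D) is not bit-identical to a / D, and it only pays off once enough divisions share the divisor):

    void divide_both(float a, float b, float D, float &x, float &y) {
      x = a / D;   // several divisions by the same D ...
      y = b / D;
      // ... may become:
      //   float recip = 1.0f / D;
      //   x = a * recip;
      //   y = b * recip;
    }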
@@ -8907,14 +9211,18 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) + if (!DAG.getTarget().Options.UnsafeFPMath) + return SDValue(); + + SDValue N0 = N->getOperand(0); + if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); // TODO: FSQRT nodes should have flags that propagate to the created nodes. // For now, create a Flags object for use with all unsafe math transforms. SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - return buildSqrtEstimate(N->getOperand(0), &Flags); + return buildSqrtEstimate(N0, &Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -8941,11 +9249,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - if (N0CFP && N1CFP) // Constant fold + if (N0CFP && N1CFP) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); if (N1CFP) { - const APFloat& V = N1CFP->getValueAPF(); + const APFloat &V = N1CFP->getValueAPF(); // copysign(x, c1) -> fabs(x) iff ispos(c1) // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { @@ -8963,8 +9271,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - N0.getOperand(0), N1); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) @@ -8972,14 +9279,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - N0, N1.getOperand(1)); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) - return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, - N0, N1.getOperand(0)); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); return SDValue(); } @@ -9159,7 +9464,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { const bool NIsTrunc = N->getConstantOperandVal(1) == 1; - const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1; // Skip this folding if it results in an fp_round from f80 to f16. // @@ -9232,7 +9537,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND - && N0.getNode()->getConstantOperandVal(1) == 1) { + && N0.getConstantOperandVal(1) == 1) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) @@ -9319,7 +9624,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x80... per scalar element // and splat it. - SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x80... 
@@ -9424,7 +9729,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x7f... per scalar element // and splat it. - SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x7f... @@ -10103,7 +10408,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // value. // TODO: Handle store large -> read small portion. // TODO: Handle TRUNCSTORE/LOADEXT - if (ISD::isNormalLoad(N) && !LD->isVolatile()) { + if (OptLevel != CodeGenOpt::None && + ISD::isNormalLoad(N) && !LD->isVolatile()) { if (ISD::isNON_TRUNCStore(Chain.getNode())) { StoreSDNode *PrevST = cast<StoreSDNode>(Chain); if (PrevST->getBasePtr() == Ptr && @@ -10405,7 +10711,7 @@ struct LoadedSlice { assert(Inst && Origin && "Unable to replace a non-existing slice."); const SDValue &OldBaseAddr = Origin->getBasePtr(); SDValue BaseAddr = OldBaseAddr; - // Get the offset in that chunk of bytes w.r.t. the endianess. + // Get the offset in that chunk of bytes w.r.t. the endianness. int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); assert(Offset >= 0 && "Offset too big to fit in int64_t!"); if (Offset) { @@ -10705,7 +11011,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { LSIt != LSItEnd; ++LSIt) { SDValue SliceInst = LSIt->loadSlice(); CombineTo(LSIt->Inst, SliceInst, true); - if (SliceInst.getNode()->getOpcode() != ISD::LOAD) + if (SliceInst.getOpcode() != ISD::LOAD) SliceInst = SliceInst.getOperand(0); assert(SliceInst->getOpcode() == ISD::LOAD && "It takes more than a zext to get to the loaded slice!!"); @@ -11033,110 +11339,6 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } -namespace { -/// Helper struct to parse and store a memory address as base + index + offset. -/// We ignore sign extensions when it is safe to do so. -/// The following two expressions are not equivalent. To differentiate we need -/// to store whether there was a sign extension involved in the index -/// computation. -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (add (i8 load %index) -/// (i8 1)))) -/// vs -/// -/// (load (i64 add (i64 copyfromreg %c) -/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) -/// (i32 1))))) -struct BaseIndexOffset { - SDValue Base; - SDValue Index; - int64_t Offset; - bool IsIndexSignExt; - - BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} - - BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, - bool IsIndexSignExt) : - Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} - - bool equalBaseIndex(const BaseIndexOffset &Other) { - return Other.Base == Base && Other.Index == Index && - Other.IsIndexSignExt == IsIndexSignExt; - } - - /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { - bool IsIndexSignExt = false; - - // Split up a folded GlobalAddress+Offset into its component parts. - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) - if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { - return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), - SDLoc(GA), - GA->getValueType(0), - /*Offset=*/0, - /*isTargetGA=*/false, - GA->getTargetFlags()), - SDValue(), - GA->getOffset(), - IsIndexSignExt); - } - - // We only can pattern match BASE + INDEX + OFFSET. 
If Ptr is not an ADD - // instruction, then it could be just the BASE or everything else we don't - // know how to handle. Just use Ptr as BASE and give up. - if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - - // We know that we have at least an ADD instruction. Try to pattern match - // the simple case of BASE + OFFSET. - if (isa<ConstantSDNode>(Ptr->getOperand(1))) { - int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); - return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, - IsIndexSignExt); - } - - // Inside a loop the current BASE pointer is calculated using an ADD and a - // MUL instruction. In this case Ptr is the actual BASE pointer. - // (i64 add (i64 %array_ptr) - // (i64 mul (i64 %induction_var) - // (i64 %element_size))) - if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - - // Look at Base + Index + Offset cases. - SDValue Base = Ptr->getOperand(0); - SDValue IndexOffset = Ptr->getOperand(1); - - // Skip signextends. - if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { - IndexOffset = IndexOffset->getOperand(0); - IsIndexSignExt = true; - } - - // Either the case of Base + Index (no offset) or something else. - if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); - - // Now we have the case of Base + Index + offset. - SDValue Index = IndexOffset->getOperand(0); - SDValue Offset = IndexOffset->getOperand(1); - - if (!isa<ConstantSDNode>(Offset)) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - - // Ignore signextends. - if (Index->getOpcode() == ISD::SIGN_EXTEND) { - Index = Index->getOperand(0); - IsIndexSignExt = true; - } else IsIndexSignExt = false; - - int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); - } -}; -} // namespace - // This is a helper function for visitMUL to check the profitability // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). // MulNode is the original multiply, AddNode is (add x, c1), @@ -11351,6 +11553,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( } } + StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end()); return true; } @@ -11493,7 +11696,8 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { +bool DAGCombiner::MergeConsecutiveStores( + StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) { if (OptLevel == CodeGenOpt::None) return false; @@ -11537,16 +11741,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // any of the store nodes. SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; - // Save the StoreSDNodes that we find in the chain. - SmallVector<MemOpLink, 8> StoreNodes; - getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; - // only do dep endence check in AA case + // only do dependence check in AA case bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : DAG.getSubtarget().useAA(); if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) @@ -11582,10 +11783,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Check if this store interferes with any of the loads that we found. // If we find a load that alias with this store. Stop the sequence. 
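For reference, the BaseIndexOffset matcher deleted above decomposed a pointer expression into base + index + constant offset; two stores were merge candidates only when Base, Index and the sign-extension flag all matched and just the offset differed. An illustrative decomposition:

    (add %base, (add (sext i8 %idx to i64), 16))
        Base   = %base
        Index  = %idx        ; IsIndexSignExt = true
        Offset = 16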
- if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(), - [&](LSBaseSDNode* Ldn) { - return isAlias(Ldn, StoreNodes[i].MemNode); - })) + if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) { + return isAlias(Ldn, StoreNodes[i].MemNode); + })) break; // Mark this node as useful. @@ -11899,6 +12099,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } } + StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end()); return true; } @@ -12088,11 +12289,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = - GetDemandedBits(Value, - APInt::getLowBitsSet( - Value.getValueType().getScalarType().getSizeInBits(), - ST->getMemoryVT().getScalarType().getSizeInBits())); + SDValue Shorter = GetDemandedBits( + Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), + ST->getMemoryVT().getScalarSizeInBits())); AddToWorklist(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, @@ -12100,10 +12299,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. - if (SimplifyDemandedBits(Value, - APInt::getLowBitsSet( - Value.getValueType().getScalarType().getSizeInBits(), - ST->getMemoryVT().getScalarType().getSizeInBits()))) + if (SimplifyDemandedBits( + Value, + APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), + ST->getMemoryVT().getScalarSizeInBits()))) return SDValue(N, 0); } @@ -12144,19 +12343,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Only perform this optimization before the types are legal, because we // don't want to perform this optimization on every DAGCombine invocation. if (!LegalTypes) { - bool EverChanged = false; - - do { + for (;;) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. - bool Changed = MergeConsecutiveStores(ST); - EverChanged |= Changed; + SmallVector<MemOpLink, 8> StoreNodes; + bool Changed = MergeConsecutiveStores(ST, StoreNodes); if (!Changed) break; - } while (ST->getOpcode() != ISD::DELETED_NODE); - if (EverChanged) - return SDValue(N, 0); + if (any_of(StoreNodes, + [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) { + // ST has been merged and no longer exists. + return SDValue(N, 0); + } + } } // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' @@ -12169,14 +12369,123 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return NewSt; } + if (SDValue NewSt = splitMergedValStore(ST)) + return NewSt; + return ReduceLoadOpStoreWidth(N); } +/// For the instruction sequence of store below, F and I values +/// are bundled together as an i64 value before being stored into memory. +/// Sometimes it is more efficent to generate separate stores for F and I, +/// which can remove the bitwise instructions or sink them to colder places. +/// +/// (store (or (zext (bitcast F to i32) to i64), +/// (shl (zext I to i64), 32)), addr) --> +/// (store F, addr) and (store I, addr+4) +/// +/// Similarly, splitting for other merged store can also be beneficial, like: +/// For pair of {i32, i32}, i64 store --> two i32 stores. +/// For pair of {i32, i16}, i64 store --> two i32 stores. +/// For pair of {i16, i16}, i32 store --> two i16 stores. 
+/// For pair of {i16, i8}, i32 store --> two i16 stores. +/// For pair of {i8, i8}, i16 store --> two i8 stores. +/// +/// We allow each target to determine specifically which kind of splitting is +/// supported. +/// +/// The store patterns are commonly seen from the simple code snippet below +/// if only std::make_pair(...) is sroa transformed before inlined into hoo. +/// void goo(const std::pair<int, float> &); +/// hoo() { +/// ... +/// goo(std::make_pair(tmp, ftmp)); +/// ... +/// } +/// +SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { + if (OptLevel == CodeGenOpt::None) + return SDValue(); + + SDValue Val = ST->getValue(); + SDLoc DL(ST); + + // Match OR operand. + if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR) + return SDValue(); + + // Match SHL operand and get Lower and Higher parts of Val. + SDValue Op1 = Val.getOperand(0); + SDValue Op2 = Val.getOperand(1); + SDValue Lo, Hi; + if (Op1.getOpcode() != ISD::SHL) { + std::swap(Op1, Op2); + if (Op1.getOpcode() != ISD::SHL) + return SDValue(); + } + Lo = Op2; + Hi = Op1.getOperand(0); + if (!Op1.hasOneUse()) + return SDValue(); + + // Match shift amount to HalfValBitSize. + unsigned HalfValBitSize = Val.getValueSizeInBits() / 2; + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1)); + if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize) + return SDValue(); + + // Lo and Hi are zero-extended from int with size less equal than 32 + // to i64. + if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() || + !Lo.getOperand(0).getValueType().isScalarInteger() || + Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize || + Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() || + !Hi.getOperand(0).getValueType().isScalarInteger() || + Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize) + return SDValue(); + + // Use the EVT of low and high parts before bitcast as the input + // of target query. + EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST) + ? Lo.getOperand(0).getValueType() + : Lo.getValueType(); + EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST) + ? Hi.getOperand(0).getValueType() + : Hi.getValueType(); + if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) + return SDValue(); + + // Start to split store. + unsigned Alignment = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + + // Change the sizes of Lo and Hi's value types to HalfValBitSize. + EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0)); + Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0)); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + // Lower value store. + SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), + ST->getAlignment(), MMOFlags, AAInfo); + Ptr = + DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); + // Higher value store. + SDValue St1 = + DAG.getStore(St0, DL, Hi, Ptr, + ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), + Alignment / 2, MMOFlags, AAInfo); + return St1; +} + SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); SDValue EltNo = N->getOperand(2); - SDLoc dl(N); + SDLoc DL(N); // If the inserted element is an UNDEF, just use the input vector. 
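A self-contained version of the motivating snippet from the splitMergedValStore comment (goo/hoo are the names used in that comment; everything else is a sketch):

    #include <utility>
    void goo(const std::pair<int, float> &);
    void hoo(int tmp, float ftmp) {
      // If only make_pair is SROA'd before being inlined here, the two 32-bit
      // members are zero-extended to i64, one is shifted left by 32, and the
      // OR of the two is written with a single i64 store -- the
      // (or (zext ...), (shl (zext ...), 32)) pattern that
      // splitMergedValStore now splits back into two 32-bit stores when the
      // target reports that as cheaper.
      goo(std::make_pair(tmp, ftmp));
    }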
if (InVal.isUndef()) @@ -12206,7 +12515,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); if (Elt < OtherElt) { // Swap nodes. - SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, + SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InVec.getOperand(0), InVal, EltNo); AddToWorklist(NewOp.getNode()); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), @@ -12237,13 +12546,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { EVT OpVT = Ops[0].getValueType(); if (InVal.getValueType() != OpVT) InVal = OpVT.bitsGT(InVal.getValueType()) ? - DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : - DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); + DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) : + DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal); Ops[Elt] = InVal; } // Return the new vector - return DAG.getBuildVector(VT, dl, Ops); + return DAG.getBuildVector(VT, DL, Ops); } SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( @@ -12544,7 +12853,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return SDValue(); unsigned NumInScalars = N->getNumOperands(); - SDLoc dl(N); + SDLoc DL(N); EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of values @@ -12603,7 +12912,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SDLoc(N), SourceType); + DAG.getConstant(0, DL, SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops(NewBVElems, Filler); @@ -12634,7 +12943,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. - SDValue BV = DAG.getBuildVector(VecVT, dl, Ops); + SDValue BV = DAG.getBuildVector(VecVT, DL, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. AddToWorklist(BV.getNode()); @@ -12646,7 +12955,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumInScalars = N->getNumOperands(); - SDLoc dl(N); + SDLoc DL(N); EVT SrcVT = MVT::Other; unsigned Opcode = ISD::DELETED_NODE; @@ -12707,30 +13016,126 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { else Opnds.push_back(In.getOperand(0)); } - SDValue BV = DAG.getBuildVector(NVT, dl, Opnds); + SDValue BV = DAG.getBuildVector(NVT, DL, Opnds); AddToWorklist(BV.getNode()); - return DAG.getNode(Opcode, dl, VT, BV); + return DAG.getNode(Opcode, DL, VT, BV); } -SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { - unsigned NumInScalars = N->getNumOperands(); - SDLoc dl(N); +SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N, + ArrayRef<int> VectorMask, + SDValue VecIn1, SDValue VecIn2, + unsigned LeftIdx) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); + EVT VT = N->getValueType(0); + EVT InVT1 = VecIn1.getValueType(); + EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; + + unsigned Vec2Offset = InVT1.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); + unsigned ShuffleNumElems = NumElems; + + // We can't generate a shuffle node with mismatched input and output types. 
+ // Try to make the types match the type of the output. + if (InVT1 != VT || InVT2 != VT) { + if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) { + // If the output vector length is a multiple of both input lengths, + // we can concatenate them and pad the rest with undefs. + unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits(); + assert(NumConcats >= 2 && "Concat needs at least two inputs!"); + SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1)); + ConcatOps[0] = VecIn1; + ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1); + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + VecIn2 = SDValue(); + } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) { + if (!TLI.isExtractSubvectorCheap(VT, NumElems)) + return SDValue(); - // A vector built entirely of undefs is undef. - if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(VT); + if (!VecIn2.getNode()) { + // If we only have one input vector, and it's twice the size of the + // output, split it in two. + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, + DAG.getConstant(NumElems, DL, IdxTy)); + VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); + // Since we now have shorter input vectors, adjust the offset of the + // second vector's start. + Vec2Offset = NumElems; + } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) { + // VecIn1 is wider than the output, and we have another, possibly + // smaller input. Pad the smaller input with undefs, shuffle at the + // input vector width, and extract the output. + // The shuffle type is different than VT, so check legality again. + if (LegalOperations && + !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) + return SDValue(); - if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) - return V; + // Legalizing INSERT_SUBVECTOR is tricky - you basically have to + // lower it back into a BUILD_VECTOR. So if the inserted type is + // illegal, don't even try. + if (InVT1 != InVT2) { + if (!TLI.isTypeLegal(InVT2)) + return SDValue(); + VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, + DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); + } + ShuffleNumElems = NumElems * 2; + } else { + // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider + // than VecIn1. We can't handle this for now - this case will disappear + // when we start sorting the vectors by type. + return SDValue(); + } + } else { + // TODO: Support cases where the length mismatch isn't exactly by a + // factor of 2. + // TODO: Move this check upwards, so that if we have bad type + // mismatches, we don't create any DAG nodes. + return SDValue(); + } + } - if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) - return V; + // Initialize mask to undef. + SmallVector<int, 8> Mask(ShuffleNumElems, -1); + + // Only need to run up to the number of elements actually used, not the + // total number of elements in the shuffle - if we are shuffling a wider + // vector, the high lanes should be set to undef. + for (unsigned i = 0; i != NumElems; ++i) { + if (VectorMask[i] <= 0) + continue; + + unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1); + if (VectorMask[i] == (int)LeftIdx) { + Mask[i] = ExtIndex; + } else if (VectorMask[i] == (int)LeftIdx + 1) { + Mask[i] = Vec2Offset + ExtIndex; + } + } + + // The type the input vectors may have changed above. + InVT1 = VecIn1.getValueType(); + + // If we already have a VecIn2, it should have the same type as VecIn1. 
+ // If we don't, get an undef/zero vector of the appropriate type. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1); + assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type."); + + SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask); + if (ShuffleNumElems > NumElems) + Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx); - // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT - // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from - // at most two distinct vectors, turn this into a shuffle node. + return Shuffle; +} + +// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT +// operations. If the types of the vectors we're extracting from allow it, +// turn this into a vector_shuffle node. +SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) @@ -12740,149 +13145,169 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) return SDValue(); - SDValue VecIn1, VecIn2; bool UsesZeroVector = false; - for (unsigned i = 0; i != NumInScalars; ++i) { + unsigned NumElems = N->getNumOperands(); + + // Record, for each element of the newly built vector, which input vector + // that element comes from. -1 stands for undef, 0 for the zero vector, + // and positive values for the input vectors. + // VectorMask maps each element to its vector number, and VecIn maps vector + // numbers to their initial SDValues. + + SmallVector<int, 8> VectorMask(NumElems, -1); + SmallVector<SDValue, 8> VecIn; + VecIn.push_back(SDValue()); + + for (unsigned i = 0; i != NumElems; ++i) { SDValue Op = N->getOperand(i); - // Ignore undef inputs. - if (Op.isUndef()) continue; - // See if we can combine this build_vector into a blend with a zero vector. - if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { + if (Op.isUndef()) + continue; + + // See if we can use a blend with a zero vector. + // TODO: Should we generalize this to a blend with an arbitrary constant + // vector? + if (isNullConstant(Op) || isNullFPConstant(Op)) { UsesZeroVector = true; + VectorMask[i] = 0; continue; } - // If this input is something other than a EXTRACT_VECTOR_ELT with a - // constant index, bail out. + // Not an undef or zero. If the input is something other than an + // EXTRACT_VECTOR_ELT with a constant index, bail out. if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(Op.getOperand(1))) { - VecIn1 = VecIn2 = SDValue(nullptr, 0); - break; - } + !isa<ConstantSDNode>(Op.getOperand(1))) + return SDValue(); - // We allow up to two distinct input vectors. SDValue ExtractedFromVec = Op.getOperand(0); - if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) - continue; - if (!VecIn1.getNode()) { - VecIn1 = ExtractedFromVec; - } else if (!VecIn2.getNode() && !UsesZeroVector) { - VecIn2 = ExtractedFromVec; - } else { - // Too many inputs. - VecIn1 = VecIn2 = SDValue(nullptr, 0); - break; - } - } - - // If everything is good, we can make a shuffle operation. 
- if (VecIn1.getNode()) { - unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); - SmallVector<int, 8> Mask; - for (unsigned i = 0; i != NumInScalars; ++i) { - unsigned Opcode = N->getOperand(i).getOpcode(); - if (Opcode == ISD::UNDEF) { - Mask.push_back(-1); - continue; - } + // All inputs must have the same element type as the output. + if (VT.getVectorElementType() != + ExtractedFromVec.getValueType().getVectorElementType()) + return SDValue(); - // Operands can also be zero. - if (Opcode != ISD::EXTRACT_VECTOR_ELT) { - assert(UsesZeroVector && - (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && - "Unexpected node found!"); - Mask.push_back(NumInScalars+i); - continue; - } + // Have we seen this input vector before? + // The vectors are expected to be tiny (usually 1 or 2 elements), so using + // a map back from SDValues to numbers isn't worth it. + unsigned Idx = std::distance( + VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec)); + if (Idx == VecIn.size()) + VecIn.push_back(ExtractedFromVec); - // If extracting from the first vector, just use the index directly. - SDValue Extract = N->getOperand(i); - SDValue ExtVal = Extract.getOperand(1); - unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - if (Extract.getOperand(0) == VecIn1) { - Mask.push_back(ExtIndex); - continue; - } + VectorMask[i] = Idx; + } - // Otherwise, use InIdx + InputVecSize - Mask.push_back(InNumElements + ExtIndex); - } + // If we didn't find at least one input vector, bail out. + if (VecIn.size() < 2) + return SDValue(); - // Avoid introducing illegal shuffles with zero. - if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) + // TODO: We want to sort the vectors by descending length, so that adjacent + // pairs have similar length, and the longer vector is always first in the + // pair. + + // TODO: Should this fire if some of the input vectors has illegal type (like + // it does now), or should we let legalization run its course first? + + // Shuffle phase: + // Take pairs of vectors, and shuffle them so that the result has elements + // from these vectors in the correct places. + // For example, given: + // t10: i32 = extract_vector_elt t1, Constant:i64<0> + // t11: i32 = extract_vector_elt t2, Constant:i64<0> + // t12: i32 = extract_vector_elt t3, Constant:i64<0> + // t13: i32 = extract_vector_elt t1, Constant:i64<1> + // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13 + // We will generate: + // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2 + // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef + SmallVector<SDValue, 4> Shuffles; + for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) { + unsigned LeftIdx = 2 * In + 1; + SDValue VecLeft = VecIn[LeftIdx]; + SDValue VecRight = + (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue(); + + if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft, + VecRight, LeftIdx)) + Shuffles.push_back(Shuffle); + else return SDValue(); + } - // We can't generate a shuffle node with mismatched input and output types. - // Attempt to transform a single input vector to the correct type. - if ((VT != VecIn1.getValueType())) { - // If the input vector type has a different base type to the output - // vector type, bail out. 
- EVT VTElemType = VT.getVectorElementType(); - if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || - (VecIn2.getNode() && - (VecIn2.getValueType().getVectorElementType() != VTElemType))) - return SDValue(); + // If we need the zero vector as an "ingredient" in the blend tree, add it + // to the list of shuffles. + if (UsesZeroVector) + Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT) + : DAG.getConstantFP(0.0, DL, VT)); - // If the input vector is too small, widen it. - // We only support widening of vectors which are half the size of the - // output registers. For example XMM->YMM widening on X86 with AVX. - EVT VecInT = VecIn1.getValueType(); - if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { - // If we only have one small input, widen it by adding undef values. - if (!VecIn2.getNode()) - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, - DAG.getUNDEF(VecIn1.getValueType())); - else if (VecIn1.getValueType() == VecIn2.getValueType()) { - // If we have two small inputs of the same type, try to concat them. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); - VecIn2 = SDValue(nullptr, 0); - } else - return SDValue(); - } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { - // If the input vector is too large, try to split it. - // We don't support having two input vectors that are too large. - // If the zero vector was used, we can not split the vector, - // since we'd need 3 inputs. - if (UsesZeroVector || VecIn2.getNode()) - return SDValue(); + // If we only have one shuffle, we're done. + if (Shuffles.size() == 1) + return Shuffles[0]; - if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) - return SDValue(); + // Update the vector mask to point to the post-shuffle vectors. + for (int &Vec : VectorMask) + if (Vec == 0) + Vec = Shuffles.size() - 1; + else + Vec = (Vec - 1) / 2; + + // More than one shuffle. Generate a binary tree of blends, e.g. if from + // the previous step we got the set of shuffles t10, t11, t12, t13, we will + // generate: + // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2 + // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4 + // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6 + // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8 + // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11 + // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13 + // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21 + + // Make sure the initial size of the shuffle list is even. + if (Shuffles.size() % 2) + Shuffles.push_back(DAG.getUNDEF(VT)); + + for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) { + if (CurSize % 2) { + Shuffles[CurSize] = DAG.getUNDEF(VT); + CurSize++; + } + for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) { + int Left = 2 * In; + int Right = 2 * In + 1; + SmallVector<int, 8> Mask(NumElems, -1); + for (unsigned i = 0; i != NumElems; ++i) { + if (VectorMask[i] == Left) { + Mask[i] = i; + VectorMask[i] = In; + } else if (VectorMask[i] == Right) { + Mask[i] = i + NumElems; + VectorMask[i] = In; + } + } - // Try to replace VecIn1 with two extract_subvectors - // No need to update the masks, they should still be correct. 
- VecIn2 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); - VecIn1 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - } else - return SDValue(); + Shuffles[In] = + DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); } + } - if (UsesZeroVector) - VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) : - DAG.getConstantFP(0.0, dl, VT); - else - // If VecIn2 is unused then change it to undef. - VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + return Shuffles[0]; +} - // Check that we were able to transform all incoming values to the same - // type. - if (VecIn2.getValueType() != VecIn1.getValueType() || - VecIn1.getValueType() != VT) - return SDValue(); +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + EVT VT = N->getValueType(0); - // Return the new VECTOR_SHUFFLE node. - SDValue Ops[2]; - Ops[0] = VecIn1; - Ops[1] = VecIn2; - return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask); - } + // A vector built entirely of undefs is undef. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) + return V; + + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) + return V; + + if (SDValue V = reduceBuildVecToShuffle(N)) + return V; return SDValue(); } @@ -13071,8 +13496,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) return SDValue(); - SDLoc dl = SDLoc(N); - SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar); return DAG.getBitcast(VT, Res); } } @@ -13208,7 +13632,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { V = V.getOperand(0); if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { - SDLoc dl(N); // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); @@ -13228,11 +13651,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Into: // indices are equal or bit offsets are equal => V1 // otherwise => (extract_subvec V1, ExtIdx) - if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == - ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) + if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == + ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) return DAG.getBitcast(NVT, V->getOperand(1)); return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, NVT, + ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), N->getOperand(1)); } @@ -13391,6 +13814,84 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } +// Attempt to combine a shuffle of 2 inputs of 'scalar sources' - +// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. +// +// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always +// a simplification in some sense, but it isn't appropriate in general: some +// BUILD_VECTORs are substantially cheaper than others. The general case +// of a BUILD_VECTOR requires inserting each element individually (or +// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of +// all constants is a single constant pool load. 
A BUILD_VECTOR where each +// element is identical is a splat. A BUILD_VECTOR where most of the operands +// are undef lowers to a small number of element insertions. +// +// To deal with this, we currently use a bunch of mostly arbitrary heuristics. +// We don't fold shuffles where one side is a non-zero constant, and we don't +// fold shuffles if the resulting BUILD_VECTOR would have duplicate +// non-constant operands. This seems to work out reasonably well in practice. +static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT = SVN->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + SDValue N0 = SVN->getOperand(0); + SDValue N1 = SVN->getOperand(1); + + if (!N0->hasOneUse() || !N1->hasOneUse()) + return SDValue(); + // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as + // discussed above. + if (!N1.isUndef()) { + bool N0AnyConst = isAnyConstantBuildVector(N0.getNode()); + bool N1AnyConst = isAnyConstantBuildVector(N1.getNode()); + if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode())) + return SDValue(); + if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode())) + return SDValue(); + } + + SmallVector<SDValue, 8> Ops; + SmallSet<SDValue, 16> DuplicateOps; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M < (int)NumElts ? M : M - NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + return SDValue(); + } + } + + // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is + // fine, but it's likely to generate low-quality code if the target can't + // reconstruct an appropriate shuffle. + if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op)) + if (!DuplicateOps.insert(Op).second) + return SDValue(); + + Ops.push_back(Op); + } + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT); + return DAG.getBuildVector(VT, SDLoc(SVN), Ops); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -13506,40 +14007,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. - if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { - SmallVector<SDValue, 8> Ops; - for (int M : SVN->getMask()) { - SDValue Op = DAG.getUNDEF(VT.getScalarType()); - if (M >= 0) { - int Idx = M % NumElts; - SDValue &S = (M < (int)NumElts ? N0 : N1); - if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { - Op = S.getOperand(Idx); - } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { - if (Idx == 0) - Op = S.getOperand(0); - } else { - // Operand can't be combined - bail out. 
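At the DAG level, the combineShuffleOfScalars helper factored out above performs folds of this shape (letters are placeholder scalars):

    vector_shuffle<0,4,1,5> (BUILD_VECTOR a, b, c, d), (BUILD_VECTOR e, f, g, h)
        --> BUILD_VECTOR a, e, b, f

subject to the heuristics in its comment: both inputs must be single-use, a side that is constant must be all zeros, and non-constant operands may not end up duplicated in the result.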
- break; - } - } - Ops.push_back(Op); - } - if (Ops.size() == VT.getVectorNumElements()) { - // BUILD_VECTOR requires all inputs to be of the same type, find the - // maximum type and extend them all. - EVT SVT = VT.getScalarType(); - if (SVT.isInteger()) - for (SDValue &Op : Ops) - SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); - if (SVT != VT.getScalarType()) - for (SDValue &Op : Ops) - Op = TLI.isZExtFree(Op.getValueType(), SVT) - ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) - : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); - return DAG.getBuildVector(VT, SDLoc(N), Ops); - } - } + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI)) + return Res; // If this shuffle only has a single input that is a bitcasted shuffle, // attempt to merge the 2 shuffles and suitably bitcast the inputs/output @@ -13647,6 +14117,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + // Don't try to fold splats; they're likely to simplify somehow, or they + // might be free. + if (OtherSV->isSplat()) + return SDValue(); + // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(OtherSV->getOperand(0).getValueType() == VT && @@ -13773,10 +14248,20 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + // Combine INSERT_SUBVECTORs where we are inserting to the same index. + // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) + // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) + if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && + N0.getOperand(1).getValueType() == N1.getValueType() && + N0.getOperand(2) == N2) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), + N1, N2); + if (N0.getValueType() != N1.getValueType()) return SDValue(); @@ -13785,7 +14270,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); - EVT VT = N->getValueType(0); // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors Z, Y) @@ -13836,7 +14320,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDLoc dl(N); + SDLoc DL(N); // Make sure we're not running after operation legalization where it // may have custom lowered the vector shuffles. 
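The new INSERT_SUBVECTOR fold above is safe because the outer insert targets the same index with a subvector of the same type, so it completely overwrites whatever the inner insert wrote and SubOld is dead. DAG-level illustration:

    insert_subvector (insert_subvector %vec, %subOld, 0), %subNew, 0
        --> insert_subvector %vec, %subNew, 0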
@@ -13904,8 +14388,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) return SDValue(); - SDValue Zero = DAG.getConstant(0, dl, ClearVT); - return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, + SDValue Zero = DAG.getConstant(0, DL, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL, DAG.getBitcast(ClearVT, LHS), Zero, Indices)); }; @@ -14119,6 +14603,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); if (!RLD->isInvariant()) MMOFlags &= ~MachineMemOperand::MOInvariant; + if (!RLD->isDereferenceable()) + MMOFlags &= ~MachineMemOperand::MODereferenceable; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { // FIXME: Discards pointer and AA info. Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), @@ -14146,6 +14632,73 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, return false; } +/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and +/// bitwise 'and'. +SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, + SDValue N1, SDValue N2, SDValue N3, + ISD::CondCode CC) { + // If this is a select where the false operand is zero and the compare is a + // check of the sign bit, see if we can perform the "gzip trick": + // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A + // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A + EVT XType = N0.getValueType(); + EVT AType = N2.getValueType(); + if (!isNullConstant(N3) || !XType.bitsGE(AType)) + return SDValue(); + + // If the comparison is testing for a positive value, we have to invert + // the sign bit mask, so only do that transform if the target has a bitwise + // 'and not' instruction (the invert is free). + if (CC == ISD::SETGT && TLI.hasAndNot(N2)) { + // (X > -1) ? A : 0 + // (X > 0) ? X : 0 <-- This is canonical signed max. + if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2))) + return SDValue(); + } else if (CC == ISD::SETLT) { + // (X < 0) ? A : 0 + // (X < 1) ? X : 0 <-- This is un-canonicalized signed min. + if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2))) + return SDValue(); + } else { + return SDValue(); + } + + // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit + // constant. 
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); + auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { + unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); + SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); + AddToWorklist(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } + + if (CC == ISD::SETGT) + Shift = DAG.getNOT(DL, Shift, AType); + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + + SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); + SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); + AddToWorklist(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } + + if (CC == ISD::SETGT) + Shift = DAG.getNOT(DL, Shift, AType); + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); +} + /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, @@ -14242,48 +14795,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, } } - // Check to see if we can perform the "gzip trick", transforming - // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) - if (isNullConstant(N3) && CC == ISD::SETLT && - (isNullConstant(N1) || // (a < 0) ? b : 0 - (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0 - EVT XType = N0.getValueType(); - EVT AType = N2.getValueType(); - if (XType.bitsGE(AType)) { - // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a - // single-bit constant. - if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { - unsigned ShCtV = N2C->getAPIntValue().logBase2(); - ShCtV = XType.getSizeInBits() - ShCtV - 1; - SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), - getShiftAmountTy(N0.getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), - XType, N0, ShCt); - AddToWorklist(Shift.getNode()); - - if (XType.bitsGT(AType)) { - Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorklist(Shift.getNode()); - } - - return DAG.getNode(ISD::AND, DL, AType, Shift, N2); - } - - SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), - XType, N0, - DAG.getConstant(XType.getSizeInBits() - 1, - SDLoc(N0), - getShiftAmountTy(N0.getValueType()))); - AddToWorklist(Shift.getNode()); - - if (XType.bitsGT(AType)) { - Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorklist(Shift.getNode()); - } - - return DAG.getNode(ISD::AND, DL, AType, Shift, N2); - } - } + if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) + return V; // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) // where y is has a single bit set. @@ -14511,30 +15024,51 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } +/// Determines the LogBase2 value for a non-null input value using the +/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). 
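The foldSelectCCToShiftAnd helper above is the "gzip trick" in reusable form. At the source level the basic pattern is the branch-free sign-mask select (sketch for 32-bit integers; the function name is made up):

    #include <cstdint>
    // select_cc setlt X, 0, A, 0  ==>  and (sra X, 31), A   for i32
    int32_t select_if_negative(int32_t x, int32_t a) {
      int32_t mask = x >> 31;   // arithmetic shift: all ones if x < 0, else 0
      return mask & a;          // == (x < 0) ? a : 0
    }
    // When A is a single-bit constant, e.g. 16 == 1 << 4, the combine uses a
    // logical shift instead: ((uint32_t)x >> (31 - 4)) & 16.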
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { + EVT VT = V.getValueType(); + unsigned EltBits = VT.getScalarSizeInBits(); + SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); + SDValue Base = DAG.getConstant(EltBits - 1, DL, VT); + SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); + return LogBase2; +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal, we need to find the zero of the function: +/// F(X) = A X - 1 [which has a zero at X = 1/A] +/// => +/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form +/// does not require additional intermediate precision] SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); - // Expose the DAG combiner to the target combiner implementations. - TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); + + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. + int Iterations = TLI.getDivRefinementSteps(VT, MF); + if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { + AddToWorklist(Est.getNode()); - unsigned Iterations = 0; - if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { if (Iterations) { - // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) - // For the reciprocal, we need to find the zero of the function: - // F(X) = A X - 1 [which has a zero at X = 1/A] - // => - // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form - // does not require additional intermediate precision] EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - AddToWorklist(Est.getNode()); - // Newton iterations: Est = Est + Est (1 - Arg * Est) - for (unsigned i = 0; i < Iterations; ++i) { + for (int i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); @@ -14656,16 +15190,47 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, if (Level >= AfterLegalizeDAG) return SDValue(); - // Expose the DAG combiner to the target combiner implementations. - TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); - unsigned Iterations = 0; + // TODO: Handle half and/or extended types? + EVT VT = Op.getValueType(); + if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + return SDValue(); + + // If estimates are explicitly disabled for this function, we're done. + MachineFunction &MF = DAG.getMachineFunction(); + int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF); + if (Enabled == TLI.ReciprocalEstimate::Disabled) + return SDValue(); + + // Estimates may be explicitly enabled for this type with a custom number of + // refinement steps. 
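Two of the helpers above in plain arithmetic terms (worked sketch; the compiler builtin is used only for illustration):

    #include <cstdint>
    #include <cassert>

    // BuildLogBase2: LogBase2(V) = (EltBits - 1) - ctlz(V).
    // Example for i32: V = 8, ctlz(8) = 28, and 31 - 28 = 3 = log2(8).
    uint32_t log_base2_i32(uint32_t v) {
      assert(v != 0 && "LogBase2 of zero is undefined");
      return 31 - __builtin_clz(v);
    }

    // One Newton step for the reciprocal 1/A, as in BuildReciprocalEstimate:
    //   x1 = x0 * (2 - A*x0) = x0 + x0*(1 - A*x0)
    // Each step roughly doubles the number of correct bits of the estimate.
    float refine_recip(float A, float x0) {
      return x0 + x0 * (1.0f - A * x0);
    }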
+ int Iterations = TLI.getSqrtRefinementSteps(VT, MF); + bool UseOneConstNR = false; - if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { + if (SDValue Est = + TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR, + Reciprocal)) { AddToWorklist(Est.getNode()); + if (Iterations) { Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + + if (!Reciprocal) { + // Unfortunately, Est is now NaN if the input was exactly 0.0. + // Select out this case and force the answer to 0.0. + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = getSetCCResultType(VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + + Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, + ZeroCmp, FPZero, Est); + AddToWorklist(Est.getNode()); + } } return Est; } @@ -14678,23 +15243,7 @@ SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { } SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { - SDValue Est = buildSqrtEstimateImpl(Op, Flags, false); - if (!Est) - return SDValue(); - - // Unfortunately, Est is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - EVT VT = Est.getValueType(); - SDLoc DL(Op); - SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = getSetCCResultType(VT); - SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - - Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp, - Zero, Est); - AddToWorklist(Est.getNode()); - return Est; + return buildSqrtEstimateImpl(Op, Flags, false); } /// Return true if base is a frame index, which is known not to alias with @@ -14771,9 +15320,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // To catch this case, look up the actual index of frame indices to compute // the real alias relationship. 
if (isFrameIndex1 && isFrameIndex2) { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); - Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); + Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index b10da00..e2f33bb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -353,8 +353,8 @@ void FastISel::recomputeInsertPt() { void FastISel::removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { - assert(static_cast<MachineInstr *>(I) && static_cast<MachineInstr *>(E) && - std::distance(I, E) > 0 && "Invalid iterator!"); + assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 && + "Invalid iterator!"); while (I != E) { MachineInstr *Dead = &*I; ++I; @@ -455,17 +455,6 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { return true; } - // Check if the second operand is a constant float. - if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { - unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op0IsKill, CF); - if (ResultReg) { - // We successfully emitted code for the given LLVM Instruction. - updateValueMap(I, ResultReg); - return true; - } - } - unsigned Op1 = getRegForValue(I->getOperand(1)); if (!Op1) // Unhandled operand. Halt "fast" selection and bail. return false; @@ -499,7 +488,7 @@ bool FastISel::selectGetElementPtr(const User *I) { for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); GTI != E; ++GTI) { const Value *Idx = GTI.getOperand(); - if (auto *StTy = dyn_cast<StructType>(*GTI)) { + if (StructType *StTy = GTI.getStructTypeOrNull()) { uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset @@ -581,7 +570,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); Ops.push_back(MachineOperand::CreateImm(0)); } else if (auto *AI = dyn_cast<AllocaInst>(Val)) { - // Values coming from a stack location also require a sepcial encoding, + // Values coming from a stack location also require a special encoding, // but that is added later on by the target specific frame index // elimination implementation. auto SI = FuncInfo.StaticAllocaMap.find(AI); @@ -666,7 +655,7 @@ bool FastISel::selectStackmap(const CallInst *I) { .addImm(0); // Inform the Frame Information that we have a stackmap in this function. 
- FuncInfo.MF->getFrameInfo()->setHasStackMap(); + FuncInfo.MF->getFrameInfo().setHasStackMap(); return true; } @@ -707,7 +696,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee( const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy, - const char *Target, ArgListTy &&ArgsList, unsigned FixedArgs) { + StringRef Target, ArgListTy &&ArgsList, unsigned FixedArgs) { SmallString<32> MangledName; Mangler::getNameWithPrefix(MangledName, Target, DL); MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName); @@ -845,7 +834,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { CLI.Call->eraseFromParent(); // Inform the Frame Information that we have a patchpoint in this function. - FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); + FuncInfo.MF->getFrameInfo().setHasPatchPoint(); if (CLI.NumResultRegs) updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); @@ -1077,7 +1066,7 @@ bool FastISel::selectCall(const User *I) { } MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); - ComputeUsesVAFloatArgument(*Call, &MMI); + computeUsesVAFloatArgument(*Call, MMI); // Handle intrinsic function calls. if (const auto *II = dyn_cast<IntrinsicInst>(Call)) @@ -1104,6 +1093,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::lifetime_end: // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: + // Neither does the assume intrinsic; it's also OK not to codegen its operand. + case Intrinsic::assume: return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); @@ -1225,6 +1216,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { updateValueMap(II, ResultReg); return true; } + case Intrinsic::invariant_group_barrier: case Intrinsic::expect: { unsigned ResultReg = getRegForValue(II->getArgOperand(0)); if (!ResultReg) @@ -1324,15 +1316,6 @@ bool FastISel::selectBitCast(const User *I) { return true; } -// Return true if we should copy from swift error to the final vreg as specified -// by SwiftErrorWorklist. -static bool shouldCopySwiftErrorsToFinalVRegs(const TargetLowering &TLI, - FunctionLoweringInfo &FuncInfo) { - if (!TLI.supportSwiftError()) - return false; - return FuncInfo.SwiftErrorWorklist.count(FuncInfo.MBB); -} - // Remove local value instructions starting from the instruction after // SavedLastLocalValue to the current function insert point. void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) @@ -1357,10 +1340,6 @@ bool FastISel::selectInstruction(const Instruction *I) { // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(I)) { - // If we need to materialize any vreg from worklist, we bail out of - // FastISel. - if (shouldCopySwiftErrorsToFinalVRegs(TLI, FuncInfo)) - return false; if (!handlePHINodesInSuccessorBlocks(I->getParent())) { // PHI node handling may have generated local value instructions, // even though it failed to handle all PHI nodes. @@ -1444,7 +1423,7 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, // fall-through case, which needs no instructions. } else { // The unconditional branch case. 
- TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, + TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr, SmallVector<MachineOperand, 0>(), DbgLoc); } if (FuncInfo.BPI) { @@ -1679,7 +1658,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, bool SkipTargetIndependentISel) : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), - MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), + MFI(FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()), TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), @@ -1723,18 +1702,6 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/, return 0; } -unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/, - bool /*Op0IsKill*/, - const ConstantFP * /*FPImm*/) { - return 0; -} - -unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/, - bool /*Op0IsKill*/, unsigned /*Op1*/, - bool /*Op1IsKill*/, uint64_t /*Imm*/) { - return 0; -} - /// This method is a wrapper of fastEmit_ri. It first tries to emit an /// instruction with an immediate operand using fastEmit_ri. /// If that fails, it materializes the immediate into a register and try @@ -2181,6 +2148,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool IsDereferenceable = + I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr; const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); AAMDNodes AAInfo; @@ -2195,6 +2164,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags |= MachineMemOperand::MOVolatile; if (IsNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + if (IsDereferenceable) + Flags |= MachineMemOperand::MODereferenceable; if (IsInvariant) Flags |= MachineMemOperand::MOInvariant; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index e669ffc..377a523 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -98,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, Fn->isVarArg(), Outs, Fn->getContext()); // If this personality uses funclets, we need to do a bit more work. - DenseMap<const AllocaInst *, int *> CatchObjects; + DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects; EHPersonality Personality = classifyEHPersonality( Fn->hasPersonalityFn() ? 
Fn->getPersonalityFn() : nullptr); if (isFuncletEHPersonality(Personality)) { @@ -115,7 +114,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { for (WinEHHandlerType &H : TBME.HandlerArray) { if (const AllocaInst *AI = H.CatchObj.Alloca) - CatchObjects.insert({AI, &H.CatchObj.FrameIndex}); + CatchObjects.insert({AI, {}}).first->second.push_back( + &H.CatchObj.FrameIndex); else H.CatchObj.FrameIndex = INT_MAX; } @@ -125,11 +125,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. - Function::const_iterator BB = Fn->begin(), EB = Fn->end(); - for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + for (const BasicBlock &BB : *Fn) { + for (const Instruction &I : BB) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { Type *Ty = AI->getAllocatedType(); unsigned Align = std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), @@ -138,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Static allocas can be folded into the initial stack frame // adjustment. For targets that don't realign the stack, don't // do this if there is an extra alignment requirement. - if (AI->isStaticAlloca() && + if (AI->isStaticAlloca() && (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); @@ -148,18 +146,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, int FrameIndex = INT_MAX; auto Iter = CatchObjects.find(AI); if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) { - FrameIndex = MF->getFrameInfo()->CreateFixedObject( + FrameIndex = MF->getFrameInfo().CreateFixedObject( TySize, 0, /*Immutable=*/false, /*isAliased=*/true); - MF->getFrameInfo()->setObjectAlignment(FrameIndex, Align); + MF->getFrameInfo().setObjectAlignment(FrameIndex, Align); } else { FrameIndex = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI); } StaticAllocaMap[AI] = FrameIndex; // Update the catch handler information. - if (Iter != CatchObjects.end()) - *Iter->second = FrameIndex; + if (Iter != CatchObjects.end()) { + for (int *CatchObjPtr : Iter->second) + *CatchObjPtr = FrameIndex; + } } else { // FIXME: Overaligned static allocas should be grouped into // a single dynamic allocation instead of using a separate @@ -167,20 +167,19 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. - MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI); + MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI); } } // Look for inline asm that clobbers the SP register. 
if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - ImmutableCallSite CS(&*I); + ImmutableCallSite CS(&I); if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS); - for (size_t I = 0, E = Ops.size(); I != E; ++I) { - TargetLowering::AsmOperandInfo &Op = Ops[I]; + for (TargetLowering::AsmOperandInfo &Op : Ops) { if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). TLI->ComputeConstraintToUse(Op, SDValue(), DAG); @@ -188,7 +187,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, Op.ConstraintVT); if (PhysReg.first == SP) - MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + MF->getFrameInfo().setHasOpaqueSPAdjustment(true); } } } @@ -197,28 +196,28 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for calls to the @llvm.va_start intrinsic. We can omit some // prologue boilerplate for variadic functions that don't examine their // arguments. - if (const auto *II = dyn_cast<IntrinsicInst>(I)) { + if (const auto *II = dyn_cast<IntrinsicInst>(&I)) { if (II->getIntrinsicID() == Intrinsic::vastart) - MF->getFrameInfo()->setHasVAStart(true); + MF->getFrameInfo().setHasVAStart(true); } // If we have a musttail call in a variadic function, we need to ensure we // forward implicit register parameters. - if (const auto *CI = dyn_cast<CallInst>(I)) { + if (const auto *CI = dyn_cast<CallInst>(&I)) { if (CI->isMustTailCall() && Fn->isVarArg()) - MF->getFrameInfo()->setHasMustTailInVarArgFunc(true); + MF->getFrameInfo().setHasMustTailInVarArgFunc(true); } // Mark values used outside their block as exported, by allocating // a virtual register for them. - if (isUsedOutsideOfDefiningBlock(&*I)) - if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) - InitializeRegForValue(&*I); + if (isUsedOutsideOfDefiningBlock(&I)) + if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I))) + InitializeRegForValue(&I); // Collect llvm.dbg.declare information. This is done now instead of // during the initial isel pass through the IR so that it is done // in a predictable order. - if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { + if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) { assert(DI->getVariable() && "Missing variable"); assert(DI->getDebugLoc() && "Missing location"); if (MMI.hasDebugInfo()) { @@ -234,7 +233,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, StaticAllocaMap.find(AI); if (SI != StaticAllocaMap.end()) { // Check for VLAs. int FI = SI->second; - MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(), + MF->setVariableDbgInfo(DI->getVariable(), DI->getExpression(), FI, DI->getDebugLoc()); } } @@ -243,47 +242,52 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Decide the preferred extend type for a value. - PreferredExtendType[&*I] = getPreferredExtendForValue(&*I); + PreferredExtendType[&I] = getPreferredExtendForValue(&I); } + } // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. 
- for (BB = Fn->begin(); BB != EB; ++BB) { + for (const BasicBlock &BB : *Fn) { // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks // are really data, and no instructions can live here. - if (BB->isEHPad()) { - const Instruction *I = BB->getFirstNonPHI(); + if (BB.isEHPad()) { + const Instruction *PadInst = BB.getFirstNonPHI(); // If this is a non-landingpad EH pad, mark this function as using // funclets. // FIXME: SEH catchpads do not create funclets, so we could avoid setting // this in such cases in order to improve frame layout. - if (!isa<LandingPadInst>(I)) { - MMI.setHasEHFunclets(true); - MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + if (!isa<LandingPadInst>(PadInst)) { + MF->setHasEHFunclets(true); + MF->getFrameInfo().setHasOpaqueSPAdjustment(true); } - if (isa<CatchSwitchInst>(I)) { - assert(&*BB->begin() == I && + if (isa<CatchSwitchInst>(PadInst)) { + assert(&*BB.begin() == PadInst && "WinEHPrepare failed to remove PHIs from imaginary BBs"); continue; } - if (isa<FuncletPadInst>(I)) - assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs"); + if (isa<FuncletPadInst>(PadInst)) + assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs"); } - MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB); - MBBMap[&*BB] = MBB; + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&BB); + MBBMap[&BB] = MBB; MF->push_back(MBB); // Transfer the address-taken flag. This is necessary because there could // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only // the first one should be marked. - if (BB->hasAddressTaken()) + if (BB.hasAddressTaken()) MBB->setHasAddressTaken(); + // Mark landing pad blocks. + if (BB.isEHPad()) + MBB->setIsEHPad(); + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. - for (BasicBlock::const_iterator I = BB->begin(); + for (BasicBlock::const_iterator I = BB.begin(); const PHINode *PN = dyn_cast<PHINode>(I); ++I) { if (PN->use_empty()) continue; @@ -297,8 +301,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs); - for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - EVT VT = ValueVTs[vti]; + for (EVT VT : ValueVTs) { unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) @@ -308,16 +311,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } - // Mark landing pad blocks. - SmallVector<const LandingPadInst *, 4> LPads; - for (BB = Fn->begin(); BB != EB; ++BB) { - const Instruction *FNP = BB->getFirstNonPHI(); - if (BB->isEHPad() && MBBMap.count(&*BB)) - MBBMap[&*BB]->setIsEHPad(); - if (const auto *LPI = dyn_cast<LandingPadInst>(FNP)) - LPads.push_back(LPI); - } - if (!isFuncletEHPersonality(Personality)) return; @@ -541,75 +534,26 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( return VReg; } -/// ComputeUsesVAFloatArgument - Determine if any floating-point values are -/// being passed to this variadic function, and set the MachineModuleInfo's -/// usesVAFloatArgument flag if so. This flag is used to emit an undefined -/// reference to _fltused on Windows, which will link in MSVCRT's -/// floating-point support. 
-void llvm::ComputeUsesVAFloatArgument(const CallInst &I, - MachineModuleInfo *MMI) -{ - FunctionType *FT = cast<FunctionType>( - I.getCalledValue()->getType()->getContainedType(0)); - if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Type* T = I.getArgOperand(i)->getType(); - for (auto i : post_order(T)) { - if (i->isFloatingPointTy()) { - MMI->setUsesVAFloatArgument(true); - return; - } - } - } - } -} - -/// AddLandingPadInfo - Extract the exception handling information from the -/// landingpad instruction and add them to the specified machine module info. -void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, - MachineBasicBlock *MBB) { - if (const auto *PF = dyn_cast<Function>( - I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) - MMI.addPersonality(PF); - - if (I.isCleanup()) - MMI.addCleanup(MBB); - - // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct, - // but we need to do it this way because of how the DWARF EH emitter - // processes the clauses. - for (unsigned i = I.getNumClauses(); i != 0; --i) { - Value *Val = I.getClause(i - 1); - if (I.isCatch(i - 1)) { - MMI.addCatchTypeInfo(MBB, - dyn_cast<GlobalValue>(Val->stripPointerCasts())); - } else { - // Add filters in a list. - Constant *CVal = cast<Constant>(Val); - SmallVector<const GlobalValue*, 4> FilterList; - for (User::op_iterator - II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II) - FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); - - MMI.addFilterTypeInfo(MBB, FilterList); - } - } -} - -unsigned FunctionLoweringInfo::findSwiftErrorVReg(const MachineBasicBlock *MBB, - const Value* Val) const { - // Find the index in SwiftErrorVals. - SwiftErrorValues::const_iterator I = - std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val); - assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals"); - return SwiftErrorMap.lookup(MBB)[I - SwiftErrorVals.begin()]; +unsigned +FunctionLoweringInfo::getOrCreateSwiftErrorVReg(const MachineBasicBlock *MBB, + const Value *Val) { + auto Key = std::make_pair(MBB, Val); + auto It = SwiftErrorVRegDefMap.find(Key); + // If this is the first use of this swifterror value in this basic block, + // create a new virtual register. + // After we processed all basic blocks we will satisfy this "upwards exposed + // use" by inserting a copy or phi at the beginning of this block. + if (It == SwiftErrorVRegDefMap.end()) { + auto &DL = MF->getDataLayout(); + const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + auto VReg = MF->getRegInfo().createVirtualRegister(RC); + SwiftErrorVRegDefMap[Key] = VReg; + SwiftErrorVRegUpwardsUse[Key] = VReg; + return VReg; + } else return It->second; } -void FunctionLoweringInfo::setSwiftErrorVReg(const MachineBasicBlock *MBB, - const Value* Val, unsigned VReg) { - // Find the index in SwiftErrorVals. 
- SwiftErrorValues::iterator I = - std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val); - assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals"); - SwiftErrorMap[MBB][I - SwiftErrorVals.begin()] = VReg; +void FunctionLoweringInfo::setCurrentSwiftErrorVReg( + const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) { + SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c8af73a..4a9042c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -330,16 +330,24 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, // shrink VReg's register class within reason. For example, if VReg == GR32 // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. if (II) { - const TargetRegisterClass *DstRC = nullptr; + const TargetRegisterClass *OpRC = nullptr; if (IIOpNum < II->getNumOperands()) - DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); - assert((!DstRC || TargetRegisterInfo::isVirtualRegister(VReg)) && - "Expected VReg"); - if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { - unsigned NewVReg = MRI->createVirtualRegister(DstRC); - BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); - VReg = NewVReg; + OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF); + + if (OpRC) { + const TargetRegisterClass *ConstrainedRC + = MRI->constrainRegClass(VReg, OpRC, MinRCSize); + if (!ConstrainedRC) { + OpRC = TRI->getAllocatableClass(OpRC); + assert(OpRC && "Constraints cannot be fulfilled for allocation"); + unsigned NewVReg = MRI->createVirtualRegister(OpRC); + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + VReg = NewVReg; + } else { + assert(ConstrainedRC->isAllocatable() && + "Constraining an allocatable VReg produced an unallocatable class?"); + } } } @@ -494,8 +502,17 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); - unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - MachineInstr *DefMI = MRI->getVRegDef(VReg); + unsigned Reg; + MachineInstr *DefMI; + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); + if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + Reg = R->getReg(); + DefMI = nullptr; + } else { + Reg = getVR(Node->getOperand(0), VRBaseMap); + DefMI = MRI->getVRegDef(Reg); + } + unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && @@ -511,20 +528,26 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); MRI->clearKillFlags(SrcReg); } else { - // VReg may not support a SubIdx sub-register, and we may need to + // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. - VReg = ConstrainForSubReg(VReg, SubIdx, - Node->getOperand(0).getSimpleValueType(), - Node->getDebugLoc()); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Reg = ConstrainForSubReg(Reg, SubIdx, + Node->getOperand(0).getSimpleValueType(), + Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. 
- BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); + MachineInstrBuilder CopyMI = + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + CopyMI.addReg(Reg, 0, SubIdx); + else + CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 18ad910..b002825 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -259,19 +259,25 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { (VT == MVT::f64) ? MVT::i64 : MVT::i32); } + APFloat APF = CFP->getValueAPF(); EVT OrigVT = VT; EVT SVT = VT; - while (SVT != MVT::f32 && SVT != MVT::f16) { - SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); - if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && - // Only do this if the target has a native EXTLOAD instruction from - // smaller type. - TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && - TLI.ShouldShrinkFPConstant(OrigVT)) { - Type *SType = SVT.getTypeForEVT(*DAG.getContext()); - LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); - VT = SVT; - Extend = true; + + // We don't want to shrink SNaNs. Converting the SNaN back to its real type + // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ). + if (!APF.isSignaling()) { + while (SVT != MVT::f32 && SVT != MVT::f16) { + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); + if (ConstantFPSDNode::isValueValidForType(SVT, APF) && + // Only do this if the target has a native EXTLOAD instruction from + // smaller type. + TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && + TLI.ShouldShrinkFPConstant(OrigVT)) { + Type *SType = SVT.getTypeForEVT(*DAG.getContext()); + LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); + VT = SVT; + Extend = true; + } } } @@ -324,8 +330,6 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, // supported by the target. EVT VT = Tmp1.getValueType(); EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = Tmp3.getValueType(); - EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.CreateStackTemporary(VT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); @@ -335,13 +339,8 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, DAG.getEntryNode(), dl, Tmp1, StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); - // Truncate or zero extend offset to target pointer type. - Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); - // Add the offset to the index. - unsigned EltSize = EltVT.getSizeInBits()/8; - Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, - DAG.getConstant(EltSize, dl, IdxVT)); - SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); + SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3); + // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT); // Load the updated vector. 
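As a hedged host-side probe of the rationale above for skipping signaling NaNs (plain C++20, not part of this patch): narrowing an SNaN constant to f32 and widening it again may come back with the quiet bit set. The outcome depends on the host FPU and on how the compiler folds the conversions, so the program only reports what the host does.

#include <bit>
#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  double SNaN = std::numeric_limits<double>::signaling_NaN();
  double RoundTripped = static_cast<double>(static_cast<float>(SNaN));
  uint64_t Bits = std::bit_cast<uint64_t>(RoundTripped);
  bool QuietBit = (Bits >> 51) & 1;    // IEEE-754 binary64 quiet-NaN bit
  std::printf("quiet bit after f64 -> f32 -> f64: %d\n", int(QuietBit));
  return 0;
}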
@@ -795,7 +794,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; - // FALLTHROUGH + LLVM_FALLTHROUGH; case TargetLowering::Legal: { Value = SDValue(Node, 0); Chain = SDValue(Node, 1); @@ -1013,6 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::ADJUST_TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: + case ISD::ADDROFRETURNADDR: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1061,35 +1061,41 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SRL: case ISD::SRA: case ISD::ROTL: - case ISD::ROTR: + case ISD::ROTR: { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Node->getOperand(1).getValueType().isVector()) { - SDValue SAO = - DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), - Node->getOperand(1)); - HandleSDNode Handle(SAO); - LegalizeOp(SAO.getNode()); - NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), - Handle.getValue()); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!Op1.getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op1); + // The getShiftAmountOperand() may create a new operand node or + // return the existing one. If new operand is created we need + // to update the parent node. + // Do not try to legalize SAO here! It will be automatically legalized + // in the next round. + if (SAO != Op1) + NewNode = DAG.UpdateNodeOperands(Node, Op0, SAO); } - break; + } + break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: - case ISD::SHL_PARTS: + case ISD::SHL_PARTS: { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Node->getOperand(2).getValueType().isVector()) { - SDValue SAO = - DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), - Node->getOperand(2)); - HandleSDNode Handle(SAO); - LegalizeOp(SAO.getNode()); - NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), - Node->getOperand(1), - Handle.getValue()); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); + if (!Op2.getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op2); + // The getShiftAmountOperand() may create a new operand node or + // return the existing one. If new operand is created we need + // to update the parent node. + if (SAO != Op2) + NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO); } - break; + } + break; } if (NewNode != Node) { @@ -1118,12 +1124,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ReplaceNode(Node, ResultVals.data()); return; } + LLVM_FALLTHROUGH; } - // FALL THROUGH case TargetLowering::Expand: if (ExpandNode(Node)) return; - // FALL THROUGH + LLVM_FALLTHROUGH; case TargetLowering::LibCall: ConvertNodeToLibcall(Node); return; @@ -1196,21 +1202,16 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { } } + EVT VecVT = Vec.getValueType(); + if (!Ch.getNode()) { // Store the value to a temporary stack slot, then LOAD the returned part. 
- StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + StackPtr = DAG.CreateStackTemporary(VecVT); Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); } - // Add the offset to the index. - unsigned EltSize = - Vec.getValueType().getVectorElementType().getSizeInBits()/8; - Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); - StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); SDValue NewLoad; @@ -1220,7 +1221,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { else NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), - Vec.getValueType().getVectorElementType()); + VecVT.getVectorElementType()); // Replace the chain going out of the store, by the one out of the load. DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); @@ -1244,8 +1245,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDLoc dl(Op); // Store the value to a temporary stack slot, then LOAD the returned part. - - SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + EVT VecVT = Vec.getValueType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); @@ -1254,17 +1255,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. - - // Add the offset to the index. - unsigned EltSize = - Vec.getValueType().getVectorElementType().getSizeInBits()/8; - - Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); - - SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - StackPtr); + SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); // Store the subvector. Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo()); @@ -1593,6 +1584,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, break; } // Fallthrough if we are unsigned integer. + LLVM_FALLTHROUGH; case ISD::SETLE: case ISD::SETGT: case ISD::SETGE: @@ -1650,7 +1642,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); - unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); + unsigned SrcSize = SrcOp.getValueSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); @@ -2521,13 +2513,56 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } -/// Open code the operations for BITREVERSE. +/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. 
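A standalone scalar illustration of the mask + shift pattern named above (plain C++, not part of this patch): byte-swap first, then swap nibbles, bit pairs, and finally single bits within every byte, using the same 0xF0/0x0F, 0xCC/0x33 and 0xAA/0x55 masks repeated across the word.

#include <cassert>
#include <cstdint>

static uint32_t reverseBits(uint32_t V) {
  // BSWAP
  V = (V >> 24) | ((V >> 8) & 0x0000FF00u) | ((V << 8) & 0x00FF0000u) | (V << 24);
  V = ((V & 0xF0F0F0F0u) >> 4) | ((V & 0x0F0F0F0Fu) << 4); // swap i4
  V = ((V & 0xCCCCCCCCu) >> 2) | ((V & 0x33333333u) << 2); // swap i2
  V = ((V & 0xAAAAAAAAu) >> 1) | ((V & 0x55555555u) << 1); // swap i1
  return V;
}

int main() {
  assert(reverseBits(0x00000001u) == 0x80000000u);
  assert(reverseBits(0x12345678u) == 0x1E6A2C48u);
  return 0;
}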
SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Sz = VT.getScalarSizeInBits(); - SDValue Tmp, Tmp2; + SDValue Tmp, Tmp2, Tmp3; + + // If we can, perform BSWAP first and then the mask+swap the i4, then i2 + // and finally the i1 pairs. + // TODO: We can easily support i4/i2 legal types if any target ever does. + if (Sz >= 8 && isPowerOf2_32(Sz)) { + // Create the masks - repeating the pattern every byte. + APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0); + APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0); + for (unsigned J = 0; J != Sz; J += 8) { + MaskHi4 = MaskHi4.Or(APInt(Sz, 0xF0ull << J)); + MaskLo4 = MaskLo4.Or(APInt(Sz, 0x0Full << J)); + MaskHi2 = MaskHi2.Or(APInt(Sz, 0xCCull << J)); + MaskLo2 = MaskLo2.Or(APInt(Sz, 0x33ull << J)); + MaskHi1 = MaskHi1.Or(APInt(Sz, 0xAAull << J)); + MaskLo1 = MaskLo1.Or(APInt(Sz, 0x55ull << J)); + } + + // BSWAP if the type is wider than a single byte. + Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); + + // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + + // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + return Tmp; + } + Tmp = DAG.getConstant(0, dl, VT); for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { if (I < J) @@ -2551,7 +2586,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.getSimpleVT().getScalarType().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); @@ -2780,10 +2815,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, Node->getOperand(0), Node->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + cast<AtomicSDNode>(Node)->getMemOperand()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); break; @@ -2794,9 +2826,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { 
cast<AtomicSDNode>(Node)->getMemoryVT(), Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), - cast<AtomicSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + cast<AtomicSDNode>(Node)->getMemOperand()); Results.push_back(Swap.getValue(1)); break; } @@ -2808,10 +2838,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Res = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), - Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getSuccessOrdering(), - cast<AtomicSDNode>(Node)->getFailureOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand()); SDValue ExtRes = Res; SDValue LHS = Res; @@ -2879,15 +2906,32 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::SIGN_EXTEND_INREG: { - // NOTE: we could fall back on load/store here too for targets without - // SAR. However, it is doubtful that any exist. EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); + + // An in-register sign-extend of a boolean is a negation: + // 'true' (1) sign-extended is -1. + // 'false' (0) sign-extended is 0. + // However, we must mask the high bits of the source operand because the + // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero. + + // TODO: Do this for vectors too? + if (ExtraVT.getSizeInBits() == 1) { + SDValue One = DAG.getConstant(1, dl, VT); + SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, Zero, And); + Results.push_back(Neg); + break; + } + + // NOTE: we could fall back on load/store here too for targets without + // SRA. However, it is doubtful that any exist. EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); if (VT.isVector()) ShiftAmountTy = VT; - unsigned BitsDiff = VT.getScalarType().getSizeInBits() - - ExtraVT.getScalarType().getSizeInBits(); + unsigned BitsDiff = VT.getScalarSizeInBits() - + ExtraVT.getScalarSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); @@ -3248,17 +3292,49 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::MULHU: case ISD::MULHS: { - unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : - ISD::SMUL_LOHI; + unsigned ExpandOpcode = + Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); - assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && - "If this wasn't legal, it shouldn't have been created!"); + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), Node->getOperand(1)); Results.push_back(Tmp1.getValue(1)); break; } + case ISD::UMUL_LOHI: + case ISD::SMUL_LOHI: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + MVT VT = LHS.getSimpleValueType(); + unsigned MULHOpcode = + Node->getOpcode() == ISD::UMUL_LOHI ? 
ISD::MULHU : ISD::MULHS; + + if (TLI.isOperationLegalOrCustom(MULHOpcode, VT)) { + Results.push_back(DAG.getNode(ISD::MUL, dl, VT, LHS, RHS)); + Results.push_back(DAG.getNode(MULHOpcode, dl, VT, LHS, RHS)); + break; + } + + SmallVector<SDValue, 4> Halves; + EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext()); + assert(TLI.isTypeLegal(HalfType)); + if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves, + HalfType, DAG, + TargetLowering::MulExpansionKind::Always)) { + for (unsigned i = 0; i < 2; ++i) { + SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2 * i]); + SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[2 * i + 1]); + SDValue Shift = DAG.getConstant( + HalfType.getScalarSizeInBits(), dl, + TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); + } + break; + } + break; + } case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3293,7 +3369,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && TLI.isOperationLegalOrCustom(ISD::SHL, VT) && TLI.isOperationLegalOrCustom(ISD::OR, VT) && - TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { + TLI.expandMUL(Node, Lo, Hi, HalfType, DAG, + TargetLowering::MulExpansionKind::OnlyLegalOrCustom)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); SDValue Shift = @@ -3416,8 +3493,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // pre-lowered to the correct types. This all depends upon WideVT not // being a legal type for the architecture and thus has to be split to // two arguments. - SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; - SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + SDValue Ret; + if(DAG.getDataLayout().isLittleEndian()) { + // Halves of WideVT are packed into registers in different order + // depending on platform endianness. This is usually handled by + // the C calling convention, but we can't defer to it in + // the legalizer. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + } else { + SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; + Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + } BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(0, dl)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, @@ -3441,6 +3528,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, DAG.getConstant(0, dl, VT), ISD::SETNE); } + + // Truncate the result if SetCC returns a larger type than needed. 
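A quick standalone check of the half recombination used in the UMUL_LOHI/SMUL_LOHI expansion above (plain C++, not part of this patch): the wide product is the zero-extended low half OR'ed with the high half shifted left by the half width.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xDEADBEEFu, B = 0x12345678u;
  uint64_t Wide = uint64_t(A) * uint64_t(B); // the full 64-bit product
  uint32_t Lo = uint32_t(Wide);              // low half (UMUL_LOHI lo)
  uint32_t Hi = uint32_t(Wide >> 32);        // high half (UMUL_LOHI hi / MULHU)
  uint64_t Recombined = uint64_t(Lo) | (uint64_t(Hi) << 32);
  assert(Recombined == Wide);
  return 0;
}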
+ EVT RType = Node->getValueType(1); + if (RType.getSizeInBits() < TopHalf.getValueSizeInBits()) + TopHalf = DAG.getNode(ISD::TRUNCATE, dl, RType, TopHalf); + + assert(RType.getSizeInBits() == TopHalf.getValueSizeInBits() && + "Unexpected result type for S/UMULO legalization"); + Results.push_back(BottomHalf); Results.push_back(TopHalf); break; @@ -3476,9 +3572,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); - EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); - const DataLayout &TD = DAG.getDataLayout(); + EVT PTy = TLI.getPointerTy(TD); + unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); @@ -3492,7 +3588,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::SEXTLOAD, dl, PTy, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT); Addr = LD; - if (TM.isPositionIndependent()) { + if (TLI.isJumpTableRelative()) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. @@ -4019,10 +4115,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; + case ISD::BITREVERSE: case ISD::BSWAP: { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Tmp1 = DAG.getNode( ISD::SRL, dl, NVT, Tmp1, DAG.getConstant(DiffBits, dl, @@ -4073,6 +4170,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { ReplacedNode(Node); break; } + case ISD::SDIV: + case ISD::SREM: + case ISD::UDIV: + case ISD::UREM: case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -4082,7 +4183,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { TruncOp = ISD::BITCAST; } else { assert(OVT.isInteger() && "Cannot promote logic operation"); - ExtOp = ISD::ANY_EXTEND; + + switch (Node->getOpcode()) { + default: + ExtOp = ISD::ANY_EXTEND; + break; + case ISD::SDIV: + case ISD::SREM: + ExtOp = ISD::SIGN_EXTEND; + break; + case ISD::UDIV: + case ISD::UREM: + ExtOp = ISD::ZERO_EXTEND; + break; + } TruncOp = ISD::TRUNCATE; } // Promote each of the values to the new type. @@ -4093,6 +4207,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); break; } + case ISD::UMUL_LOHI: + case ISD::SMUL_LOHI: { + // Promote to a multiply in a wider integer type. + unsigned ExtOp = Node->getOpcode() == ISD::UMUL_LOHI ? ISD::ZERO_EXTEND + : ISD::SIGN_EXTEND; + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::MUL, dl, NVT, Tmp1, Tmp2); + + auto &DL = DAG.getDataLayout(); + unsigned OriginalSize = OVT.getScalarSizeInBits(); + Tmp2 = DAG.getNode( + ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(OriginalSize, dl, TLI.getScalarShiftAmountTy(DL, NVT))); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2)); + break; + } case ISD::SELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector() || @@ -4351,7 +4483,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); - // Handle bitcasts to different vector type with the smae total bit size. 
+ // Handle bitcasts to different vector type with the same total bit size. // // e.g. v2i64 = scalar_to_vector x:i64 // => diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 31ebf7b..72b56d8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -632,7 +632,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { SDLoc dl(N); auto MMOFlags = - L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; + L->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, @@ -1465,7 +1466,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, // TODO: Are there fast-math-flags to propagate to this FADD? Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, - DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, + DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble(), APInt(128, Parts)), dl, MVT::ppcf128)); Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT), @@ -1630,7 +1631,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; - APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); + APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. @@ -2085,7 +2086,8 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { // Load the value as an integer value with the same number of bits. 
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); auto MMOFlags = - L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; + L->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9a18943..dc436ce 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -57,8 +57,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; - case ISD::CONVERT_RNDSAT: - Res = PromoteIntRes_CONVERT_RNDSAT(N); break; case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; @@ -102,6 +100,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CONCAT_VECTORS: Res = PromoteIntRes_CONCAT_VECTORS(N); break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break; + case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; @@ -183,8 +186,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), ResVT, N->getChain(), N->getBasePtr(), - N->getMemOperand(), N->getOrdering(), - N->getSynchScope()); + N->getMemOperand()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -196,8 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, N->getMemOperand(), N->getOrdering(), - N->getSynchScope()); + Op2, N->getMemOperand()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -220,8 +221,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, SDValue Res = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3), - N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(), - N->getSynchScope()); + N->getMemOperand()); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); return Res.getValue(1); @@ -233,8 +233,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); SDValue Res = DAG.getAtomicCmpSwap( N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), - N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(), - N->getFailureOrdering(), N->getSynchScope()); + N->getBasePtr(), Op2, Op3, N->getMemOperand()); // Update the use to N with the newly created Res. 
for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i) ReplaceValueWith(SDValue(N, i), Res.getValue(i)); @@ -353,18 +352,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { return Result; } -SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || - CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || - CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && - "can only promote integers"); - EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getConvertRndSat(OutVT, SDLoc(N), N->getOperand(0), - N->getOperand(1), N->getOperand(2), - N->getOperand(3), N->getOperand(4), CvtCode); -} - SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); @@ -427,6 +414,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. + // + // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: + // before legalization: fp-to-uint16, 65534. -> 0xfffe + // after legalization: fp-to-sint32, 65534. -> 0x0000fffe return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); @@ -507,7 +498,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { N->getIndex()}; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -882,8 +873,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; - case ISD::CONVERT_RNDSAT: - Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; case ISD::SCALAR_TO_VECTOR: @@ -946,14 +935,16 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, SDValue OpL = GetPromotedInteger(NewLHS); SDValue OpR = GetPromotedInteger(NewRHS); - // We would prefer to promote the comparison operand with sign extension, - // if we find the operand is actually to truncate an AssertSext. With this - // optimization, we can avoid inserting real truncate instruction, which - // is redudant eventually. - if (OpL->getOpcode() == ISD::AssertSext && - cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() && - OpR->getOpcode() == ISD::AssertSext && - cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) { + // We would prefer to promote the comparison operand with sign extension. + // If the width of OpL/OpR excluding the duplicated sign bits is no greater + // than the width of NewLHS/NewRH, we can avoid inserting real truncate + // instruction, which is redudant eventually. 
+ unsigned OpLEffectiveBits = + OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; + unsigned OpREffectiveBits = + OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; + if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() && + OpREffectiveBits <= NewRHS.getValueSizeInBits()) { NewLHS = OpL; NewRHS = OpR; } else { @@ -990,8 +981,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), - N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), - N->getOrdering(), N->getSynchScope()); + N->getChain(), N->getBasePtr(), Op2, N->getMemOperand()); } SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { @@ -1051,8 +1041,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be // truncated away. - assert(N->getOperand(0).getValueType().getSizeInBits() >= - N->getValueType(0).getVectorElementType().getSizeInBits() && + assert(N->getOperand(0).getValueSizeInBits() >= + N->getValueType(0).getScalarSizeInBits() && "Type of inserted value narrower than vector element type!"); SmallVector<SDValue, 16> NewOps; @@ -1062,18 +1052,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || - CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || - CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && - "can only promote integer arguments"); - SDValue InOp = GetPromotedInteger(N->getOperand(0)); - return DAG.getConvertRndSat(N->getValueType(0), SDLoc(N), InOp, - N->getOperand(1), N->getOperand(2), - N->getOperand(3), N->getOperand(4), CvtCode); -} - SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo) { if (OpNo == 1) { @@ -1081,8 +1059,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, // have to match the vector element type. // Check that any extra bits introduced will be truncated away. - assert(N->getOperand(1).getValueType().getSizeInBits() >= - N->getValueType(0).getVectorElementType().getSizeInBits() && + assert(N->getOperand(1).getValueSizeInBits() >= + N->getValueType(0).getScalarSizeInBits() && "Type of inserted value narrower than vector element type!"); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), GetPromotedInteger(N->getOperand(1)), @@ -1210,7 +1188,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), - TruncateStore); + TruncateStore, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, @@ -1233,7 +1211,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); - return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + + SDValue Res = SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + // updated in place. 
+ if (Res.getNode() == N) + return Res; + + ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return SDValue(); } SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, @@ -1314,6 +1300,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; + case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; @@ -1352,8 +1339,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { SDValue Tmp = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs, N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), - AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(), - AN->getSynchScope()); + AN->getMemOperand()); // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine // success simply by comparing the loaded value against the ingoing @@ -1508,8 +1494,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT ShTy = Amt.getValueType(); - unsigned ShBits = ShTy.getScalarType().getSizeInBits(); - unsigned NVTBits = NVT.getScalarType().getSizeInBits(); + unsigned ShBits = ShTy.getScalarSizeInBits(); + unsigned NVTBits = NVT.getScalarSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); SDLoc dl(N); @@ -1700,7 +1686,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, EVT CCT = getSetCCResultType(NVT); // Hi part is always the same op - Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH}); + Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); // We need to know whether to select Lo part that corresponds to 'winning' // Hi part or if Hi parts are equal. 
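The ExpandIntRes_MINMAX hunks above and below split a double-width min/max into native-width halves: the high half reuses the original signed or unsigned opcode, and the low half is either the low half of whichever side won the high-part comparison or, when the high parts tie, an unsigned min/max of the two low halves. A standalone sketch of that decomposition for a 64-bit signed max built from 32-bit pieces; the names and the plain C++ are illustrative, not SelectionDAG nodes:

#include <algorithm>
#include <cstdint>

int64_t smax64_expanded(int64_t A, int64_t B) {
  int32_t AH = (int32_t)(A >> 32), BH = (int32_t)(B >> 32);
  uint32_t AL = (uint32_t)A, BL = (uint32_t)B;

  int32_t Hi = std::max(AH, BH);          // hi part: same (signed) op
  uint32_t LoCmp = (AH > BH) ? AL : BL;   // lo from the side that won on hi
  uint32_t LoMinMax = std::max(AL, BL);   // unsigned op when the hi parts tie
  uint32_t Lo = (AH == BH) ? LoMinMax : LoCmp;

  // Reassemble the halves (two's-complement wrap assumed, as on all targets).
  return (int64_t)(((uint64_t)(uint32_t)Hi << 32) | Lo);
}

The select on IsHiEq in the hunk below corresponds to the final ternary here.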
@@ -1711,7 +1697,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL); // Recursed Lo part if Hi parts are equal, this uses unsigned version - SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL}); + SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL}); Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp); } @@ -1774,7 +1760,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, switch (BoolType) { case TargetLoweringBase::UndefinedBooleanContent: OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); - // Fallthrough + LLVM_FALLTHROUGH; case TargetLoweringBase::ZeroOrOneBooleanContent: Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); break; @@ -2002,6 +1988,19 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, Hi = DAG.getConstant(0, dl, NVT); } +void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + + EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); + Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT); + // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy)); +} + void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -2065,7 +2064,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (ExtType == ISD::SEXTLOAD) { // The high part is obtained by SRA'ing all but one of the bits of the // lo part. - unsigned LoSize = Lo.getValueType().getSizeInBits(); + unsigned LoSize = Lo.getValueSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); @@ -2166,7 +2165,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, GetExpandedInteger(N->getOperand(0), LL, LH); GetExpandedInteger(N->getOperand(1), RL, RH); - if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH)) + if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, + TargetLowering::MulExpansionKind::OnlyLegalOrCustom, + LL, LH, RL, RH)) return; // If nothing else, we can make a libcall. @@ -2180,7 +2181,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, else if (VT == MVT::i128) LC = RTLIB::MUL_I128; - if (LC == RTLIB::UNKNOWN_LIBCALL) { + if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { // We'll expand the multiplication by brute force because we have no other // options. This is a trivially-generalized version of the code from // Hacker's Delight (itself derived from Knuth's Algorithm M from section @@ -2354,8 +2355,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // the new SHL_PARTS operation would need further legalization. SDValue ShiftOp = N->getOperand(1); EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - assert(ShiftTy.getScalarType().getSizeInBits() >= - Log2_32_Ceil(VT.getScalarType().getSizeInBits()) && + assert(ShiftTy.getScalarSizeInBits() >= + Log2_32_Ceil(VT.getScalarSizeInBits()) && "ShiftAmountTy is too small to cover the range of this type!"); if (ShiftOp.getValueType() != ShiftTy) ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); @@ -2436,8 +2437,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, "Operand over promoted?"); // Split the promoted operand. This will simplify when it is expanded. 
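Earlier in this group of hunks, the new ExpandIntRes_FLT_ROUNDS materialises only the low half and derives the high half with an arithmetic shift, because -1 is a legal FLT_ROUNDS value and must be sign- rather than zero-extended. A plain-C++ sketch of that split, assuming an arithmetic right shift on signed values; the helper is hypothetical, not LLVM code:

#include <cstdint>

void expandFltRounds(int32_t FltRounds, int32_t &Lo, int32_t &Hi) {
  Lo = FltRounds;  // the i32 FLT_ROUNDS_ value
  Hi = Lo >> 31;   // shifting by (bit width - 1) replicates the sign bit
}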
SplitInteger(Res, Lo, Hi); - unsigned ExcessBits = - Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); @@ -2458,13 +2458,12 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { // The high part gets the sign extension from the lo-part. This handles // things like sextinreg V:i64 from i8. Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, - DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl, + DAG.getConstant(Hi.getValueSizeInBits() - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); } else { // For example, extension of an i48 to an i64. Leave the low part alone, // sext_inreg the high part. - unsigned ExcessBits = - EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); @@ -2690,8 +2689,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, "Operand over promoted?"); // Split the promoted operand. This will simplify when it is expanded. SplitInteger(Res, Lo, Hi); - unsigned ExcessBits = - Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); @@ -2707,10 +2705,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDValue Swap = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), - N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getSynchScope()); + N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand()); ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); @@ -2833,51 +2828,51 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, case ISD::SETUGE: LowCC = ISD::SETUGE; break; } - // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison - // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands - // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2; + // LoCmp = lo(op1) < lo(op2) // Always unsigned comparison + // HiCmp = hi(op1) < hi(op2) // Signedness depends on operands + // dest = hi(op1) == hi(op2) ? LoCmp : HiCmp; // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? 
B2 : B3) --> (B1 & B2)|(!B1&B3) TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, nullptr); - SDValue Tmp1, Tmp2; + SDValue LoCmp, HiCmp; if (TLI.isTypeLegal(LHSLo.getValueType()) && TLI.isTypeLegal(RHSLo.getValueType())) - Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), - LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); - if (!Tmp1.getNode()) - Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), - LHSLo, RHSLo, LowCC); + LoCmp = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo, + RHSLo, LowCC, false, DagCombineInfo, dl); + if (!LoCmp.getNode()) + LoCmp = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, + RHSLo, LowCC); if (TLI.isTypeLegal(LHSHi.getValueType()) && TLI.isTypeLegal(RHSHi.getValueType())) - Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); - if (!Tmp2.getNode()) - Tmp2 = DAG.getNode(ISD::SETCC, dl, - getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, DAG.getCondCode(CCCode)); - - ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode()); - ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode()); - if ((Tmp1C && Tmp1C->isNullValue()) || - (Tmp2C && Tmp2C->isNullValue() && - (CCCode == ISD::SETLE || CCCode == ISD::SETGE || - CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) || - (Tmp2C && Tmp2C->getAPIntValue() == 1 && - (CCCode == ISD::SETLT || CCCode == ISD::SETGT || - CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) { - // low part is known false, returns high part. + HiCmp = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, + RHSHi, CCCode, false, DagCombineInfo, dl); + if (!HiCmp.getNode()) + HiCmp = + DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, DAG.getCondCode(CCCode)); + + ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode()); + ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode()); + + bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE || + CCCode == ISD::SETUGE || CCCode == ISD::SETULE); + + if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) || + (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) || + (LoCmpC && LoCmpC->isNullValue())))) { // For LE / GE, if high part is known false, ignore the low part. - // For LT / GT, if high part is known true, ignore the low part. - NewLHS = Tmp2; + // For LT / GT: if low part is known false, return the high part. + // if high part is known true, ignore the low part. + NewLHS = HiCmp; NewRHS = SDValue(); return; } if (LHSHi == RHSHi) { // Comparing the low bits is enough. 
- NewLHS = Tmp1; + NewLHS = LoCmp; NewRHS = SDValue(); return; } @@ -2922,8 +2917,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, if (!NewLHS.getNode()) NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getSelect(dl, Tmp1.getValueType(), - NewLHS, Tmp1, Tmp2); + NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), + NewLHS, LoCmp, HiCmp); NewRHS = SDValue(); } @@ -3198,9 +3193,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { cast<AtomicSDNode>(N)->getMemoryVT(), N->getOperand(0), N->getOperand(1), N->getOperand(2), - cast<AtomicSDNode>(N)->getMemOperand(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getSynchScope()); + cast<AtomicSDNode>(N)->getMemOperand()); return Swap.getValue(1); } @@ -3327,6 +3320,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } +SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + assert(NVT.isVector() && "This type must be promoted to a vector type"); + + SDLoc dl(N); + + // For operands whose TypeAction is to promote, extend the promoted node + // appropriately (ZERO_EXTEND or SIGN_EXTEND) from the original pre-promotion + // type, and then construct a new *_EXTEND_VECTOR_INREG node to the promote-to + // type.. + if (getTypeAction(N->getOperand(0).getValueType()) + == TargetLowering::TypePromoteInteger) { + SDValue Promoted; + + switch(N->getOpcode()) { + case ISD::SIGN_EXTEND_VECTOR_INREG: + Promoted = SExtPromotedInteger(N->getOperand(0)); + break; + case ISD::ZERO_EXTEND_VECTOR_INREG: + Promoted = ZExtPromotedInteger(N->getOperand(0)); + break; + case ISD::ANY_EXTEND_VECTOR_INREG: + Promoted = GetPromotedInteger(N->getOperand(0)); + break; + default: + llvm_unreachable("Node has unexpected Opcode"); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Promoted); + } + + // Directly extend to the appropriate transform-to type. + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 144bed2..cf19d75 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -117,6 +117,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { Mapped |= 64; if (WidenedVectors.find(Res) != WidenedVectors.end()) Mapped |= 128; + if (PromotedFloats.find(Res) != PromotedFloats.end()) + Mapped |= 256; if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes @@ -159,6 +161,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << " SplitVectors"; if (Mapped & 128) dbgs() << " WidenedVectors"; + if (Mapped & 256) + dbgs() << " PromotedFloats"; dbgs() << "\n"; llvm_unreachable(nullptr); } @@ -484,7 +488,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // updated after all operands have been analyzed. Since this is rare, // the code tries to minimize overhead in the non-morphing case. 
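The IntegerExpandSetCCOperands renaming above (Tmp1/Tmp2 becoming LoCmp/HiCmp) keeps the long-standing recipe: compare the low halves unsigned, compare the high halves with the original predicate, and pick between the two results depending on whether the high halves are equal. A scalar sketch of that recipe for a signed 64-bit less-than assembled from 32-bit pieces, illustrative only:

#include <cstdint>

// dest = (hi(a) == hi(b)) ? LoCmp : HiCmp, with LoCmp always unsigned.
bool slt64_expanded(int64_t A, int64_t B) {
  int32_t AH = (int32_t)(A >> 32), BH = (int32_t)(B >> 32);
  uint32_t AL = (uint32_t)A, BL = (uint32_t)B;

  bool LoCmp = AL < BL; // low halves: always an unsigned comparison
  bool HiCmp = AH < BH; // high halves: signedness follows the predicate
  return (AH == BH) ? LoCmp : HiCmp;
}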
- SmallVector<SDValue, 8> NewOps; + std::vector<SDValue> NewOps; unsigned NumProcessed = 0; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue OrigOp = N->getOperand(i); @@ -500,7 +504,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. - NewOps.append(N->op_begin(), N->op_begin() + i); + NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); NewOps.push_back(Op); } } @@ -794,8 +798,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // Note that in some cases vector operation operands may be greater than // the vector element type. For example BUILD_VECTOR of type <1 x i1> with // a constant i8 operand. - assert(Result.getValueType().getSizeInBits() >= - Op.getValueType().getVectorElementType().getSizeInBits() && + assert(Result.getValueSizeInBits() >= Op.getScalarValueSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); @@ -905,7 +908,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { /// Convert to an integer of the same size. SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { - unsigned BitWidth = Op.getValueType().getSizeInBits(); + unsigned BitWidth = Op.getValueSizeInBits(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } @@ -913,7 +916,7 @@ SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { /// Convert to a vector of integers of the same size. SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { assert(Op.getValueType().isVector() && "Only applies to vectors!"); - unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); + unsigned EltWidth = Op.getScalarValueSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), @@ -1018,22 +1021,6 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair, DAG.getIntPtrConstant(1, dl)); } -SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, - SDValue Index) { - SDLoc dl(Index); - // Make sure the index type is big enough to compute in. - Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout())); - - // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. - assert(EltSize * 8 == EltVT.getSizeInBits() && - "Converting bits to bytes lost precision"); - - Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, - DAG.getConstant(EltSize, dl, Index.getValueType())); - return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); -} - /// Build an integer with low bits Lo and high bits Hi. SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { // Arbitrarily use dlHi for result SDLoc @@ -1145,7 +1132,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { SDLoc dl(Op); assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == - Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); + Op.getValueSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, DAG.getConstant(LoVT.getSizeInBits(), dl, @@ -1157,8 +1144,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, /// size of Op's. 
void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { - EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), - Op.getValueType().getSizeInBits()/2); + EVT HalfVT = + EVT::getIntegerVT(*DAG.getContext(), Op.getValueSizeInBits() / 2); SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 84ad8f8..ec55662 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -173,7 +173,6 @@ private: /// input operand is returned. SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); @@ -242,6 +241,7 @@ private: SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); @@ -249,7 +249,6 @@ private: SDValue PromoteIntRes_BITREVERSE(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); - SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntRes_CTLZ(SDNode *N); SDValue PromoteIntRes_CTPOP(SDNode *N); SDValue PromoteIntRes_CTTZ(SDNode *N); @@ -288,7 +287,6 @@ private: SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); - SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -339,6 +337,7 @@ private: void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -601,7 +600,6 @@ private: SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); - SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); @@ -707,7 +705,6 @@ private: SDValue WidenVecRes_BITCAST(SDNode* N); SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); - SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N); SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N); SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 665180e..3682c32 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -141,11 +141,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { if 
(DAG.getDataLayout().isBigEndian()) std::swap(LHS, RHS); - Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, - EVT::getIntegerVT( - *DAG.getContext(), - LHS.getValueType().getSizeInBits() << 1), - LHS, RHS)); + Vals.push_back(DAG.getNode( + ISD::BUILD_PAIR, dl, + EVT::getIntegerVT(*DAG.getContext(), LHS.getValueSizeInBits() << 1), + LHS, RHS)); } Lo = Vals[Slot++]; Hi = Vals[Slot++]; @@ -337,7 +336,8 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SDLoc dl(N); - if (N->getValueType(0).isVector()) { + if (N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isInteger()) { // An illegal expanding type is being converted to a legal vector type. // Make a two element vector out of the expanded parts and convert that // instead, but only if the new vector type is legal (otherwise there diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3c9cb17..d4fa20f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -106,7 +106,8 @@ class VectorLegalizer { SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); SDValue ExpandBITREVERSE(SDValue Op); - SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); + SDValue ExpandCTLZ(SDValue Op); + SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); /// \brief Implements vector promotion. /// @@ -332,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -362,7 +365,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Result = Tmp1; break; } - // FALL THROUGH + LLVM_FALLTHROUGH; } case TargetLowering::Expand: Result = Expand(Op); @@ -693,9 +696,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return UnrollVSETCC(Op); case ISD::BITREVERSE: return ExpandBITREVERSE(Op); + case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: + return ExpandCTLZ(Op); case ISD::CTTZ_ZERO_UNDEF: - return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); + return ExpandCTTZ_ZERO_UNDEF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -770,8 +775,8 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { SDLoc DL(Op); EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); - unsigned BW = VT.getScalarType().getSizeInBits(); - unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); + unsigned BW = VT.getScalarSizeInBits(); + unsigned OrigBW = OrigTy.getScalarSizeInBits(); SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); Op = Op.getOperand(0); @@ -817,8 +822,8 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // Now we need sign extend. Do this by shifting the elements. Even if these // aren't legal operations, they have a better chance of being legalized // without full scalarization than the sign extension does. 
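ExpandSIGN_EXTEND_VECTOR_INREG falls back to a shift pair: the narrow source bits are moved to the top of each lane and then arithmetic-shifted back down, which sign-extends them without scalarizing; the widened shift amounts appear just below. A per-lane scalar sketch, assuming 32-bit lanes and an arithmetic right shift:

#include <cstdint>

// Sign-extend the low SrcBits of X within a 32-bit lane via SHL + SRA.
// Assumes 0 < SrcBits <= 32.
int32_t signExtendInReg32(uint32_t X, unsigned SrcBits) {
  unsigned Shift = 32 - SrcBits;
  return (int32_t)(X << Shift) >> Shift;
}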
- unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); - unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + unsigned EltWidth = VT.getScalarSizeInBits(); + unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); return DAG.getNode(ISD::SRA, DL, VT, DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), @@ -951,7 +956,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // If the mask and the type are different sizes, unroll the vector op. This // can occur when getSetCCResultType returns something that is different in // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. - if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) + if (VT.getSizeInBits() != Op1.getValueSizeInBits()) return DAG.UnrollVectorOp(Op.getNode()); // Bitcast the operands to be the same type as the mask. @@ -961,7 +966,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); + APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); @@ -979,21 +984,20 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) return DAG.UnrollVectorOp(Op.getNode()); - EVT SVT = VT.getScalarType(); - assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && - "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + unsigned BW = VT.getScalarSizeInBits(); + assert((BW == 64 || BW == 32) && + "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); - unsigned BW = SVT.getSizeInBits(); - SDValue HalfWord = DAG.getConstant(BW/2, DL, VT); + SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); // Constants to clear the upper part of the word. // Notice that we can also use SHL+SHR, but using a constant is slightly // faster on x86. - uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; + uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); + SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType()); // Clear upper part of LO, lower HI SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -1010,7 +1014,6 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); } - SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDLoc DL(Op); @@ -1022,12 +1025,53 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); } -SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { +SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { + EVT VT = Op.getValueType(); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. 
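The new ExpandCTLZ that begins here first tries the plain CTLZ opcode and otherwise falls back to the CTPOP-based method spelled out in the hunk that follows: smear the leading one across all lower bits, then popcount the complement. A scalar sketch of that identity for 16-bit elements, with a compiler builtin standing in for the CTPOP node:

#include <cstdint>

unsigned ctlz16(uint16_t X) {
  X |= X >> 1;  // after these ORs every bit at or below the leading one is set
  X |= X >> 2;
  X |= X >> 4;
  X |= X >> 8;
  return __builtin_popcount((uint16_t)~X); // complement holds the leading zeros
}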
+ if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) { + SDLoc DL(Op); + return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0)); + } + + // If CTPOP is available we can lower with a CTPOP based method: + // u16 ctlz(u16 x) { + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // return ctpop(~x); + // } + // Ref: "Hacker's Delight" by Henry Warren + if (isPowerOf2_32(NumBitsPerElt) && + TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) { + SDLoc DL(Op); + SDValue Res = Op.getOperand(0); + EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + for (unsigned i = 1; i != NumBitsPerElt; i *= 2) + Res = DAG.getNode( + ISD::OR, DL, VT, Res, + DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy))); + + Res = DAG.getNOT(DL, Res, VT); + return DAG.getNode(ISD::CTPOP, DL, VT, Res); + } + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { // If the non-ZERO_UNDEF version is supported we can use that instead. - unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; - if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) { + if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) { SDLoc DL(Op); - return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0)); + return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0)); } // Otherwise go ahead and unroll. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f3adca4..6906f67 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -51,7 +51,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; - case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -179,17 +178,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { return InOp; } -SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - SDValue Op0 = GetScalarizedVector(N->getOperand(0)); - return DAG.getConvertRndSat(NewVT, SDLoc(N), - Op0, DAG.getValueType(NewVT), - DAG.getValueType(Op0.getValueType()), - N->getOperand(3), - N->getOperand(4), - cast<CvtRndSatSDNode>(N)->getCvtCode()); -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0).getVectorElementType(), @@ -621,7 +609,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITREVERSE: case ISD::BSWAP: - case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: @@ -846,7 +833,6 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, GetSplitVector(Vec, Lo, 
Hi); EVT VecVT = Vec.getValueType(); - EVT VecElemVT = VecVT.getVectorElementType(); unsigned VecElems = VecVT.getVectorNumElements(); unsigned SubElems = SubVec.getValueType().getVectorNumElements(); @@ -872,7 +858,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Store the new subvector into the specified index. - SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx); + SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo()); @@ -882,7 +868,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + unsigned IncrementSize = Lo.getValueSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); @@ -1003,7 +989,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Store the new element. This may be larger than the vector element type, // so use a truncating store. - SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = @@ -1014,7 +1000,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + unsigned IncrementSize = Lo.getValueSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); @@ -1114,11 +1100,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, - ExtType); + ExtType, MLD->isExpandingLoad()); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, + MLD->isExpandingLoad()); MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MLD->getPointerInfo(), @@ -1126,7 +1111,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, - ExtType); + ExtType, MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the @@ -1237,18 +1222,6 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); - } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { - SDValue DTyOpLo = DAG.getValueType(LoVT); - SDValue DTyOpHi = DAG.getValueType(HiVT); - SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); - SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, - CvtCode); - Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, - CvtCode); } else { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -1282,7 +1255,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, LLVMContext &Ctx = *DAG.getContext(); EVT NewSrcVT = EVT::getVectorVT( Ctx, EVT::getIntegerVT( - Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2), + Ctx, SrcVT.getScalarSizeInBits() * 2), NumElements); EVT SplitSrcVT = EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); @@ -1651,7 +1624,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Load back the required element. - StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, MachinePointerInfo(), EltVT); } @@ -1769,19 +1742,18 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, Alignment, N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - N->isTruncatingStore()); - - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + N->isTruncatingStore(), + N->isCompressingStore()); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + N->isCompressingStore()); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - N->isTruncatingStore()); + N->isTruncatingStore(), N->isCompressingStore()); // Build a factor node to remember that this store is independent of the // other one. @@ -1940,8 +1912,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // if we're trying to split it at all. assert() that's true, just in case. 
assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - unsigned InElementSize = InVT.getVectorElementType().getSizeInBits(); - unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits(); + unsigned InElementSize = InVT.getScalarSizeInBits(); + unsigned OutElementSize = OutVT.getScalarSizeInBits(); // If the input elements are only 1/2 the width of the result elements, // just use the normal splitting. Our trick only work if there's room @@ -2047,7 +2019,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; - case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; @@ -2695,86 +2666,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } -SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { - SDLoc dl(N); - SDValue InOp = N->getOperand(0); - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - - EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); - - SDValue DTyOp = DAG.getValueType(WidenVT); - SDValue STyOp = DAG.getValueType(InWidenVT); - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - - unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { - InOp = GetWidenedVector(InOp); - InVT = InOp.getValueType(); - InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - if (TLI.isTypeLegal(InWidenVT)) { - // Because the result and the input are different vector types, widening - // the result could create a legal type but widening the input might make - // it an illegal type that might lead to repeatedly splitting the input - // and then widening it. To avoid this, we widen the input only if - // it results in a legal type. - if (WidenNumElts % InVTNumElts == 0) { - // Widen the input and call convert on the widened input vector. - unsigned NumConcat = WidenNumElts/InVTNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); - Ops[0] = InOp; - SDValue UndefVal = DAG.getUNDEF(InVT); - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; - - InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops); - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - if (InVTNumElts % WidenNumElts == 0) { - // Extract the input and convert the shorten input vector. - InOp = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - } - - // Otherwise unroll into some nasty scalar code and rebuild the vector. 
- SmallVector<SDValue, 16> Ops(WidenNumElts); - EVT EltVT = WidenVT.getVectorElementType(); - DTyOp = DAG.getValueType(EltVT); - STyOp = DAG.getValueType(InEltVT); - - unsigned MinElts = std::min(InVTNumElts, WidenNumElts); - unsigned i; - for (i=0; i < MinElts; ++i) { - SDValue ExtVal = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i < WidenNumElts; ++i) - Ops[i] = UndefVal; - - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); -} - SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT VT = N->getValueType(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -2881,7 +2772,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), - N->getMemOperand(), ExtType); + N->getMemOperand(), ExtType, + N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -3317,7 +3209,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { "Mask and data vectors should have the same number of elements"); return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(), Mask, MST->getMemoryVT(), MST->getMemOperand(), - false); + false, MST->isCompressingStore()); } SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { @@ -3547,7 +3439,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LD->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); - if (L->getValueType(0).isVector()) { + if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { + // Later code assumes the vector loads produced will be mergeable, so we + // must pad the final entry up to the previous width. Scalars are + // combined separately. 
SmallVector<SDValue, 16> Loads; Loads.push_back(L); unsigned size = L->getValueSizeInBits(0); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 1e5c4a7..ded8e68 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -631,7 +631,7 @@ SUnit *ResourcePriorityQueue::pop() { void ResourcePriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); - std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + std::vector<SUnit *>::iterator I = find(Queue, SU); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 802c459..3549ccd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1339,7 +1339,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); if (Reg) { SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; - if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) + if (!is_contained(LRegs, Reg)) continue; } SU->isPending = false; @@ -1704,8 +1704,7 @@ public: void remove(SUnit *SU) override { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && "Not in queue!"); - std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), - SU); + std::vector<SUnit *>::iterator I = find(Queue, SU); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5cc8066..a058942 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -15,10 +15,20 @@ #ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H #define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/Casting.h" +#include <cassert> +#include <string> +#include <vector> namespace llvm { + +class InstrItineraryData; + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. /// /// Edges between SUnits are initially based on edges in the SelectionDAG, @@ -44,7 +54,7 @@ namespace llvm { explicit ScheduleDAGSDNodes(MachineFunction &mf); - ~ScheduleDAGSDNodes() override {} + ~ScheduleDAGSDNodes() override = default; /// Run - perform scheduling. 
/// @@ -131,6 +141,7 @@ namespace llvm { unsigned DefIdx; unsigned NodeNumDefs; MVT ValueType; + public: RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); @@ -150,6 +161,7 @@ namespace llvm { } void Advance(); + private: void InitNodeNumDefs(); }; @@ -175,6 +187,7 @@ namespace llvm { void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 29d11c7..e225ba8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -134,7 +134,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { // we care if the resultant vector is all ones, not whether the individual // constants are. SDValue NotZero = N->getOperand(i); - unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) { if (CN->getAPIntValue().countTrailingOnes() < EltSize) return false; @@ -173,7 +173,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { // We only want to check enough bits to cover the vector elements, because // we care if the resultant vector is all zeros, not whether the individual // constants are. - unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) { if (CN->getAPIntValue().countTrailingZeros() < EltSize) return false; @@ -403,7 +403,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddPointer(GA->getGlobal()); ID.AddInteger(GA->getOffset()); ID.AddInteger(GA->getTargetFlags()); - ID.AddInteger(GA->getAddressSpace()); break; } case ISD::BasicBlock: @@ -521,24 +520,6 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { AddNodeIDCustom(ID, N); } -/// encodeMemSDNodeFlags - Generic routine for computing a value for use in -/// the CSE map that carries volatility, temporalness, indexing mode, and -/// extension/truncation information. 
-/// -static inline unsigned -encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, - bool isNonTemporal, bool isInvariant) { - assert((ConvType & 3) == ConvType && - "ConvType may not require more than 2 bits!"); - assert((AM & 7) == AM && - "AM may not require more than 3 bits!"); - return ConvType | - (AM << 2) | - (isVolatile << 5) | - (isNonTemporal << 6) | - (isInvariant << 7); -} - //===----------------------------------------------------------------------===// // SelectionDAG Class //===----------------------------------------------------------------------===// @@ -1030,7 +1011,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); if (Op.getValueType() == VT) return Op; - unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); return getNode(ISD::AND, DL, Op.getValueType(), Op, @@ -1040,7 +1021,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); - assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + assert(VT.getSizeInBits() == Op.getValueSizeInBits() && "The sizes of the input and result must match in order to perform the " "extend in-register."); assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && @@ -1051,7 +1032,7 @@ SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); - assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + assert(VT.getSizeInBits() == Op.getValueSizeInBits() && "The sizes of the input and result must match in order to perform the " "extend in-register."); assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && @@ -1062,7 +1043,7 @@ SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); - assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + assert(VT.getSizeInBits() == Op.getValueSizeInBits() && "The sizes of the input and result must match in order to perform the " "extend in-register."); assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && @@ -1123,7 +1104,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypePromoteInteger) { EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); - APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); + APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } // In other cases the element type is illegal and needs to be expanded, for @@ -1149,7 +1130,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, SmallVector<SDValue, 2> EltParts; for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) - 
.trunc(ViaEltSizeInBits), DL, + .zextOrTrunc(ViaEltSizeInBits), DL, ViaEltVT, isT, isO)); } @@ -1166,12 +1147,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, // This situation occurs in MIPS MSA. SmallVector<SDValue, 8> Ops; - for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); - - SDValue Result = getNode(ISD::BITCAST, DL, VT, - getNode(ISD::BUILD_VECTOR, DL, ViaVecVT, Ops)); - return Result; + return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && @@ -1280,7 +1258,6 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); - ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); @@ -1333,7 +1310,9 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); + Alignment = MF->getFunction()->optForSize() + ? getDataLayout().getABITypeAlignment(C->getType()) + : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1650,31 +1629,6 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); } -SDValue SelectionDAG::getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val, - SDValue DTy, SDValue STy, SDValue Rnd, - SDValue Sat, ISD::CvtCode Code) { - // If the src and dest types are the same and the conversion is between - // integer types of the same sign or two floats, no conversion is necessary. 
- if (DTy == STy && - (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF)) - return Val; - - FoldingSetNodeID ID; - SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; - AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); - void* IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) - return SDValue(E, 0); - - auto *N = - newSDNode<CvtRndSatSDNode>(VT, dl.getIROrder(), dl.getDebugLoc(), Code); - createOperands(N, Ops); - - CSEMap.InsertNode(N, IP); - InsertNode(N); - return SDValue(N, 0); -} - SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); @@ -1863,13 +1817,13 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) { } SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { - MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); - int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } @@ -1881,8 +1835,8 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { unsigned Align = std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2)); - MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); + MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); + int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } @@ -1943,29 +1897,29 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, default: break; case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || R==APFloat::cmpLessThan, dl, VT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || R==APFloat::cmpEqual, dl, VT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - // fall through + LLVM_FALLTHROUGH; case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || R==APFloat::cmpEqual, dl, VT); case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT); @@ -1998,11 +1952,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { - // This predicate is not safe for vector operations. 
- if (Op.getValueType().isVector()) - return false; - - unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Op.getScalarValueSizeInBits(); return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); } @@ -2016,28 +1966,244 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, return (KnownZero & Mask) == Mask; } +/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that +/// is less than the element bit-width of the shift node, return it. +static const APInt *getValidShiftAmountConstant(SDValue V) { + if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.ult(V.getScalarValueSizeInBits())) + return &ShAmt; + } + return nullptr; +} + /// Determine which bits of Op are known to be either zero or one and return -/// them in the KnownZero/KnownOne bitsets. +/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are +/// those that are shared by every vector element. void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { - unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth); +} + +/// Determine which bits of Op are known to be either zero or one and return +/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows +/// us to only collect the known bits that are shared by the requested vector +/// elements. +/// TODO: We only support DemandedElts on a few opcodes so far, the remainder +/// should be added when they become necessary. +void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, const APInt &DemandedElts, + unsigned Depth) const { + unsigned BitWidth = Op.getScalarValueSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. if (Depth == 6) return; // Limit search depth. APInt KnownZero2, KnownOne2; + unsigned NumElts = DemandedElts.getBitWidth(); - switch (Op.getOpcode()) { + if (!DemandedElts) + return; // No demanded elts, better to assume we don't know anything. + + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { case ISD::Constant: // We know all of the bits for a constant! KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); KnownZero = ~KnownOne; break; + case ISD::BUILD_VECTOR: + // Collect the known bits that are shared by every demanded vector element. + assert(NumElts == Op.getValueType().getVectorNumElements() && + "Unexpected vector size"); + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; + + SDValue SrcOp = Op.getOperand(i); + computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1); + + // BUILD_VECTOR can implicitly truncate sources, we must handle this. + if (SrcOp.getValueSizeInBits() != BitWidth) { + assert(SrcOp.getValueSizeInBits() > BitWidth && + "Expected BUILD_VECTOR implicit truncation"); + KnownOne2 = KnownOne2.trunc(BitWidth); + KnownZero2 = KnownZero2.trunc(BitWidth); + } + + // Known bits are the values that are shared by every demanded element. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + + // If we don't know any bits, early out. 
+ if (!KnownOne && !KnownZero) + break; + } + break; + case ISD::VECTOR_SHUFFLE: { + // Collect the known bits that are shared by every vector element referenced + // by the shuffle. + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + + int M = SVN->getMaskElt(i); + if (M < 0) { + // For UNDEF elements, we don't know anything about the common state of + // the shuffle result. + KnownOne.clearAllBits(); + KnownZero.clearAllBits(); + DemandedLHS.clearAllBits(); + DemandedRHS.clearAllBits(); + break; + } + + if ((unsigned)M < NumElts) + DemandedLHS.setBit((unsigned)M % NumElts); + else + DemandedRHS.setBit((unsigned)M % NumElts); + } + // Known bits are the values that are shared by every demanded element. + if (!!DemandedLHS) { + SDValue LHS = Op.getOperand(0); + computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1); + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; + if (!!DemandedRHS) { + SDValue RHS = Op.getOperand(1); + computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1); + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + break; + } + case ISD::CONCAT_VECTORS: { + // Split DemandedElts and test each of the demanded subvectors. + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + EVT SubVectorVT = Op.getOperand(0).getValueType(); + unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); + unsigned NumSubVectors = Op.getNumOperands(); + for (unsigned i = 0; i != NumSubVectors; ++i) { + APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); + DemandedSub = DemandedSub.trunc(NumSubVectorElts); + if (!!DemandedSub) { + SDValue Sub = Op.getOperand(i); + computeKnownBits(Sub, KnownZero2, KnownOne2, DemandedSub, Depth + 1); + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; + } + break; + } + case ISD::EXTRACT_SUBVECTOR: { + // If we know the element index, just demand that subvector elements, + // otherwise demand them all. + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); + computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1); + } else { + computeKnownBits(Src, KnownZero, KnownOne, Depth + 1); + } + break; + } + case ISD::BITCAST: { + SDValue N0 = Op.getOperand(0); + unsigned SubBitWidth = N0.getScalarValueSizeInBits(); + + // Ignore bitcasts from floating point. + if (!N0.getValueType().isInteger()) + break; + + // Fast handling of 'identity' bitcasts. + if (BitWidth == SubBitWidth) { + computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1); + break; + } + + // Support big-endian targets when it becomes useful. + bool IsLE = getDataLayout().isLittleEndian(); + if (!IsLE) + break; + + // Bitcast 'small element' vector to 'large element' scalar/vector. 
+ if ((BitWidth % SubBitWidth) == 0) { + assert(N0.getValueType().isVector() && "Expected bitcast from vector"); + + // Collect known bits for the (larger) output by collecting the known + // bits from each set of sub elements and shift these into place. + // We need to separately call computeKnownBits for each set of + // sub elements as the knownbits for each is likely to be different. + unsigned SubScale = BitWidth / SubBitWidth; + APInt SubDemandedElts(NumElts * SubScale, 0); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SubDemandedElts.setBit(i * SubScale); + + for (unsigned i = 0; i != SubScale; ++i) { + computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts.shl(i), + Depth + 1); + KnownOne |= KnownOne2.zext(BitWidth).shl(SubBitWidth * i); + KnownZero |= KnownZero2.zext(BitWidth).shl(SubBitWidth * i); + } + } + + // Bitcast 'large element' scalar/vector to 'small element' vector. + if ((SubBitWidth % BitWidth) == 0) { + assert(Op.getValueType().isVector() && "Expected bitcast to vector"); + + // Collect known bits for the (smaller) output by collecting the known + // bits from the overlapping larger input elements and extracting the + // sub sections we actually care about. + unsigned SubScale = SubBitWidth / BitWidth; + APInt SubDemandedElts(NumElts / SubScale, 0); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SubDemandedElts.setBit(i / SubScale); + + computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1); + + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % SubScale) * BitWidth; + KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth); + KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; + } + } + break; + } case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; @@ -2045,8 +2211,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero |= KnownZero2; break; case ISD::OR: - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; @@ -2054,8 +2222,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne |= KnownOne2; break; case ISD::XOR: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // Output known-0 bits are known if clear or set in both the LHS & RHS. 
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); @@ -2065,11 +2235,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; } case ISD::MUL: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conserative estimate for high known-0 bits. + // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. KnownOne.clearAllBits(); @@ -2089,12 +2261,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); unsigned LeadZ = KnownZero2.countLeadingOnes(); - KnownOne2.clearAllBits(); - KnownZero2.clearAllBits(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, @@ -2105,6 +2277,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::SELECT: computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // Only known if known in both the LHS and RHS. @@ -2113,6 +2288,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; case ISD::SELECT_CC: computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); // Only known if known in both the LHS and RHS. @@ -2144,58 +2322,37 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ISD::SHL: - // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned ShAmt = SA->getZExtValue(); - - // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) - break; - - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero <<= ShAmt; - KnownOne <<= ShAmt; - // low bits known zero. - KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); + if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + KnownZero = KnownZero << *ShAmt; + KnownOne = KnownOne << *ShAmt; + // Low bits are known zero. 
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt->getZExtValue()); } break; case ISD::SRL: - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned ShAmt = SA->getZExtValue(); - - // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) - break; - - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); - - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - KnownZero |= HighBits; // High bits known zero. + if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + KnownZero = KnownZero.lshr(*ShAmt); + KnownOne = KnownOne.lshr(*ShAmt); + // High bits are known zero. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue()); + KnownZero |= HighBits; } break; case ISD::SRA: - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned ShAmt = SA->getZExtValue(); - - // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) - break; - - // If any of the demanded bits are produced by the sign extension, we also - // demand the input sign bit. - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); - - // Handle the sign bits. + if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + KnownZero = KnownZero.lshr(*ShAmt); + KnownOne = KnownOne.lshr(*ShAmt); + // If we know the value of the sign bit, then we know it is copied across + // the high bits by the shift amount. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue()); APInt SignBit = APInt::getSignBit(BitWidth); - SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask. - + SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask. if (KnownZero.intersects(SignBit)) { KnownZero |= HighBits; // New bits are known zero. } else if (KnownOne.intersects(SignBit)) { @@ -2205,7 +2362,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - unsigned EBits = EVT.getScalarType().getSizeInBits(); + unsigned EBits = EVT.getScalarSizeInBits(); // Sign extension. Compute the demanded bits in the result that are not // present in the input. @@ -2220,7 +2377,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); KnownOne &= InputDemandedBits; KnownZero &= InputDemandedBits; @@ -2253,7 +2411,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // If this is a ZEXTLoad and we are looking at the loaded value. 
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); - unsigned MemBits = VT.getScalarType().getSizeInBits(); + unsigned MemBits = VT.getScalarSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { if (LD->getExtensionType() == ISD::NON_EXTLOAD) @@ -2263,11 +2421,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); + unsigned InBits = InVT.getScalarSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; @@ -2275,30 +2434,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); + unsigned InBits = InVT.getScalarSizeInBits(); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); - // Note if the sign bit is known to be zero or one. - bool SignBitKnownZero = KnownZero.isNegative(); - bool SignBitKnownOne = KnownOne.isNegative(); - - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - - // If the sign bit is known zero or one, the top bits match. - if (SignBitKnownZero) - KnownZero |= NewBits; - else if (SignBitKnownOne) - KnownOne |= NewBits; + // If the sign bit is known to be zero or one, then sext will extend + // it to the top bits, else it will just zext. + KnownZero = KnownZero.sext(BitWidth); + KnownOne = KnownOne.sext(BitWidth); break; } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); + unsigned InBits = InVT.getScalarSizeInBits(); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); @@ -2308,10 +2459,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); + unsigned InBits = InVT.getScalarSizeInBits(); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); break; @@ -2330,7 +2482,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; case ISD::SUB: { - if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { + if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) { // We know that the top bits of C-X are clear if X contains less bits // than C (i.e. no wrap-around can happen). For example, 20-X is // positive if we can prove that X is >= 0 and < 16. 
@@ -2338,7 +2490,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -2350,8 +2503,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } } } + LLVM_FALLTHROUGH; } - // fall through case ISD::ADD: case ISD::ADDE: { // Output known-0 bits are known if clear or set in both the low clear bits @@ -2361,17 +2514,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // known to be clear. For example, if one input has the top 10 bits clear // and the other has the top 8 bits clear, we know the top 7 bits of the // output must be clear. - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); unsigned KnownZeroHigh = KnownZero2.countLeadingOnes(); unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); KnownZeroHigh = std::min(KnownZeroHigh, KnownZero2.countLeadingOnes()); KnownZeroLow = std::min(KnownZeroLow, KnownZero2.countTrailingOnes()); - if (Op.getOpcode() == ISD::ADD) { + if (Opcode == ISD::ADD) { KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow); if (KnownZeroHigh > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1); @@ -2387,11 +2542,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; } case ISD::SREM: - if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -2411,11 +2567,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } break; case ISD::UREM: { - if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); // The upper bits are all zero, the lower ones are unchanged. KnownZero = KnownZero2 | ~LowBits; @@ -2426,8 +2583,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. 
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); @@ -2437,9 +2596,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::EXTRACT_ELEMENT: { computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - const unsigned Index = - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - const unsigned BitWidth = Op.getValueType().getSizeInBits(); + const unsigned Index = Op.getConstantOperandVal(1); + const unsigned BitWidth = Op.getValueSizeInBits(); // Remove low part of known bits mask KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); @@ -2450,8 +2608,74 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne = KnownOne.trunc(BitWidth); break; } + case ISD::EXTRACT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue EltNo = Op.getOperand(1); + EVT VecVT = InVec.getValueType(); + const unsigned BitWidth = Op.getValueSizeInBits(); + const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); + const unsigned NumSrcElts = VecVT.getVectorNumElements(); + // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know + // anything about the extended bits. + if (BitWidth > EltBitWidth) { + KnownZero = KnownZero.trunc(EltBitWidth); + KnownOne = KnownOne.trunc(EltBitWidth); + } + ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) { + // If we know the element index, just demand that vector element. + unsigned Idx = ConstEltNo->getZExtValue(); + APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); + computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1); + } else { + // Unknown element index, so ignore DemandedElts and demand them all. + computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1); + } + if (BitWidth > EltBitWidth) { + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + } + break; + } + case ISD::INSERT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue InVal = Op.getOperand(1); + SDValue EltNo = Op.getOperand(2); + + ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { + // If we know the element index, split the demand between the + // source vector and the inserted element. + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + unsigned EltIdx = CEltNo->getZExtValue(); + + // If we demand the inserted element then add its common known bits. + if (DemandedElts[EltIdx]) { + computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1); + KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth()); + KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());; + } + + // If we demand the source vector then add its common known bits, ensuring + // that we don't demand the inserted element. + APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); + if (!!VectorElts) { + computeKnownBits(InVec, KnownZero2, KnownOne2, VectorElts, Depth + 1); + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + } else { + // Unknown element index, so ignore DemandedElts and demand them all. 
+ computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1); + computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1); + KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth()); + KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());; + } + break; + } case ISD::BSWAP: { - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); KnownZero = KnownZero2.byteSwap(); KnownOne = KnownOne2.byteSwap(); break; @@ -2460,13 +2684,15 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: { - APInt Op0Zero, Op0One; - APInt Op1Zero, Op1One; - computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth); - computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth); - - KnownZero = Op0Zero & Op1Zero; - KnownOne = Op0One & Op1One; + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; break; } case ISD::FrameIndex: @@ -2479,9 +2705,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; default: - if (Op.getOpcode() < ISD::BUILTIN_OP_END) + if (Opcode < ISD::BUILTIN_OP_END) break; - // Fallthrough + LLVM_FALLTHROUGH; case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: @@ -2494,6 +2720,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { + EVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getScalarSizeInBits(); + + // Is the constant a known power of 2? + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val)) + return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); + // A left-shift of a constant one will have exactly one bit set because // shifting the bit off the end is undefined. if (Val.getOpcode() == ISD::SHL) { @@ -2510,12 +2743,19 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { return true; } + // Are all operands of a build vector constant powers of two? + if (Val.getOpcode() == ISD::BUILD_VECTOR) + if (llvm::all_of(Val->ops(), [this, BitWidth](SDValue E) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E)) + return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); + return false; + })) + return true; + // More could be done here, though the above checks are enough // to handle some common cases. // Fall back to computeKnownBits to catch other known cases. 
- EVT OpVT = Val.getValueType(); - unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); APInt KnownZero, KnownOne; computeKnownBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && @@ -2525,7 +2765,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); - unsigned VTBits = VT.getScalarType().getSizeInBits(); + unsigned VTBits = VT.getScalarSizeInBits(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -2547,14 +2787,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { } case ISD::SIGN_EXTEND: - Tmp = - VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: // Max of the input and what this extends. - Tmp = - cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits(); + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); Tmp = VTBits-Tmp+1; Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); @@ -2563,17 +2801,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { case ISD::SRA: Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); // SRA X, C -> adds C sign bits. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - Tmp += C->getZExtValue(); - if (Tmp > VTBits) Tmp = VTBits; + if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { + APInt ShiftVal = C->getAPIntValue(); + ShiftVal += Tmp; + Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); } return Tmp; case ISD::SHL: - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { // shl destroys sign bits. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (C->getZExtValue() >= VTBits || // Bad shift. - C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + if (C->getAPIntValue().uge(VTBits) || // Bad shift. + C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out. return Tmp - C->getZExtValue(); } break; @@ -2679,7 +2918,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { if (Tmp2 == 1) return 1; // Handle NEG. - if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero, KnownOne; computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); @@ -2701,25 +2940,50 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; - case ISD::TRUNCATE: - // FIXME: it's tricky to do anything useful for this, but it is an important - // case for targets like X86. + case ISD::TRUNCATE: { + // Check if the sign bits of source go down as far as the truncated value. 
+ unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); + unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (NumSrcSignBits > (NumSrcBits - VTBits)) + return NumSrcSignBits - (NumSrcBits - VTBits); break; + } case ISD::EXTRACT_ELEMENT: { const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); - const int BitWidth = Op.getValueType().getSizeInBits(); - const int Items = - Op.getOperand(0).getValueType().getSizeInBits() / BitWidth; + const int BitWidth = Op.getValueSizeInBits(); + const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth; // Get reverse index (starting from 1), Op1 value indexes elements from // little end. Sign starts at big end. - const int rIndex = Items - 1 - - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + const int rIndex = Items - 1 - Op.getConstantOperandVal(1); // If the sign portion ends in our element the subtraction gives correct // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } + case ISD::EXTRACT_VECTOR_ELT: { + // At the moment we keep this simple and skip tracking the specific + // element. This way we get the lowest common denominator for all elements + // of the vector. + // TODO: get information for given vector element + const unsigned BitWidth = Op.getValueSizeInBits(); + const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); + // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know + // anything about sign bits. But if the sizes match we can derive knowledge + // about sign bits from the vector operand. + if (BitWidth == EltBitWidth) + return ComputeNumSignBits(Op.getOperand(0), Depth+1); + break; + } + case ISD::EXTRACT_SUBVECTOR: + return ComputeNumSignBits(Op.getOperand(0), Depth + 1); + case ISD::CONCAT_VECTORS: + // Determine the minimum number of sign bits across all input vectors. + // Early out if the result is already 1. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) + Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1)); + return Tmp; } // If we are looking at the loaded value of the SDNode. @@ -2730,10 +2994,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { switch (ExtType) { default: break; case ISD::SEXTLOAD: // '17' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + Tmp = LD->getMemoryVT().getScalarSizeInBits(); return VTBits-Tmp+1; case ISD::ZEXTLOAD: // '16' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + Tmp = LD->getMemoryVT().getScalarSizeInBits(); return VTBits-Tmp; } } @@ -2842,6 +3106,16 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, llvm::SelectionDAG &DAG) { + assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); + assert(llvm::all_of(Ops, + [Ops](SDValue Op) { + return Ops[0].getValueType() == Op.getValueType(); + }) && + "Concatenation of vectors with inconsistent value types!"); + assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) == + VT.getVectorNumElements() && + "Incorrect element count in vector concatenation!"); + if (Ops.size() == 1) return Ops[0]; @@ -2875,7 +3149,7 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ? 
DAG.getZExtOrTrunc(Op, DL, SVT) : DAG.getSExtOrTrunc(Op, DL, SVT); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts); + return DAG.getBuildVector(VT, DL, Elts); } /// Gets or creates the specified node. @@ -2924,13 +3198,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } case ISD::BITCAST: if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) - return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); + return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); + return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT); if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT); if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) - return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); + return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), @@ -3162,8 +3436,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::BITCAST: // Basic sanity checking. - assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() - && "Cannot BITCAST between types of different sizes!"); + assert(VT.getSizeInBits() == Operand.getValueSizeInBits() && + "Cannot BITCAST between types of different sizes!"); if (VT == Operand.getValueType()) return Operand; // noop conversion. if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0)); @@ -3333,25 +3607,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT SVT = VT.getScalarType(); SmallVector<SDValue, 4> Outputs; for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { - ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); - ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); - if (!V1 || !V2) // Not a constant, bail. - return SDValue(); - - if (V1->isOpaque() || V2->isOpaque()) - return SDValue(); + SDValue V1 = BV1->getOperand(I); + SDValue V2 = BV2->getOperand(I); // Avoid BUILD_VECTOR nodes that perform implicit truncation. - // FIXME: This is valid and could be handled by truncating the APInts. + // FIXME: This is valid and could be handled by truncation. if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) return SDValue(); // Fold one vector element. - std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(), - V2->getAPIntValue()); - if (!Folded.second) + SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); + + // Scalar folding only succeeded if the result is a constant or UNDEF. + if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) return SDValue(); - Outputs.push_back(getConstant(Folded.first, DL, SVT)); + Outputs.push_back(ScalarResult); } assert(VT.getVectorNumElements() == Outputs.size() && @@ -3394,8 +3665,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, // All operands must be vector types with the same number of elements as // the result type and must be either UNDEF or a build vector of constant // or UNDEF scalars. 
- if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) || - !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize)) + if (!all_of(Ops, IsConstantBuildVectorOrUndef) || + !all_of(Ops, IsScalarOrSameVectorSize)) return SDValue(); // If we are comparing vectors, then the result needs to be a i1 boolean @@ -3577,8 +3848,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // amounts. This catches things like trying to shift an i1024 value by an // i8, which is easy to fall into in generic code that uses // TLI.getShiftAmount(). - assert(N2.getValueType().getSizeInBits() >= - Log2_32_Ceil(N1.getValueType().getSizeInBits()) && + assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) && "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to @@ -3609,7 +3879,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && VT.bitsLE(N1.getValueType()) && - N2C && "Invalid FP_ROUND!"); + N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) && + "Invalid FP_ROUND!"); if (N1.getValueType() == VT) return N1; // noop conversion. break; case ISD::AssertSext: @@ -3640,7 +3911,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (EVT == VT) return N1; // Not actually extending auto SignExtendInReg = [&](APInt Val) { - unsigned FromBits = EVT.getScalarType().getSizeInBits(); + unsigned FromBits = EVT.getScalarSizeInBits(); Val <<= Val.getBitWidth() - FromBits; Val = Val.ashr(Val.getBitWidth() - FromBits); return getConstant(Val, DL, VT.getScalarType()); @@ -3768,6 +4039,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Trivial extraction. if (VT.getSimpleVT() == N1.getSimpleValueType()) return N1; + + // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created + // during shuffle legalization. + if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && + VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); } break; } @@ -3868,7 +4145,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). 
return getConstant(0, DL, VT); - // fallthrough + LLVM_FALLTHROUGH; case ISD::ADD: case ISD::ADDC: case ISD::ADDE: @@ -3977,6 +4254,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); + case ISD::INSERT_VECTOR_ELT: { + ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3); + // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF + if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + return getUNDEF(VT); + break; + } case ISD::INSERT_SUBVECTOR: { SDValue Index = N3; if (VT.isSimple() && N1.getValueType().isSimple() @@ -4072,7 +4356,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(!Value.isUndef()); - unsigned NumBits = VT.getScalarType().getSizeInBits(); + unsigned NumBits = VT.getScalarSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { assert(C->getAPIntValue().getBitWidth() == 8); APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); @@ -4306,10 +4590,10 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); - if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) @@ -4342,8 +4626,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. - if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) - MFI->setObjectAlignment(FI->getIndex(), NewAlign); + if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + MFI.setObjectAlignment(FI->getIndex(), NewAlign); Align = NewAlign; } } @@ -4422,10 +4706,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); - if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) @@ -4445,8 +4729,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. 
- if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) - MFI->setObjectAlignment(FI->getIndex(), NewAlign); + if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + MFI.setObjectAlignment(FI->getIndex(), NewAlign); Align = NewAlign; } } @@ -4519,10 +4803,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, std::vector<EVT> MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); - if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); @@ -4538,8 +4822,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. - if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) - MFI->setObjectAlignment(FI->getIndex(), NewAlign); + if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + MFI.setObjectAlignment(FI->getIndex(), NewAlign); Align = NewAlign; } } @@ -4796,10 +5080,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTList, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO, - AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + MachineMemOperand *MMO) { FoldingSetNodeID ID; ID.AddInteger(MemVT.getRawBits()); AddNodeIDNode(ID, Opcode, VTList, Ops); @@ -4811,8 +5092,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, } auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), - VTList, MemVT, MMO, SuccessOrdering, - FailureOrdering, SynchScope); + VTList, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -4820,14 +5100,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, return SDValue(N, 0); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, - SDVTList VTList, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { - return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering, - Ordering, SynchScope); -} - SDValue SelectionDAG::getAtomicCmpSwap( unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, @@ -4847,26 +5119,23 @@ SDValue SelectionDAG::getAtomicCmpSwap( auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad | MachineMemOperand::MOStore; MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, + AAMDNodes(), nullptr, SynchScope, SuccessOrdering, + FailureOrdering); - return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO, - SuccessOrdering, FailureOrdering, SynchScope); + return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO); } SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, - MachineMemOperand *MMO, - 
AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + MachineMemOperand *MMO) { assert(Opcode == ISD::ATOMIC_CMP_SWAP || Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, - SuccessOrdering, FailureOrdering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); } SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, @@ -4892,16 +5161,15 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, - MemVT.getStoreSize(), Alignment); + MemVT.getStoreSize(), Alignment, AAMDNodes(), + nullptr, SynchScope, Ordering); - return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO, - Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); } SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, - MachineMemOperand *MMO, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + MachineMemOperand *MMO) { assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || @@ -4921,18 +5189,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Val}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); } SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, - MachineMemOperand *MMO, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + MachineMemOperand *MMO) { assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. @@ -5056,7 +5323,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(VT); + Alignment = getEVTAlignment(MemVT); MMOFlags |= MachineMemOperand::MOLoad; assert((MMOFlags & MachineMemOperand::MOStore) == 0); @@ -5101,9 +5368,8 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::LOAD, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal(), - MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>( + dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5160,12 +5426,14 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, ISD::MemIndexedMode AM) { LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); - // Don't propagate the invariant flag. 
+ // Don't propagate the invariant or dereferenceable flags. auto MMOFlags = - LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; + LD->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->getAlignment(), MMOFlags); + LD->getMemoryVT(), LD->getAlignment(), MMOFlags, + LD->getAAInfo()); } SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, @@ -5200,8 +5468,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( + dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5265,8 +5533,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops); ID.AddInteger(SVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( + dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5311,17 +5579,15 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, - ISD::LoadExtType ExtTy) { + ISD::LoadExtType ExtTy, bool isExpanding) { SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED, - MMO->isVolatile(), - MMO->isNonTemporal(), - MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( + dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5329,7 +5595,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return SDValue(E, 0); } auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - ExtTy, MemVT, MMO); + ExtTy, isExpanding, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -5340,7 +5606,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, - bool isTrunc) { + bool IsTruncating, bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); EVT VT = Val.getValueType(); @@ -5349,8 +5615,8 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), 
- MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( + dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5358,7 +5624,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, return SDValue(E, 0); } auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - isTrunc, MemVT, MMO); + IsTruncating, IsCompressing, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -5374,10 +5640,8 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, - MMO->isVolatile(), - MMO->isNonTemporal(), - MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( + dl.getIROrder(), VTs, VT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5411,9 +5675,8 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal(), - MMO->isInvariant())); + ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( + dl.getIROrder(), VTs, VT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5545,7 +5808,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { // If the and is only masking out bits that cannot effect the shift, // eliminate the and. 
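[Editor's aside, not part of the patch] For readers unfamiliar with the MGATHER/MSCATTER nodes whose creation is updated above, a minimal scalar model of their semantics (one independent memory access per active mask lane; this is only an illustration, not the DAG implementation):

#include <cstddef>
#include <iostream>
#include <vector>

// Gather: each active lane reads through its own pointer; inactive lanes
// keep the pass-through value.
void gather(const std::vector<const int *> &Ptrs, const std::vector<bool> &Mask,
            const std::vector<int> &PassThru, std::vector<int> &Dst) {
  for (std::size_t i = 0; i < Mask.size(); ++i)
    Dst[i] = Mask[i] ? *Ptrs[i] : PassThru[i];
}

// Scatter: each active lane writes through its own pointer; inactive lanes
// write nothing.
void scatter(const std::vector<int *> &Ptrs, const std::vector<bool> &Mask,
             const std::vector<int> &Val) {
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      *Ptrs[i] = Val[i];
}

int main() {
  int A = 1, B = 2, C = 3, D = 4;
  std::vector<const int *> Srcs{&A, &B, &C, &D};
  std::vector<bool> Mask{true, false, true, false};
  std::vector<int> Pass{9, 9, 9, 9}, Out(4);
  gather(Srcs, Mask, Pass, Out);
  for (int V : Out) std::cout << V << ' ';   // 1 9 3 9
  std::cout << '\n';
}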
- unsigned NumBits = VT.getScalarType().getSizeInBits()*2; + unsigned NumBits = VT.getScalarSizeInBits()*2; if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); } @@ -5870,21 +6133,6 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, EVT VT3, EVT VT4, - ArrayRef<SDValue> Ops) { - SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return SelectNodeTo(N, MachineOpc, VTs, Ops); -} - -SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, - SDValue Op1) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops); -} - -SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); @@ -5893,24 +6141,6 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, - SDValue Op1, SDValue Op2, - SDValue Op3) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops); -} - -SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, EVT VT3, - SDValue Op1, SDValue Op2, - SDValue Op3) { - SDVTList VTs = getVTList(VT1, VT2, VT3); - SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops); -} - -SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs,ArrayRef<SDValue> Ops) { SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops); // Reset the NodeID to -1. @@ -5922,14 +6152,14 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return New; } -/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away /// the line number information on the merged node since it is not possible to /// preserve the information that operation is associated with multiple lines. /// This will make the debugger working better at -O0, were there is a higher /// probability having other instructions associated with that line. 
/// /// For IROrder, we keep the smaller of the two -SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &OLoc) { +SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) { DebugLoc NLoc = N->getDebugLoc(); if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); @@ -5963,7 +6193,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops); if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP)) - return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); + return UpdateSDLocOnMergeSDNode(ON, SDLoc(N)); } if (!RemoveNodeFromCSEMaps(N)) @@ -6050,19 +6280,6 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, } MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, - EVT VT1, EVT VT2) { - SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, None); -} - -MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, - EVT VT1, EVT VT2, SDValue Op1) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops); -} - -MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); @@ -6110,13 +6327,6 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, } MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, - EVT VT1, EVT VT2, EVT VT3, EVT VT4, - ArrayRef<SDValue> Ops) { - SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return getMachineNode(Opcode, dl, VTs, Ops); -} - -MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(ResultTys); @@ -6135,7 +6345,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, AddNodeIDNode(ID, ~Opcode, VTs, Ops); IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { - return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); + return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL)); } } @@ -6255,6 +6465,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { "Cannot replace with this method!"); assert(From != To.getNode() && "Cannot replace uses of with self"); + // Preserve Debug Values + TransferDbgValues(FromN, To); + // Iterate over all the existing uses of From. New uses will be added // to the beginning of the use list, which we avoid visiting. // This specifically avoids visiting uses of From that arise while the @@ -6285,8 +6498,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { AddModifiedNodeToCSEMaps(User); } - // Preserve Debug Values - TransferDbgValues(FromN, To); // If we just RAUW'd the root, take note. if (FromN == getRoot()) @@ -6689,6 +6900,40 @@ bool llvm::isBitwiseNot(SDValue V) { return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1)); } +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); + + // BuildVectors can truncate their operands. Ignore that case here. + // FIXME: We blindly ignore splats which include undef which is overly + // pessimistic. 
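[Editor's aside, not part of the patch] The renamed SDLoc-merging helper above implements a simple policy: at -O0, when CSE merges two nodes carrying different source locations, drop the location rather than arbitrarily picking one, so the debugger does not attribute the merged operation to the wrong line. A toy standalone model of that policy, assuming C++14 and invented types:

#include <iostream>

struct Loc {
  int Line = 0;                 // 0 means "no location"
  bool valid() const { return Line != 0; }
};

Loc mergeLocations(Loc Existing, Loc Incoming, bool OptimizationsOff) {
  if (OptimizationsOff && Existing.valid() && Incoming.valid() &&
      Existing.Line != Incoming.Line)
    return Loc{};               // ambiguous: better to show no location at all
  return Existing;
}

int main() {
  std::cout << mergeLocations({10}, {12}, true).Line << "\n";  // 0
  std::cout << mergeLocations({10}, {10}, true).Line << "\n";  // 10
}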
+ if (CN && UndefElements.none() && + CN->getValueType(0) == N.getValueType().getScalarType()) + return CN; + } + + return nullptr; +} + +ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) { + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); + + if (CN && UndefElements.none()) + return CN; + } + + return nullptr; +} + HandleSDNode::~HandleSDNode() { DropOperands(); } @@ -6710,11 +6955,11 @@ AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal(), MMO->isInvariant()); - assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); - assert(isNonTemporal() == MMO->isNonTemporal() && - "Non-temporal encoding error!"); + MemSDNodeBits.IsVolatile = MMO->isVolatile(); + MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal(); + MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable(); + MemSDNodeBits.IsInvariant = MMO->isInvariant(); + // We check here that the size of the memory operand fits within the size of // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. @@ -6939,8 +7184,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { for (; i < ResNE; ++i) Scalars.push_back(getUNDEF(EltVT)); - return getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars); + EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE); + return getBuildVector(VecVT, dl, Scalars); } bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, @@ -6962,13 +7207,13 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, if (Loc.getOpcode() == ISD::FrameIndex) { if (BaseLoc.getOpcode() != ISD::FrameIndex) return false; - const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo(); + const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); - int FS = MFI->getObjectSize(FI); - int BFS = MFI->getObjectSize(BFI); + int FS = MFI.getObjectSize(FI); + int BFS = MFI.getObjectSize(BFI); if (FS != BFS || FS != (int)Bytes) return false; - return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); } // Handle X + C. @@ -7033,7 +7278,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { } if (FrameIdx != (1 << 31)) { - const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); + const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); return FIInfoAlign; @@ -7124,7 +7369,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, // false. unsigned int nOps = getNumOperands(); assert(nOps > 0 && "isConstantSplat has 0-size build vector"); - unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); + unsigned EltBitSize = VT.getScalarSizeInBits(); for (unsigned j = 0; j < nOps; ++j) { unsigned i = isBigEndian ? 
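[Editor's aside, not part of the patch] The new isConstOrConstSplat / isConstOrConstSplatFP helpers above answer "is this value a constant, or a BUILD_VECTOR that splats a single constant?". A rough scalar analogue of the splat test, ignoring the undef-lane and truncation subtleties the real code has to handle (requires C++17 for std::optional):

#include <iostream>
#include <optional>
#include <vector>

// Return the splatted value if every element is the same constant,
// nothing otherwise.
std::optional<int> getConstantSplat(const std::vector<int> &Elts) {
  if (Elts.empty())
    return std::nullopt;
  for (int V : Elts)
    if (V != Elts.front())
      return std::nullopt;
  return Elts.front();
}

int main() {
  std::cout << getConstantSplat({7, 7, 7, 7}).value_or(-1) << "\n"; // 7
  std::cout << getConstantSplat({7, 7, 1, 7}).value_or(-1) << "\n"; // -1
}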
nOps-1-j : j; @@ -7265,6 +7510,16 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { return nullptr; } +SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); + + return nullptr; +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e1fc37d..996c95b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -183,7 +184,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, + DAG.getConstant(Lo.getValueSizeInBits(), DL, TLI.getPointerTy(DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); @@ -833,8 +834,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { // If we clobbered the stack pointer, MFI should know about it. - assert(DAG.getMachineFunction().getFrameInfo()-> - hasOpaqueSPAdjustment()); + assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()); } } } @@ -931,46 +931,9 @@ SDValue SelectionDAGBuilder::getControlRoot() { return Root; } -/// Copy swift error to the final virtual register at end of a basic block, as -/// specified by SwiftErrorWorklist, if necessary. -static void copySwiftErrorsToFinalVRegs(SelectionDAGBuilder &SDB) { - const TargetLowering &TLI = SDB.DAG.getTargetLoweringInfo(); - if (!TLI.supportSwiftError()) - return; - - if (!SDB.FuncInfo.SwiftErrorWorklist.count(SDB.FuncInfo.MBB)) - return; - - // Go through entries in SwiftErrorWorklist, and create copy as necessary. - FunctionLoweringInfo::SwiftErrorVRegs &WorklistEntry = - SDB.FuncInfo.SwiftErrorWorklist[SDB.FuncInfo.MBB]; - FunctionLoweringInfo::SwiftErrorVRegs &MapEntry = - SDB.FuncInfo.SwiftErrorMap[SDB.FuncInfo.MBB]; - for (unsigned I = 0, E = WorklistEntry.size(); I < E; I++) { - unsigned WorkReg = WorklistEntry[I]; - - // Find the swifterror virtual register for the value in SwiftErrorMap. - unsigned MapReg = MapEntry[I]; - assert(TargetRegisterInfo::isVirtualRegister(MapReg) && - "Entries in SwiftErrorMap should be virtual registers"); - - if (WorkReg == MapReg) - continue; - - // Create copy from SwiftErrorMap to SwiftWorklist. - auto &DL = SDB.DAG.getDataLayout(); - SDValue CopyNode = SDB.DAG.getCopyToReg( - SDB.getRoot(), SDB.getCurSDLoc(), WorkReg, - SDB.DAG.getRegister(MapReg, EVT(TLI.getPointerTy(DL)))); - MapEntry[I] = WorkReg; - SDB.DAG.setRoot(CopyNode); - } -} - void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. 
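[Editor's aside, not part of the patch] The getCopyFromParts hunk above reassembles a value that was split across two register parts by shifting the high part up by the low part's width and OR-ing the halves together. A scalar model of the same arithmetic:

#include <cstdint>
#include <iostream>

// Zero-extend the low part, shift the high part up by the low part's
// width, then OR them together.
uint64_t combineParts(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | static_cast<uint64_t>(Lo);
}

int main() {
  // Prints 1234567deadbeef.
  std::cout << std::hex << combineParts(0xdeadbeef, 0x01234567) << "\n";
}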
if (isa<TerminatorInst>(&I)) { - copySwiftErrorsToFinalVRegs(*this); HandlePHINodesInSuccessorBlocks(I.getParent()); } @@ -1021,8 +984,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, if (Val.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, Val)) { - SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), - false, Offset, dl, DbgSDNodeOrder); + SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1491,6 +1453,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const Function *F = I.getParent()->getParent(); if (TLI.supportSwiftError() && F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { + assert(FuncInfo.SwiftErrorArg && "Need a swift error argument"); ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Flags.setSwiftError(); Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, @@ -1498,7 +1461,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { true /*isfixed*/, 1 /*origidx*/, 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. - OutVals.push_back(DAG.getRegister(FuncInfo.SwiftErrorMap[FuncInfo.MBB][0], + OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg( + FuncInfo.MBB, FuncInfo.SwiftErrorArg), EVT(TLI.getPointerTy(DL)))); } @@ -2012,7 +1976,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, if (Global) { MachinePointerInfo MPInfo(Global); MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); - auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable; *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy)); Node->setMemRefs(MemRefs, MemRefs + 1); @@ -2033,8 +1998,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); - MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); - int FI = MFI->getStackProtectorIndex(); + MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI.getStackProtectorIndex(); SDValue Guard; SDLoc dl = getCurSDLoc(); @@ -2329,8 +2294,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - AddLandingPadInfo(LP, MMI, MBB); + addLandingPadInfo(LP, *MBB); // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. @@ -2484,7 +2448,7 @@ static bool isVectorReductionOp(const User *I) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) if (FPOp->getFastMathFlags().unsafeAlgebra()) break; - // Fall through. + LLVM_FALLTHROUGH; default: return false; } @@ -2639,7 +2603,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { unsigned ShiftSize = ShiftTy.getSizeInBits(); - unsigned Op2Size = Op2.getValueType().getSizeInBits(); + unsigned Op2Size = Op2.getValueSizeInBits(); SDLoc DL = getCurSDLoc(); // If the operand is smaller than the shift count type, promote it. 
@@ -2650,7 +2614,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. - else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); // Otherwise we'll need to temporarily settle for some other convenient // type. Type legalization will make adjustments once the shiftee is split. @@ -2731,7 +2695,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { // Check if the condition of the select has one use or two users that are both // selects with the same condition. static bool hasOnlySelectUsers(const Value *Cond) { - return std::all_of(Cond->user_begin(), Cond->user_end(), [](const Value *V) { + return all_of(Cond->users(), [](const Value *V) { return isa<SelectInst>(V); }); } @@ -2998,6 +2962,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); + SDLoc DL = getCurSDLoc(); SmallVector<int, 8> Mask; ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); @@ -3009,54 +2974,60 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, Mask)); + setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; } // Normalize the shuffle vector since mask and vector length don't match. - if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { - // Mask is longer than the source vectors and is a multiple of the source - // vectors. We can use concatenate vector to make the mask and vectors - // lengths match. - - unsigned NumConcat = MaskNumElts / SrcNumElts; - - // Check if the shuffle is some kind of concatenation of the input vectors. - bool IsConcat = true; - SmallVector<int, 8> ConcatSrcs(NumConcat, -1); - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx < 0) - continue; - // Ensure the indices in each SrcVT sized piece are sequential and that - // the same source is used for the whole piece. - if ((Idx % SrcNumElts != (i % SrcNumElts)) || - (ConcatSrcs[i / SrcNumElts] >= 0 && - ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { - IsConcat = false; - break; + if (SrcNumElts < MaskNumElts) { + // Mask is longer than the source vectors. We can use concatenate vector to + // make the mask and vectors lengths match. + + if (MaskNumElts % SrcNumElts == 0) { + // Mask length is a multiple of the source vector length. + // Check if the shuffle is some kind of concatenation of the input + // vectors. + unsigned NumConcat = MaskNumElts / SrcNumElts; + bool IsConcat = true; + SmallVector<int, 8> ConcatSrcs(NumConcat, -1); + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + continue; + // Ensure the indices in each SrcVT sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { + IsConcat = false; + break; + } + // Remember which source this index came from. 
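[Editor's aside, not part of the patch] The visitShift hunk above truncates an oversized shift-amount operand early when the narrower shift-count type can still encode every meaningful shift amount. Roughly, that is a check that the narrow type has at least ceil(log2(width of the shifted value)) bits; a standalone sketch of that test:

#include <iostream>

// ceil(log2(N)): the number of bits needed to encode any amount in [0, N).
unsigned log2Ceil(unsigned N) {
  unsigned Bits = 0;
  for (unsigned V = N - 1; V != 0; V >>= 1)
    ++Bits;
  return Bits;
}

// Can a ShiftTyBits-wide amount still represent every shift that is not
// already out of range for a ValueBits-wide shiftee?
bool canTruncateShiftAmount(unsigned ShiftTyBits, unsigned ValueBits) {
  return ShiftTyBits >= log2Ceil(ValueBits);
}

int main() {
  std::cout << canTruncateShiftAmount(8, 64) << "\n";  // 1: 8 bits hold 0..63
  std::cout << canTruncateShiftAmount(4, 64) << "\n";  // 0: needs at least 6 bits
}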
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; } - // Remember which source this index came from. - ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; - } - // The shuffle is concatenating multiple vectors together. Just emit - // a CONCAT_VECTORS operation. - if (IsConcat) { - SmallVector<SDValue, 8> ConcatOps; - for (auto Src : ConcatSrcs) { - if (Src < 0) - ConcatOps.push_back(DAG.getUNDEF(SrcVT)); - else if (Src == 0) - ConcatOps.push_back(Src1); - else - ConcatOps.push_back(Src2); + // The shuffle is concatenating multiple vectors together. Just emit + // a CONCAT_VECTORS operation. + if (IsConcat) { + SmallVector<SDValue, 8> ConcatOps; + for (auto Src : ConcatSrcs) { + if (Src < 0) + ConcatOps.push_back(DAG.getUNDEF(SrcVT)); + else if (Src == 0) + ConcatOps.push_back(Src1); + else + ConcatOps.push_back(Src2); + } + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps)); + return; } - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), - VT, ConcatOps)); - return; } + unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); + unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; + EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + PaddedMaskNumElts); + // Pad both vectors with undefs to make them the same length as the mask. SDValue UndefVal = DAG.getUNDEF(SrcVT); @@ -3065,24 +3036,32 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps1[0] = Src1; MOps2[0] = Src2; - Src1 = Src1.isUndef() ? DAG.getUNDEF(VT) - : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps1); - Src2 = Src2.isUndef() ? DAG.getUNDEF(VT) - : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps2); + Src1 = Src1.isUndef() + ? DAG.getUNDEF(PaddedVT) + : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); + Src2 = Src2.isUndef() + ? DAG.getUNDEF(PaddedVT) + : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); // Readjust mask for new input vector length. - SmallVector<int, 8> MappedOps; + SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1); for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx >= (int)SrcNumElts) - Idx -= SrcNumElts - MaskNumElts; - MappedOps.push_back(Idx); + Idx -= SrcNumElts - PaddedMaskNumElts; + MappedOps[i] = Idx; } - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - MappedOps)); + SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps); + + // If the concatenated vector was padded, extract a subvector with the + // correct number of elements. + if (MaskNumElts != PaddedMaskNumElts) + Result = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, VT, Result, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + setValue(&I, Result); return; } @@ -3141,10 +3120,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); else { - SDLoc dl = getCurSDLoc(); Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, Src, - DAG.getConstant(StartIdx[Input], dl, + ISD::EXTRACT_SUBVECTOR, DL, VT, Src, + DAG.getConstant(StartIdx[Input], DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } } @@ -3162,8 +3140,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MappedOps.push_back(Idx); } - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - MappedOps)); + setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps)); return; } } @@ -3173,7 +3150,6 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // to insert and build vector. 
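[Editor's aside, not part of the patch] The shuffle normalization above now pads both sources up to alignTo(MaskNumElts, SrcNumElts) lanes before building the wider shuffle, then extracts the original width back out. Because the padded second source starts further into the concatenated pair, indices that referred to it must shift up by the padding amount. A small standalone model of that remapping, using -1 for undef lanes:

#include <iostream>
#include <vector>

// Round Value up to the next multiple of Align (Align > 0).
unsigned alignTo(unsigned Value, unsigned Align) {
  return (Value + Align - 1) / Align * Align;
}

std::vector<int> remapMask(const std::vector<int> &Mask, unsigned SrcNumElts,
                           unsigned PaddedNumElts) {
  std::vector<int> Out(PaddedNumElts, -1);
  for (unsigned i = 0; i < Mask.size(); ++i) {
    int Idx = Mask[i];
    if (Idx >= (int)SrcNumElts)
      Idx += PaddedNumElts - SrcNumElts;  // second source moved up after padding
    Out[i] = Idx;
  }
  return Out;
}

int main() {
  // A 6-element mask over 4-element sources pads to alignTo(6, 4) == 8 lanes;
  // index 5 (lane 1 of source two) becomes 9.
  for (int V : remapMask({0, 5, 2, 7, -1, 1}, 4, alignTo(6, 4)))
    std::cout << V << ' ';                 // 0 9 2 11 -1 1 -1 -1
  std::cout << '\n';
}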
EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDLoc dl = getCurSDLoc(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3185,14 +3161,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + EltVT, Src, DAG.getConstant(Idx, DL, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3293,13 +3269,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (VectorWidth && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); - SmallVector<SDValue, 16> Ops(VectorWidth, N); - N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + N = DAG.getSplatBuildVector(VT, dl, N); } + for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); GTI != E; ++GTI) { const Value *Idx = GTI.getOperand(); - if (StructType *StTy = dyn_cast<StructType>(*GTI)) { + if (StructType *StTy = GTI.getStructTypeOrNull()) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset @@ -3331,8 +3307,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + LLVMContext &Context = *DAG.getContext(); SDValue OffsVal = VectorWidth ? - DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) : DAG.getConstant(Offs, dl, PtrTy); // In an inbouds GEP with an offset that is nonnegative even when @@ -3350,9 +3327,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (!IdxN.getValueType().isVector() && VectorWidth) { MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); - SmallVector<SDValue, 16> Ops(VectorWidth, IdxN); - IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); } + // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); @@ -3433,7 +3410,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); - assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); + assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects()); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { @@ -3462,17 +3439,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - - // The IR notion of invariant_load only guarantees that all *non-faulting* - // invariant loads result in the same value. The MI notion of invariant load - // guarantees that the load can be legally moved to any location within its - // containing function. 
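[Editor's aside, not part of the patch] The visitGetElementPtr hunk above is the usual GEP address arithmetic, now splatting scalar operands for vector GEPs via getSplatBuildVector: struct fields contribute a precomputed byte offset, while array or pointer indices contribute index times element size. A toy scalar model of that offset computation, with invented types:

#include <cstdint>
#include <iostream>
#include <vector>

struct Step {
  bool IsStructField;
  int64_t FieldOffset;   // used when IsStructField
  int64_t Index;         // used otherwise
  int64_t ElementSize;   // used otherwise
};

int64_t gepOffset(const std::vector<Step> &Steps) {
  int64_t Offset = 0;
  for (const Step &S : Steps)
    Offset += S.IsStructField ? S.FieldOffset : S.Index * S.ElementSize;
  return Offset;
}

int main() {
  // &p[2].field_at_8 with 24-byte elements: 2*24 + 8 = 56.
  std::cout << gepOffset({{false, 0, 2, 24}, {true, 8, 0, 0}}) << "\n";
}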
The MI notion of invariant_load is stronger than the - // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load - // with a guarantee that the location being loaded from is dereferenceable - // throughout the function's lifetime. - - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && - isDereferenceablePointer(SV, DAG.getDataLayout()); + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -3540,6 +3508,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { MMOFlags |= MachineMemOperand::MONonTemporal; if (isInvariant) MMOFlags |= MachineMemOperand::MOInvariant; + if (isDereferenceable) + MMOFlags |= MachineMemOperand::MODereferenceable; SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -3585,7 +3555,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); - FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { @@ -3613,9 +3583,9 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { "expect a single EVT for swifterror"); // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT - SDValue L = DAG.getCopyFromReg(getRoot(), getCurSDLoc(), - FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, SV), - ValueVTs[0]); + SDValue L = DAG.getCopyFromReg( + getRoot(), getCurSDLoc(), + FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]); setValue(&I, L); } @@ -3697,16 +3667,39 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } -void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, + bool IsCompressing) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.store.*(Src0, Ptr, alignment, Mask) - Value *PtrOperand = I.getArgOperand(1); + auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) + Src0 = I.getArgOperand(0); + Ptr = I.getArgOperand(1); + Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); + Mask = I.getArgOperand(3); + }; + auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // llvm.masked.compressstore.*(Src0, Ptr, Mask) + Src0 = I.getArgOperand(0); + Ptr = I.getArgOperand(1); + Mask = I.getArgOperand(2); + Alignment = 0; + }; + + Value *PtrOperand, *MaskOperand, *Src0Operand; + unsigned Alignment; + if (IsCompressing) + getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + else + getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + SDValue Ptr = getValue(PtrOperand); - SDValue Src0 = getValue(I.getArgOperand(0)); - SDValue Mask = getValue(I.getArgOperand(3)); + SDValue Src0 = getValue(Src0Operand); + SDValue Mask = getValue(MaskOperand); + EVT VT = Src0.getValueType(); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3719,7 +3712,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { MachineMemOperand::MOStore, 
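[Editor's aside, not part of the patch] visitMaskedStore above now serves both llvm.masked.store and llvm.masked.compressstore, distinguished by the IsCompressing flag. A scalar model of the difference between the two store flavours (illustration only):

#include <cstddef>
#include <iostream>
#include <vector>

// Masked store: active lanes write to their own positions.
void maskedStore(int *Mem, const std::vector<int> &Val,
                 const std::vector<bool> &Mask) {
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      Mem[i] = Val[i];
}

// Compressing store: active lanes are packed contiguously from the base.
void compressStore(int *Mem, const std::vector<int> &Val,
                   const std::vector<bool> &Mask) {
  std::size_t j = 0;
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      Mem[j++] = Val[i];
}

int main() {
  std::vector<int> Val{1, 2, 3, 4};
  std::vector<bool> Mask{true, false, true, false};
  int A[4] = {0, 0, 0, 0}, B[4] = {0, 0, 0, 0};
  maskedStore(A, Val, Mask);    // A = {1, 0, 3, 0}
  compressStore(B, Val, Mask);  // B = {1, 3, 0, 0}
  std::cout << A[2] << ' ' << B[1] << "\n";  // 3 3
}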
VT.getStoreSize(), Alignment, AAInfo); SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, - MMO, false); + MMO, false /* Truncating */, + IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -3740,7 +3734,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { // extract the spalt value and use it as a uniform base. // In all other cases the function returns 'false'. // -static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index, +static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { SelectionDAG& DAG = SDB->DAG; @@ -3777,8 +3771,7 @@ static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index, if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); - SmallVector<SDValue, 16> Ops(GEPWidth, Index); - Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); } return true; } @@ -3820,18 +3813,38 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { setValue(&I, Scatter); } -void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDLoc sdl = getCurSDLoc(); - // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) - Value *PtrOperand = I.getArgOperand(0); + auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) + Ptr = I.getArgOperand(0); + Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + Mask = I.getArgOperand(2); + Src0 = I.getArgOperand(3); + }; + auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // @llvm.masked.expandload.*(Ptr, Mask, Src0) + Ptr = I.getArgOperand(0); + Alignment = 0; + Mask = I.getArgOperand(1); + Src0 = I.getArgOperand(2); + }; + + Value *PtrOperand, *MaskOperand, *Src0Operand; + unsigned Alignment; + if (IsExpanding) + getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + else + getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + SDValue Ptr = getValue(PtrOperand); - SDValue Src0 = getValue(I.getArgOperand(3)); - SDValue Mask = getValue(I.getArgOperand(2)); + SDValue Src0 = getValue(Src0Operand); + SDValue Mask = getValue(MaskOperand); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + EVT VT = Src0.getValueType(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3851,7 +3864,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD); + ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) { SDValue OutChain = Load.getValue(1); DAG.setRoot(OutChain); @@ -4003,13 +4016,13 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { MachineMemOperand::MOLoad, VT.getStoreSize(), I.getAlignment() ? 
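[Editor's aside, not part of the patch] Symmetrically, visitMaskedLoad above now serves both llvm.masked.load and llvm.masked.expandload via the IsExpanding flag. A scalar model of the two load flavours; inactive lanes take the pass-through value in both cases:

#include <cstddef>
#include <iostream>
#include <vector>

// Masked load: each active lane reads from its own position.
std::vector<int> maskedLoad(const int *Mem, const std::vector<bool> &Mask,
                            const std::vector<int> &PassThru) {
  std::vector<int> Out(PassThru);
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      Out[i] = Mem[i];
  return Out;
}

// Expanding load: consecutive memory elements are expanded into the
// active lanes.
std::vector<int> expandLoad(const int *Mem, const std::vector<bool> &Mask,
                            const std::vector<int> &PassThru) {
  std::vector<int> Out(PassThru);
  std::size_t j = 0;
  for (std::size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i])
      Out[i] = Mem[j++];
  return Out;
}

int main() {
  int Mem[4] = {10, 20, 30, 40};
  std::vector<bool> Mask{false, true, false, true};
  std::vector<int> Pass{0, 0, 0, 0};
  std::cout << maskedLoad(Mem, Mask, Pass)[3] << ' '    // 40
            << expandLoad(Mem, Mask, Pass)[3] << "\n";  // 20
}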
I.getAlignment() : - DAG.getEVTAlignment(VT)); + DAG.getEVTAlignment(VT), + AAMDNodes(), nullptr, Scope, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, - getValue(I.getPointerOperand()), MMO, - Order, Scope); + getValue(I.getPointerOperand()), MMO); SDValue OutChain = L.getValue(1); @@ -4047,8 +4060,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { /// node. void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic) { - bool HasChain = !I.doesNotAccessMemory(); - bool OnlyLoad = HasChain && I.onlyReadsMemory(); + // Ignore the callsite's attributes. A specific call site may be marked with + // readnone, but the lowering code will expect the chain based on the + // definition. + const Function *F = I.getCalledFunction(); + bool HasChain = !F->doesNotAccessMemory(); + bool OnlyLoad = HasChain && F->onlyReadsMemory(); // Build the operand list. SmallVector<SDValue, 8> Ops; @@ -4156,7 +4173,7 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, /// getF32Constant - Get 32-bit floating point constant. static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt, const SDLoc &dl) { - return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl, MVT::f32); } @@ -4743,6 +4760,32 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( return true; } +/// Return the appropriate SDDbgValue based on N. +SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, + DILocalVariable *Variable, + DIExpression *Expr, int64_t Offset, + DebugLoc dl, + unsigned DbgSDNodeOrder) { + SDDbgValue *SDV; + auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (FISDN && Expr->startsWithDeref()) { + // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe + // stack slot locations as such instead of as indirectly addressed + // locations. + ArrayRef<uint64_t> TrailingElements(Expr->elements_begin() + 1, + Expr->elements_end()); + DIExpression *DerefedDIExpr = + DIExpression::get(*DAG.getContext(), TrailingElements); + int FI = FISDN->getIndex(); + SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl, + DbgSDNodeOrder); + } else { + SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, + Offset, dl, DbgSDNodeOrder); + } + return SDV; +} + // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ !defined(setjmp_undefined_for_msvc) @@ -4774,6 +4817,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; + case Intrinsic::addressofreturnaddress: + setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()))); + return nullptr; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), @@ -4850,6 +4897,51 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { updateDAGForMaybeTailCall(MM); return nullptr; } + case Intrinsic::memcpy_element_atomic: { + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue NumElements = getValue(I.getArgOperand(2)); + SDValue ElementSize = getValue(I.getArgOperand(3)); + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = I.getArgOperand(2)->getType(); + Entry.Node = NumElements; + Args.push_back(Entry); + + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Entry.Node = ElementSize; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = + cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl) + .setChain(getRoot()) + .setCallee(TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol( + TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); DILocalVariable *Variable = DI.getVariable(); @@ -4944,8 +5036,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false, N)) { - SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - false, Offset, dl, SDNodeOrder); + SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4980,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); - unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); + unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; @@ -4988,7 +5079,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: - DAG.getMachineFunction().getMMI().setCallsEHReturn(true); + DAG.getMachineFunction().setCallsEHReturn(true); DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, MVT::Other, getControlRoot(), @@ -4996,7 +5087,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)))); return nullptr; case Intrinsic::eh_unwind_init: - DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); + DAG.getMachineFunction().setCallsUnwindInit(true); return nullptr; case Intrinsic::eh_dwarf_cfa: { setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, @@ -5015,11 +5106,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. 
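[Editor's aside, not part of the patch] The memcpy_element_atomic lowering above emits a runtime library call whose identity is keyed on the constant element size, and aborts for any size that has no libcall. A sketch of just that dispatch idea; the routine names below are invented for the illustration and are not the actual runtime symbols:

#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

std::string selectElementAtomicMemcpyCall(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:  return "memcpy_element_atomic_1";
  case 2:  return "memcpy_element_atomic_2";
  case 4:  return "memcpy_element_atomic_4";
  case 8:  return "memcpy_element_atomic_8";
  case 16: return "memcpy_element_atomic_16";
  default:
    throw std::runtime_error("Unsupported element size");
  }
}

int main() {
  std::cout << selectElementAtomicMemcpyCall(4) << "\n";
}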
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); AllocaInst *FnCtx = cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; - MFI->setFunctionContextIndex(FI); + MFI.setFunctionContextIndex(FI); return nullptr; } case Intrinsic::eh_sjlj_setjmp: { @@ -5055,6 +5146,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; + case Intrinsic::masked_expandload: + visitMaskedLoad(I, true /* IsExpanding */); + return nullptr; + case Intrinsic::masked_compressstore: + visitMaskedStore(I, true /* IsCompressing */); + return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -5114,39 +5211,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return nullptr; } - case Intrinsic::convertff: - case Intrinsic::convertfsi: - case Intrinsic::convertfui: - case Intrinsic::convertsif: - case Intrinsic::convertuif: - case Intrinsic::convertss: - case Intrinsic::convertsu: - case Intrinsic::convertus: - case Intrinsic::convertuu: { - ISD::CvtCode Code = ISD::CVT_INVALID; - switch (Intrinsic) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::convertff: Code = ISD::CVT_FF; break; - case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; - case Intrinsic::convertfui: Code = ISD::CVT_FU; break; - case Intrinsic::convertsif: Code = ISD::CVT_SF; break; - case Intrinsic::convertuif: Code = ISD::CVT_UF; break; - case Intrinsic::convertss: Code = ISD::CVT_SS; break; - case Intrinsic::convertsu: Code = ISD::CVT_SU; break; - case Intrinsic::convertus: Code = ISD::CVT_US; break; - case Intrinsic::convertuu: Code = ISD::CVT_UU; break; - } - EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - const Value *Op1 = I.getArgOperand(0); - Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), - DAG.getValueType(DestVT), - DAG.getValueType(getValue(Op1).getValueType()), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)), - Code); - setValue(&I, Res); - return nullptr; - } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); @@ -5368,7 +5432,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. 
MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); @@ -5380,7 +5444,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; - MFI->setStackProtectorIndex(FI); + MFI.setStackProtectorIndex(FI); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); @@ -5411,6 +5475,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::annotation: case Intrinsic::ptr_annotation: + case Intrinsic::invariant_group_barrier: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; @@ -5687,7 +5752,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineModuleInfo &MMI = MF.getMMI(); MCSymbol *BeginLabel = nullptr; if (EHPadBB) { @@ -5699,7 +5765,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, // so as to maintain the ordering of pads in the LSDA. unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { - MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. @@ -5740,13 +5806,13 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. - if (MMI.hasEHFunclets()) { + if (MF.hasEHFunclets()) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()), BeginLabel, EndLabel); } else { - MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); } } @@ -5766,6 +5832,15 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5785,9 +5860,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. 
- Entry.Node = DAG.getRegister( - FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, V), - EVT(TLI.getPointerTy(DL))); + Entry.Node = + DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V), + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -5803,6 +5878,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) isTailCall = false; + // Disable tail calls if there is an swifterror argument. Targets have not + // been updated to support tail calls. + if (TLI.supportSwiftError() && SwiftErrorVal) + isTailCall = false; + TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) @@ -5827,7 +5907,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); // We update the virtual register for the actual swifterror argument. - FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } @@ -6035,6 +6115,49 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { return false; } +/// +/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code to +/// to adjust the dst pointer by the size of the copied memory. +bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { + + // Verify argument count: void *mempcpy(void *, const void *, size_t) + if (I.getNumArgOperands() != 3) + return false; + + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + + unsigned DstAlign = DAG.InferPtrAlignment(Dst); + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + unsigned Align = std::min(DstAlign, SrcAlign); + if (Align == 0) // Alignment of one or both could not be inferred. + Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved. + + bool isVol = false; + SDLoc sdl = getCurSDLoc(); + + // In the mempcpy context we need to pass in a false value for isTailCall + // because the return pointer needs to be adjusted by the size of + // the copied memory. + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol, + false, /*isTailCall=*/false, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + assert(MC.getNode() != nullptr && + "** memcpy should not be lowered as TailCall in mempcpy context **"); + DAG.setRoot(MC); + + // Check if Size needs to be truncated or extended. + Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType()); + + // Adjust return pointer to point just past the last dst byte. + SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(), + Dst, Size); + setValue(&I, DstPlusSize); + return true; +} + /// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an /// optimized form. If so, return true and lower it, otherwise return false /// and it will be lowered like a normal call. 
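[Editor's aside, not part of the patch] The new visitMemPCpyCall above leans on the identity that mempcpy(d, s, n) behaves like memcpy(d, s, n) but returns d + n, which is why the lowering emits a memcpy node and then adds the (possibly extended or truncated) size to the destination pointer. A tiny standalone illustration of that identity:

#include <cstring>
#include <iostream>

// mempcpy-equivalent: copy like memcpy, return a pointer one past the last
// byte written.
void *mempcpyEquivalent(void *Dst, const void *Src, std::size_t N) {
  std::memcpy(Dst, Src, N);
  return static_cast<char *>(Dst) + N;
}

int main() {
  char Buf[16] = {};
  char *End = static_cast<char *>(mempcpyEquivalent(Buf, "hello", 5));
  // End points just past the copied bytes, which is handy for appending.
  std::memcpy(End, " world", 7);
  std::cout << Buf << "\n";   // hello world
}

This is also why the memcpy node is emitted with isTailCall=false: the caller still needs the adjusted pointer after the copy returns.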
@@ -6191,7 +6314,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - ComputeUsesVAFloatArgument(I, &MMI); + computeUsesVAFloatArgument(I, MMI); const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { @@ -6325,6 +6448,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitMemCmpCall(I)) return; break; + case LibFunc::mempcpy: + if (visitMemPCpyCall(I)) + return; + break; case LibFunc::memchr: if (visitMemChrCall(I)) return; @@ -6395,6 +6522,19 @@ public: : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { } + /// Whether or not this operand accesses memory + bool hasMemory(const TargetLowering &TLI) const { + // Indirect operand accesses access memory. + if (isIndirect) + return true; + + for (const auto &Code : Codes) + if (TLI.getConstraintType(Code) == TargetLowering::C_Memory) + return true; + + return false; + } + /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. @@ -6447,6 +6587,75 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; } // end anonymous namespace +/// Make sure that the output operand \p OpInfo and its corresponding input +/// operand \p MatchingOpInfo have compatible constraint types (otherwise error +/// out). +static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo, + SDISelAsmOperandInfo &MatchingOpInfo, + SelectionDAG &DAG) { + if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT) + return; + + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const auto &TLI = DAG.getTargetLoweringInfo(); + + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode, + MatchingOpInfo.ConstraintVT); + if ((OpInfo.ConstraintVT.isInteger() != + MatchingOpInfo.ConstraintVT.isInteger()) || + (MatchRC.second != InputRC.second)) { + // FIXME: error out in a more elegant fashion + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT; +} + +/// Get a direct memory input to behave well as an indirect operand. +/// This may introduce stores, hence the need for a \p Chain. +/// \return The (possibly updated) chain. +static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, + SDISelAsmOperandInfo &OpInfo, + SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // If we don't have an indirect input, put it in the constpool if we can, + // otherwise spill it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. + + // If the operand is a float, integer, or vector constant, spill to a + // constant pool entry to get its address. 
+ const Value *OpVal = OpInfo.CallOperandVal; + if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { + OpInfo.CallOperand = DAG.getConstantPool( + cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); + return Chain; + } + + // Otherwise, create a stack slot and emit a store to it before the asm. + Type *Ty = OpVal->getType(); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = DL.getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL)); + Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(MF, SSFI)); + OpInfo.CallOperand = StackSlot; + + return Chain; +} + /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm @@ -6555,6 +6764,73 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // Otherwise, we couldn't allocate enough registers for this. } +static unsigned +findMatchingInlineAsmOperand(unsigned OperandNo, + const std::vector<SDValue> &AsmNodeOperands) { + // Scan until we find the definition we already emitted of this operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; + for (; OperandNo; --OperandNo) { + // Advance to the next operand. + unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + assert((InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag) || + InlineAsm::isMemKind(OpFlag)) && + "Skipped past definitions?"); + CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1; + } + return CurOp; +} + +/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT +/// \return true if it has succeeded, false otherwise +static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs, + MVT RegVT, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0, e = NumRegs; i != e; ++i) { + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + Regs.push_back(RegInfo.createVirtualRegister(RC)); + else + return false; + } + return true; +} + +class ExtraFlags { + unsigned Flags = 0; + +public: + explicit ExtraFlags(ImmutableCallSite CS) { + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + if (IA->hasSideEffects()) + Flags |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + Flags |= InlineAsm::Extra_IsAlignStack; + if (CS.isConvergent()) + Flags |= InlineAsm::Extra_IsConvergent; + Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; + } + + void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) { + // Ideally, we would only check against memory constraints. However, the + // meaning of an Other constraint can be target-specific and we can't easily + // reason about it. Therefore, be conservative and set MayLoad/MayStore + // for Other constraints as well. 
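[Editor's aside, not part of the patch] The new findMatchingInlineAsmOperand helper above walks the flattened INLINEASM operand list, where each logical operand is one flag word followed by getNumOperandRegisters(flag) register operands, so locating operand #K means skipping K such groups. A standalone model of that scan; the flag encoding below (register count in the low bits) is invented purely for the sketch:

#include <cstddef>
#include <iostream>
#include <vector>

unsigned numRegsFromFlag(unsigned Flag) { return Flag & 0xff; }

std::size_t findOperandStart(const std::vector<unsigned> &FlatOps,
                             unsigned OperandNo, std::size_t FirstOperand) {
  std::size_t Cur = FirstOperand;
  for (; OperandNo; --OperandNo)
    Cur += numRegsFromFlag(FlatOps[Cur]) + 1;  // skip flag word + its registers
  return Cur;
}

int main() {
  // Three groups of 2, 1, and 3 registers (flag words shown as just the count).
  std::vector<unsigned> Ops{2, 0, 0, 1, 0, 3, 0, 0, 0};
  std::cout << findOperandStart(Ops, 2, 0) << "\n";  // 5: start of the third group
}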
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + Flags |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + Flags |= InlineAsm::Extra_MayStore; + else if (OpInfo.Type == InlineAsm::isClobber) + Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); + } + } + + unsigned get() const { return Flags; } +}; + /// visitInlineAsm - Handle a call to an InlineAsm object. /// void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { @@ -6569,6 +6845,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { bool hasMemory = false; + // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore + ExtraFlags ExtraInfo(CS); + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { @@ -6578,14 +6857,25 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MVT OpVT = MVT::Other; // Compute the value type for each operand. - switch (OpInfo.Type) { - case InlineAsm::isOutput: - // Indirect outputs just consume an argument. - if (OpInfo.isIndirect) { - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); - break; + if (OpInfo.Type == InlineAsm::isInput || + (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + + // Process the call argument. BasicBlocks are labels, currently appearing + // only in asm's. + if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + } else { + OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } + OpVT = + OpInfo + .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout()) + .getSimpleVT(); + } + + if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); @@ -6597,43 +6887,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); } ++ResNo; - break; - case InlineAsm::isInput: - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); - break; - case InlineAsm::isClobber: - // Nothing to do. - break; } - // If this is an input or an indirect output, process the call argument. - // BasicBlocks are labels, currently appearing only in asm's. - if (OpInfo.CallOperandVal) { - if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { - OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); - } else { - OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); - } + OpInfo.ConstraintVT = OpVT; - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, - DAG.getDataLayout()).getSimpleVT(); - } + if (!hasMemory) + hasMemory = OpInfo.hasMemory(TLI); - OpInfo.ConstraintVT = OpVT; + // Determine if this InlineAsm MayLoad or MayStore based on the constraints. + // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]? + auto TargetConstraint = TargetConstraints[i]; - // Indirect operand accesses access memory. 
- if (OpInfo.isIndirect) - hasMemory = true; - else { - for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType - CType = TLI.getConstraintType(OpInfo.Codes[j]); - if (CType == TargetLowering::C_Memory) { - hasMemory = true; - break; - } - } - } + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(TargetConstraint, SDValue()); + + ExtraInfo.update(TargetConstraint); } SDValue Chain, Flag; @@ -6656,24 +6924,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; - - if (OpInfo.ConstraintVT != Input.ConstraintVT) { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); - std::pair<unsigned, const TargetRegisterClass *> MatchRC = - TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass *> InputRC = - TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, - Input.ConstraintVT); - if ((OpInfo.ConstraintVT.isInteger() != - Input.ConstraintVT.isInteger()) || - (MatchRC.second != InputRC.second)) { - report_fatal_error("Unsupported asm: input constraint" - " with a matching output constraint of" - " incompatible type!"); - } - Input.ConstraintVT = OpInfo.ConstraintVT; - } + patchMatchingInput(OpInfo, Input, DAG); } // Compute the constraint code and ConstraintType to use. @@ -6691,37 +6942,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); - // Memory operands really want the address of the value. If we don't have - // an indirect input, put it in the constpool if we can, otherwise spill - // it to a stack slot. - // TODO: This isn't quite right. We need to handle these according to - // the addressing mode that the constraint wants. Also, this may take - // an additional register for the computation and we don't want that - // either. - - // If the operand is a float, integer, or vector constant, spill to a - // constant pool entry to get its address. - const Value *OpVal = OpInfo.CallOperandVal; - if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || - isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { - OpInfo.CallOperand = DAG.getConstantPool( - cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); - } else { - // Otherwise, create a stack slot and emit a store to it before the - // asm. - Type *Ty = OpVal->getType(); - auto &DL = DAG.getDataLayout(); - uint64_t TySize = DL.getTypeAllocSize(Ty); - unsigned Align = DL.getPrefTypeAlignment(Ty); - MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = - DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); - Chain = DAG.getStore( - Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI)); - OpInfo.CallOperand = StackSlot; - } + // Memory operands really want the address of the value. + Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG); // There is no longer a Value* corresponding to this operand. 
OpInfo.CallOperandVal = nullptr; @@ -6736,7 +6958,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } - // Second pass - Loop over all of the operands, assigning virtual or physregs + // Third pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; @@ -6761,40 +6983,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore // bits as operand 3. - unsigned ExtraInfo = 0; - if (IA->hasSideEffects()) - ExtraInfo |= InlineAsm::Extra_HasSideEffects; - if (IA->isAlignStack()) - ExtraInfo |= InlineAsm::Extra_IsAlignStack; - if (CS.isConvergent()) - ExtraInfo |= InlineAsm::Extra_IsConvergent; - // Set the asm dialect. - ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; - - // Determine if this InlineAsm MayLoad or MayStore based on the constraints. - for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { - TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; - - // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); - - // Ideally, we would only check against memory constraints. However, the - // meaning of an other constraint can be target-specific and we can't easily - // reason about it. Therefore, be conservative and set MayLoad/MayStore - // for other constriants as well. - if (OpInfo.ConstraintType == TargetLowering::C_Memory || - OpInfo.ConstraintType == TargetLowering::C_Other) { - if (OpInfo.Type == InlineAsm::isInput) - ExtraInfo |= InlineAsm::Extra_MayLoad; - else if (OpInfo.Type == InlineAsm::isOutput) - ExtraInfo |= InlineAsm::Extra_MayStore; - else if (OpInfo.Type == InlineAsm::isClobber) - ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); - } - } - AsmNodeOperands.push_back(DAG.getTargetConstant( - ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); + ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6862,24 +7052,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { case InlineAsm::isInput: { SDValue InOperandVal = OpInfo.CallOperand; - if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? + if (OpInfo.isMatchingInputConstraint()) { // If this is required to match an output register we have already set, // just use its register. - unsigned OperandNo = OpInfo.getMatchedOperand(); - - // Scan until we find the definition we already emitted of this operand. - // When we find it, create a RegsForValue operand. - unsigned CurOp = InlineAsm::Op_FirstOperand; - for (; OperandNo; --OperandNo) { - // Advance to the next operand. 
- unsigned OpFlag = - cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - assert((InlineAsm::isRegDefKind(OpFlag) || - InlineAsm::isRegDefEarlyClobberKind(OpFlag) || - InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); - CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; - } - + auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(), + AsmNodeOperands); unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); if (InlineAsm::isRegDefKind(OpFlag) || @@ -6893,22 +7070,19 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } - RegsForValue MatchedRegs; - MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); - MatchedRegs.RegVTs.push_back(RegVT); - MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); - for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); - i != e; ++i) { - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) - MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); - else { - emitInlineAsmError( - CS, "inline asm error: This value" - " type register class is not natively supported!"); - return; - } + SmallVector<unsigned, 4> Regs; + + if (!createVirtualRegs(Regs, + InlineAsm::getNumOperandRegisters(OpFlag), + RegVT, DAG)) { + emitInlineAsmError(CS, "inline asm error: This value type register " + "class is not natively supported!"); + return; } + + RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); + SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, @@ -7142,19 +7316,23 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, if (!Range) return Op; - Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue(); - if (!Lo->isNullValue()) + ConstantRange CR = getConstantRangeFromMetadata(*Range); + if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet()) + return Op; + + APInt Lo = CR.getUnsignedMin(); + if (!Lo.isMinValue()) return Op; - Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue(); - unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2(); + APInt Hi = CR.getUnsignedMax(); + unsigned Bits = Hi.getActiveBits(); EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); SDLoc SL = getCurSDLoc(); - SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), - Op, DAG.getValueType(SmallVT)); + SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op, + DAG.getValueType(SmallVT)); unsigned NumVals = Op.getNode()->getNumValues(); if (NumVals == 1) return ZExt; @@ -7299,7 +7477,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { DAG.setRoot(Chain); // Inform the Frame Information that we have a stackmap in this function. - FuncInfo.MF->getFrameInfo()->setHasStackMap(); + FuncInfo.MF->getFrameInfo().setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. @@ -7450,7 +7628,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, DAG.DeleteNode(Call); // Inform the Frame Information that we have a patchpoint in this function. 
- FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}

/// Returns an AttributeSet representing the attributes applied to the return
@@ -7498,7 +7676,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
- DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);

DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
@@ -7580,8 +7758,19 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setZExt();
if (Args[i].isSExt)
Flags.setSExt();
- if (Args[i].isInReg)
+ if (Args[i].isInReg) {
+ // If we are using vectorcall calling convention, a structure that is
+ // passed InReg is surely an HVA
+ if (CLI.CallConv == CallingConv::X86_VectorCall &&
+ isa<StructType>(FinalType)) {
+ // The first value of a structure is marked
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
Flags.setInReg();
+ }
if (Args[i].isSRet)
Flags.setSRet();
if (Args[i].isSwiftSelf)
@@ -7867,8 +8056,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setZExt();
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
Flags.setSExt();
- if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) {
+ // If we are using vectorcall calling convention, a structure that is
+ // passed InReg is surely an HVA
+ if (F.getCallingConv() == CallingConv::X86_VectorCall &&
+ isa<StructType>(I->getType())) {
+ // The first value of a structure is marked
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
Flags.setInReg();
+ }
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
@@ -7990,7 +8190,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {

// If this argument is unused then remember its value. It is used to generate
// debugging information.
- if (I->use_empty() && NumValues) {
+ bool isSwiftErrorArg =
+ TLI->supportSwiftError() &&
+ F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
+ if (I->use_empty() && NumValues && !isSwiftErrorArg) {
SDB->setUnusedArgValue(&*I, InVals[i]);

// Also remember any frame index for use in FastISel.
@@ -8004,7 +8207,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);

- if (!I->use_empty()) {
+ // Even an apparent 'unused' swifterror argument needs to be returned. So
+ // we do generate a copy for it that can be used on return from the
+ // function.
+ if (!I->use_empty() || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
@@ -8040,12 +8246,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}

- // Update SwiftErrorMap.
- if (Res.getOpcode() == ISD::CopyFromReg && TLI->supportSwiftError() &&
- F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) {
+ // Update the SwiftErrorVRegDefMap.
+ if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) - FuncInfo->SwiftErrorMap[FuncInfo->MBB][0] = Reg; + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, + FuncInfo->SwiftErrorArg, Reg); } // If this argument is live outside of the entry block, insert a copy from @@ -8197,14 +8403,14 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { } bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, - unsigned *TotalCases, unsigned First, - unsigned Last, - unsigned Density) { + const SmallVectorImpl<unsigned> &TotalCases, + unsigned First, unsigned Last, + unsigned Density) const { assert(Last >= First); assert(TotalCases[Last] >= TotalCases[First]); - APInt LowCase = Clusters[First].Low->getValue(); - APInt HighCase = Clusters[Last].High->getValue(); + const APInt &LowCase = Clusters[First].Low->getValue(); + const APInt &HighCase = Clusters[Last].High->getValue(); assert(LowCase.getBitWidth() == HighCase.getBitWidth()); // FIXME: A range of consecutive cases has 100% density, but only requires one @@ -8233,7 +8439,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI, TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } -bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, +bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, MachineBasicBlock *DefaultMBB, @@ -8252,12 +8458,12 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); Prob += Clusters[I].Prob; - APInt Low = Clusters[I].Low->getValue(); - APInt High = Clusters[I].High->getValue(); + const APInt &Low = Clusters[I].Low->getValue(); + const APInt &High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 1 : 2; if (I != First) { // Fill the gap between this and the previous cluster. - APInt PreviousHigh = Clusters[I - 1].High->getValue(); + const APInt &PreviousHigh = Clusters[I - 1].High->getValue(); assert(PreviousHigh.slt(Low)); uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; for (uint64_t J = 0; J < Gap; J++) @@ -8325,26 +8531,37 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, if (!areJTsAllowed(TLI, SI)) return; + const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize(); + const int64_t N = Clusters.size(); - const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); + const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; + const unsigned MaxJumpTableSize = + OptForSize || TLI.getMaximumJumpTableSize() == 0 + ? UINT_MAX : TLI.getMaximumJumpTableSize(); + + if (N < 2 || N < MinJumpTableEntries) + return; // TotalCases[i]: Total nbr of cases in Clusters[0..i]. 
SmallVector<unsigned, 8> TotalCases(N); - for (unsigned i = 0; i < N; ++i) { - APInt Hi = Clusters[i].High->getValue(); - APInt Lo = Clusters[i].Low->getValue(); + const APInt &Hi = Clusters[i].High->getValue(); + const APInt &Lo = Clusters[i].Low->getValue(); TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; if (i != 0) TotalCases[i] += TotalCases[i - 1]; } - unsigned MinDensity = JumpTableDensity; - if (DefaultMBB->getParent()->getFunction()->optForSize()) - MinDensity = OptsizeJumpTableDensity; - if (N >= MinJumpTableSize - && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) { - // Cheap case: the whole range might be suitable for jump table. + const unsigned MinDensity = + OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; + + // Cheap case: the whole range may be suitable for jump table. + unsigned JumpTableSize = (Clusters[N - 1].High->getValue() - + Clusters[0].Low->getValue()) + .getLimitedValue(UINT_MAX - 1) + 1; + if (JumpTableSize <= MaxJumpTableSize && + isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -8368,14 +8585,23 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, SmallVector<unsigned, 8> MinPartitions(N); // LastElement[i] is the last element of the partition starting at i. SmallVector<unsigned, 8> LastElement(N); - // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. - SmallVector<unsigned, 8> NumTables(N); + // PartitionsScore[i] is used to break ties when choosing between two + // partitionings resulting in the same number of partitions. + SmallVector<unsigned, 8> PartitionsScore(N); + // For PartitionsScore, a small number of comparisons is considered as good as + // a jump table and a single comparison is considered better than a jump + // table. + enum PartitionScores : unsigned { + NoTable = 0, + Table = 1, + FewCases = 1, + SingleCase = 2 + }; // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; LastElement[N - 1] = N - 1; - assert(MinJumpTableSize > 1); - NumTables[N - 1] = 0; + PartitionsScore[N - 1] = PartitionScores::SingleCase; // Note: loop indexes are signed to avoid underflow. for (int64_t i = N - 2; i >= 0; i--) { @@ -8383,23 +8609,34 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Baseline: Put Clusters[i] into a partition on its own. MinPartitions[i] = MinPartitions[i + 1] + 1; LastElement[i] = i; - NumTables[i] = NumTables[i + 1]; + PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase; // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. - if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) { + JumpTableSize = (Clusters[j].High->getValue() - + Clusters[i].Low->getValue()) + .getLimitedValue(UINT_MAX - 1) + 1; + if (JumpTableSize <= MaxJumpTableSize && + isDense(Clusters, TotalCases, i, j, MinDensity)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); - bool IsTable = j - i + 1 >= MinJumpTableSize; - unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); - - // If this j leads to fewer partitions, or same number of partitions - // with more lookup tables, it is a better partitioning. + unsigned Score = j == N - 1 ? 
0 : PartitionsScore[j + 1]; + int64_t NumEntries = j - i + 1; + + if (NumEntries == 1) + Score += PartitionScores::SingleCase; + else if (NumEntries <= SmallNumberOfEntries) + Score += PartitionScores::FewCases; + else if (NumEntries >= MinJumpTableEntries) + Score += PartitionScores::Table; + + // If this leads to fewer partitions, or to the same number of + // partitions with better score, it is a better partitioning. if (NumPartitions < MinPartitions[i] || - (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { + (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) { MinPartitions[i] = NumPartitions; LastElement[i] = j; - NumTables[i] = Tables; + PartitionsScore[i] = Score; } } } @@ -8414,7 +8651,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, unsigned NumClusters = Last - First + 1; CaseCluster JTCluster; - if (NumClusters >= MinJumpTableSize && + if (NumClusters >= MinJumpTableEntries && buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { Clusters[DstIndex++] = JTCluster; } else { @@ -9107,7 +9344,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { WorkList.pop_back(); unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; - if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { + if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && + !DefaultMBB->getParent()->getFunction()->optForMinSize()) { // For optimized builds, lower large range as a balanced binary tree. splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); continue; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index b9888ae..abde8a8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -305,12 +305,13 @@ private: }; /// Check whether a range of clusters is dense enough for a jump table. - bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, - unsigned First, unsigned Last, unsigned MinDensity); + bool isDense(const CaseClusterVector &Clusters, + const SmallVectorImpl<unsigned> &TotalCases, + unsigned First, unsigned Last, unsigned MinDensity) const; /// Build a jump table cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. - bool buildJumpTable(CaseClusterVector &Clusters, unsigned First, + bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster); @@ -652,8 +653,6 @@ public: return CurInst ? 
CurInst->getDebugLoc() : DebugLoc(); } - unsigned getSDNodeOrder() const { return SDNodeOrder; } - void CopyValueToVirtualRegister(const Value *V, unsigned Reg); void visit(const Instruction &I); @@ -875,8 +874,8 @@ private: void visitAlloca(const AllocaInst &I); void visitLoad(const LoadInst &I); void visitStore(const StoreInst &I); - void visitMaskedLoad(const CallInst &I); - void visitMaskedStore(const CallInst &I); + void visitMaskedLoad(const CallInst &I, bool IsExpanding = false); + void visitMaskedStore(const CallInst &I, bool IsCompressing = false); void visitMaskedGather(const CallInst &I); void visitMaskedScatter(const CallInst &I); void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); @@ -885,6 +884,7 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitMemPCpyCall(const CallInst &I); bool visitMemChrCall(const CallInst &I); bool visitStrCpyCall(const CallInst &I, bool isStpcpy); bool visitStrCmpCall(const CallInst &I); @@ -941,6 +941,11 @@ private: /// Update the DAG and DAG builder with the relevant information after /// a new root node has been created which could be a tail call. void updateDAGForMaybeTailCall(SDValue MaybeTC); + + /// Return the appropriate SDDbgValue based on N. + SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable, + DIExpression *Expr, int64_t Offset, DebugLoc dl, + unsigned DbgSDNodeOrder); }; /// RegsForValue - This struct represents the registers (physical or virtual) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 93ac6d6..0faaad8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::JumpTable: return "JumpTable"; case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; @@ -120,7 +121,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 
0 : 1; unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); if (IID < Intrinsic::num_intrinsics) - return Intrinsic::getName((Intrinsic::ID)IID); + return Intrinsic::getName((Intrinsic::ID)IID, None); else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) return TII->getName(IID); llvm_unreachable("Invalid intrinsic ID"); @@ -261,21 +262,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; - case ISD::CONVERT_RNDSAT: { - switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { - default: llvm_unreachable("Unknown cvt code!"); - case ISD::CVT_FF: return "cvt_ff"; - case ISD::CVT_FS: return "cvt_fs"; - case ISD::CVT_FU: return "cvt_fu"; - case ISD::CVT_SF: return "cvt_sf"; - case ISD::CVT_UF: return "cvt_uf"; - case ISD::CVT_SS: return "cvt_ss"; - case ISD::CVT_SU: return "cvt_su"; - case ISD::CVT_US: return "cvt_us"; - case ISD::CVT_UU: return "cvt_uu"; - } - } - // Control flow instructions case ISD::BR: return "br"; case ISD::BRIND: return "brind"; @@ -321,7 +307,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; - + // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; @@ -424,9 +410,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { OS << '<' << CSDN->getAPIntValue() << '>'; } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle()) OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble()) OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; else { OS << "<APFloat("; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1d61657..64e6c22 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -426,6 +426,10 @@ static void SplitCriticalSideEffectEdges(Function &Fn) { } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + // If we already selected that function, we do not need to run SDISel. + if (mf.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected)) + return false; // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); @@ -594,16 +598,16 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there are any calls in this machine function. 
- MachineFrameInfo *MFI = MF->getFrameInfo(); + MachineFrameInfo &MFI = MF->getFrameInfo(); for (const auto &MBB : *MF) { - if (MFI->hasCalls() && MF->hasInlineAsm()) + if (MFI.hasCalls() && MF->hasInlineAsm()) break; for (const auto &MI : MBB) { const MCInstrDesc &MCID = TII->get(MI.getOpcode()); if ((MCID.isCall() && !MCID.isReturn()) || MI.isStackAligningInlineAsm()) { - MFI->setHasCalls(true); + MFI.setHasCalls(true); } if (MI.isInlineAsm()) { MF->setHasInlineAsm(true); @@ -645,7 +649,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } if (TLI->hasCopyImplyingStackAdjustment(MF)) - MFI->setHasCopyImplyingStackAdjustment(true); + MFI.setHasCopyImplyingStackAdjustment(true); // Freeze the set of reserved registers now that MachineFrameInfo has been // set up. All the information required by getReservedRegs() should be @@ -721,9 +725,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { } void SelectionDAGISel::CodeGenAndEmitDAG() { - std::string GroupName; - if (TimePassesIsEnabled) - GroupName = "Instruction Selection and Scheduling"; + StringRef GroupName = "sdag"; + StringRef GroupDescription = "Instruction Selection and Scheduling"; std::string BlockName; int BlockNumber = -1; (void)BlockNumber; @@ -751,7 +754,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in pre-legalize mode. { - NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("combine1", "DAG Combining 1", GroupName, + GroupDescription, TimePassesIsEnabled); CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); } @@ -765,7 +769,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { bool Changed; { - NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("legalize_types", "Type Legalization", GroupName, + GroupDescription, TimePassesIsEnabled); Changed = CurDAG->LegalizeTypes(); } @@ -780,8 +785,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-type-legalize mode. { - NamedRegionTimer T("DAG Combining after legalize types", GroupName, - TimePassesIsEnabled); + NamedRegionTimer T("combine_lt", "DAG Combining after legalize types", + GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); } @@ -791,13 +796,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } { - NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName, + GroupDescription, TimePassesIsEnabled); Changed = CurDAG->LegalizeVectors(); } if (Changed) { { - NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, + GroupDescription, TimePassesIsEnabled); CurDAG->LegalizeTypes(); } @@ -806,8 +813,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-type-legalize mode. 
{ - NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, - TimePassesIsEnabled); + NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors", + GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); } @@ -819,7 +826,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->viewGraph("legalize input for " + BlockName); { - NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("legalize", "DAG Legalization", GroupName, + GroupDescription, TimePassesIsEnabled); CurDAG->Legalize(); } @@ -831,7 +839,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-legalize mode. { - NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("combine2", "DAG Combining 2", GroupName, + GroupDescription, TimePassesIsEnabled); CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); } @@ -847,7 +856,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. { - NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("isel", "Instruction Selection", GroupName, + GroupDescription, TimePassesIsEnabled); DoInstructionSelection(); } @@ -860,8 +870,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); { - NamedRegionTimer T("Instruction Scheduling", GroupName, - TimePassesIsEnabled); + NamedRegionTimer T("sched", "Instruction Scheduling", GroupName, + GroupDescription, TimePassesIsEnabled); Scheduler->Run(CurDAG, FuncInfo->MBB); } @@ -872,7 +882,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // inserted into. MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB; { - NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); + NamedRegionTimer T("emit", "Instruction Creation", GroupName, + GroupDescription, TimePassesIsEnabled); // FuncInfo->InsertPt is passed by reference and set to the end of the // scheduled instructions. @@ -886,8 +897,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Free the scheduler state. { - NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, - TimePassesIsEnabled); + NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName, + GroupDescription, TimePassesIsEnabled); delete Scheduler; } @@ -1003,10 +1014,10 @@ bool SelectionDAGISel::PrepareEHLandingPad() { // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MF->getMMI().addLandingPad(MBB); + MCSymbol *Label = MF->addLandingPad(MBB); // Assign the call site to the landing pad's begin label. - MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); + MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) @@ -1153,14 +1164,22 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, return; FuncInfo->SwiftErrorVals.clear(); - FuncInfo->SwiftErrorMap.clear(); - FuncInfo->SwiftErrorWorklist.clear(); + FuncInfo->SwiftErrorVRegDefMap.clear(); + FuncInfo->SwiftErrorVRegUpwardsUse.clear(); + FuncInfo->SwiftErrorArg = nullptr; // Check if function has a swifterror argument. 
+ bool HaveSeenSwiftErrorArg = false; for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end(); AI != AE; ++AI) - if (AI->hasSwiftErrorAttr()) + if (AI->hasSwiftErrorAttr()) { + assert(!HaveSeenSwiftErrorArg && + "Must have only one swifterror parameter"); + (void)HaveSeenSwiftErrorArg; // silence warning. + HaveSeenSwiftErrorArg = true; + FuncInfo->SwiftErrorArg = &*AI; FuncInfo->SwiftErrorVals.push_back(&*AI); + } for (const auto &LLVMBB : Fn) for (const auto &Inst : LLVMBB) { @@ -1170,95 +1189,152 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, } } -/// For each basic block, merge incoming swifterror values or simply propagate -/// them. The merged results will be saved in SwiftErrorMap. For predecessors -/// that are not yet visited, we create virtual registers to hold the swifterror -/// values and save them in SwiftErrorWorklist. -static void mergeIncomingSwiftErrors(FunctionLoweringInfo *FuncInfo, - const TargetLowering *TLI, - const TargetInstrInfo *TII, - const BasicBlock *LLVMBB, - SelectionDAGBuilder *SDB) { +static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const BasicBlock *LLVMBB, + SelectionDAGBuilder *SDB) { if (!TLI->supportSwiftError()) return; - // We should only do this when we have swifterror parameter or swifterror + // We only need to do this when we have swifterror parameter or swifterror // alloc. if (FuncInfo->SwiftErrorVals.empty()) return; - // At beginning of a basic block, insert PHI nodes or get the virtual - // register from the only predecessor, and update SwiftErrorMap; if one - // of the predecessors is not visited, update SwiftErrorWorklist. - // At end of a basic block, if a block is in SwiftErrorWorklist, insert copy - // to sync up the virtual register assignment. - - // Always create a virtual register for each swifterror value in entry block. - auto &DL = SDB->DAG.getDataLayout(); - const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); if (pred_begin(LLVMBB) == pred_end(LLVMBB)) { - for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + auto &DL = FuncInfo->MF->getDataLayout(); + auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { + // We will always generate a copy from the argument. It is always used at + // least by the 'return' of the swifterror. + if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal) + continue; unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); // Assign Undef to Vreg. We construct MI directly to make sure it works // with FastISel. - BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), VReg); - FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); + BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(), + SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), + VReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg); } - return; } +} - if (auto *UniquePred = LLVMBB->getUniquePredecessor()) { - auto *UniquePredMBB = FuncInfo->MBBMap[UniquePred]; - if (!FuncInfo->SwiftErrorMap.count(UniquePredMBB)) { - // Update SwiftErrorWorklist with a new virtual register. 
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { - unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); - FuncInfo->SwiftErrorWorklist[UniquePredMBB].push_back(VReg); - // Propagate the information from the single predecessor. - FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); - } - return; - } - // Propagate the information from the single predecessor. - FuncInfo->SwiftErrorMap[FuncInfo->MBB] = - FuncInfo->SwiftErrorMap[UniquePredMBB]; +/// Propagate swifterror values through the machine function CFG. +static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { + auto *TLI = FuncInfo->TLI; + if (!TLI->supportSwiftError()) return; - } - // For the case of multiple predecessors, update SwiftErrorWorklist. - // Handle the case where we have two or more predecessors being the same. - for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); - PI != PE; ++PI) { - auto *PredMBB = FuncInfo->MBBMap[*PI]; - if (!FuncInfo->SwiftErrorMap.count(PredMBB) && - !FuncInfo->SwiftErrorWorklist.count(PredMBB)) { - for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { - unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); - // When we actually visit the basic block PredMBB, we will materialize - // the virtual register assignment in copySwiftErrorsToFinalVRegs. - FuncInfo->SwiftErrorWorklist[PredMBB].push_back(VReg); + // We only need to do this when we have swifterror parameter or swifterror + // alloc. + if (FuncInfo->SwiftErrorVals.empty()) + return; + + // For each machine basic block in reverse post order. + ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF); + for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator + It = RPOT.begin(), + E = RPOT.end(); + It != E; ++It) { + MachineBasicBlock *MBB = *It; + + // For each swifterror value in the function. + for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { + auto Key = std::make_pair(MBB, SwiftErrorVal); + auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); + auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key); + bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end(); + unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0; + bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end(); + assert(!(UpwardsUse && !DownwardDef) && + "We can't have an upwards use but no downwards def"); + + // If there is no upwards exposed use and an entry for the swifterror in + // the def map for this value we don't need to do anything: We already + // have a downward def for this basic block. + if (!UpwardsUse && DownwardDef) + continue; + + // Otherwise we either have an upwards exposed use vreg that we need to + // materialize or need to forward the downward def from predecessors. + + // Check whether we have a single vreg def from all predecessors. + // Otherwise we need a phi. + SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs; + SmallSet<const MachineBasicBlock*, 8> Visited; + for (auto *Pred : MBB->predecessors()) { + if (!Visited.insert(Pred).second) + continue; + VRegs.push_back(std::make_pair( + Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal))); + if (Pred != MBB) + continue; + // We have a self-edge. + // If there was no upwards use in this basic block there is now one: the + // phi needs to use it self. 
+ if (!UpwardsUse) { + UpwardsUse = true; + UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key); + assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end()); + UUseVReg = UUseIt->second; + } + } + + // We need a phi node if we have more than one predecessor with different + // downward defs. + bool needPHI = + VRegs.size() >= 1 && + std::find_if( + VRegs.begin(), VRegs.end(), + [&](const std::pair<const MachineBasicBlock *, unsigned> &V) + -> bool { return V.second != VRegs[0].second; }) != + VRegs.end(); + + // If there is no upwards exposed used and we don't need a phi just + // forward the swifterror vreg from the predecessor(s). + if (!UpwardsUse && !needPHI) { + assert(!VRegs.empty() && + "No predecessors? The entry block should bail out earlier"); + // Just forward the swifterror vreg from the predecessor(s). + FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second); + continue; } - } - } - // For the case of multiple predecessors, create a virtual register for - // each swifterror value and generate Phi node. - for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { - unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); - FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); - - MachineInstrBuilder SwiftErrorPHI = BuildMI(*FuncInfo->MBB, - FuncInfo->MBB->begin(), SDB->getCurDebugLoc(), - TII->get(TargetOpcode::PHI), VReg); - for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); - PI != PE; ++PI) { - auto *PredMBB = FuncInfo->MBBMap[*PI]; - unsigned SwiftErrorReg = FuncInfo->SwiftErrorMap.count(PredMBB) ? - FuncInfo->SwiftErrorMap[PredMBB][I] : - FuncInfo->SwiftErrorWorklist[PredMBB][I]; - SwiftErrorPHI.addReg(SwiftErrorReg) - .addMBB(PredMBB); + auto DLoc = isa<Instruction>(SwiftErrorVal) + ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc() + : DebugLoc(); + const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo(); + + // If we don't need a phi create a copy to the upward exposed vreg. + if (!needPHI) { + assert(UpwardsUse); + unsigned DestReg = UUseVReg; + BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY), + DestReg) + .addReg(VRegs[0].second); + continue; + } + + // We need a phi: if there is an upwards exposed use we already have a + // destination virtual register number otherwise we generate a new one. + auto &DL = FuncInfo->MF->getDataLayout(); + auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + unsigned PHIVReg = + UpwardsUse ? UUseVReg + : FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + MachineInstrBuilder SwiftErrorPHI = + BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, + TII->get(TargetOpcode::PHI), PHIVReg); + for (auto BBRegPair : VRegs) { + SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first); + } + + // We did not have a definition in this block before: store the phi's vreg + // as this block downward exposed def. + if (!UpwardsUse) + FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg); } } } @@ -1309,7 +1385,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!FuncInfo->MBB) continue; // Some blocks like catchpads have no code or MBB. FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); - mergeIncomingSwiftErrors(FuncInfo, TLI, TII, LLVMBB, SDB); + createSwiftErrorEntriesInEntryBlock(FuncInfo, TLI, TII, LLVMBB, SDB); // Setup an EH landing-pad block. 
FuncInfo->ExceptionPointerVirtReg = 0; @@ -1486,6 +1562,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->PHINodesToUpdate.clear(); } + propagateSwiftErrorVRegs(FuncInfo); + delete FastIS; SDB->clearDanglingDebugInfo(); SDB->SPDescriptor.resetPerFunctionState(); @@ -2170,7 +2248,7 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { /// to use the new results. void SelectionDAGISel::UpdateChains( SDNode *NodeToMatch, SDValue InputChain, - const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { + SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { SmallVector<SDNode*, 4> NowDeadNodes; // Now that all the normal results are replaced, we replace the chain and @@ -2182,6 +2260,11 @@ void SelectionDAGISel::UpdateChains( // Replace all the chain results with the final chain we ended up with. for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { SDNode *ChainNode = ChainNodesMatched[i]; + // If ChainNode is null, it's because we replaced it on a previous + // iteration and we cleared it out of the map. Just skip it. + if (!ChainNode) + continue; + assert(ChainNode->getOpcode() != ISD::DELETED_NODE && "Deleted node left in chain"); @@ -2194,6 +2277,11 @@ void SelectionDAGISel::UpdateChains( if (ChainVal.getValueType() == MVT::Glue) ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); + SelectionDAG::DAGNodeDeletedListener NDL( + *CurDAG, [&](SDNode *N, SDNode *E) { + std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, + static_cast<SDNode *>(nullptr)); + }); CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. @@ -2694,14 +2782,15 @@ struct MatchScope { /// for this. class MatchStateUpdater : public SelectionDAG::DAGUpdateListener { - SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes; - SmallVectorImpl<MatchScope> &MatchScopes; + SDNode **NodeToMatch; + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes; + SmallVectorImpl<MatchScope> &MatchScopes; public: - MatchStateUpdater(SelectionDAG &DAG, - SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN, - SmallVectorImpl<MatchScope> &MS) : - SelectionDAG::DAGUpdateListener(DAG), - RecordedNodes(RN), MatchScopes(MS) { } + MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch, + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN, + SmallVectorImpl<MatchScope> &MS) + : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch), + RecordedNodes(RN), MatchScopes(MS) {} void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or @@ -2711,6 +2800,9 @@ public: // update listener during matching a complex patterns. if (!E || E->isMachineOpcode()) return; + // Check if NodeToMatch was updated. + if (N == *NodeToMatch) + *NodeToMatch = E; // Performing linear search here does not matter because we almost never // run this code. You'd have to have a CSE during complex pattern // matching. @@ -3003,7 +3095,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // consistent. 
std::unique_ptr<MatchStateUpdater> MSU; if (ComplexPatternFuncMutatesDAG()) - MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes, + MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes, MatchScopes)); if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, @@ -3388,7 +3480,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, SDNode *E) { auto &Chain = ChainNodesMatched; - assert((!E || llvm::find(Chain, N) == Chain.end()) && + assert((!E || !is_contained(Chain, N)) && "Chain node replaced during MorphNode"); Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end()); }); @@ -3487,7 +3579,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, NodeToMatch->getValueType(i) == MVT::iPTR || Res.getValueType() == MVT::iPTR || NodeToMatch->getValueType(i).getSizeInBits() == - Res.getValueType().getSizeInBits()) && + Res.getValueSizeInBits()) && "invalid replacement"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); } @@ -3579,7 +3671,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { unsigned iid = cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); if (iid < Intrinsic::num_intrinsics) - Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid); + Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None); else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) Msg << "target intrinsic %" << TII->getName(iid); else diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 90aaba2..d27e245 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -55,7 +55,8 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { NextSlotToAllocate = 0; // Need to resize this on each safepoint - we need the two to stay in sync and // the clear patterns of a SelectionDAGBuilder have no relation to - // FunctionLoweringInfo. SmallBitVector::reset initializes all bits to false. + // FunctionLoweringInfo. Also need to ensure used bits get cleared. + AllocatedStackSlots.clear(); AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size()); } @@ -70,7 +71,7 @@ SDValue StatepointLoweringState::allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder) { NumSlotsAllocatedForStatepoints++; - auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); unsigned SpillSize = ValueType.getSizeInBits() / 8; assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?"); @@ -82,16 +83,16 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, const size_t NumSlots = AllocatedStackSlots.size(); assert(NextSlotToAllocate <= NumSlots && "Broken invariant"); - // The stack slots in StatepointStackSlots beyond the first NumSlots were - // added in this instance of StatepointLoweringState, and cannot be re-used. 
- assert(NumSlots <= Builder.FuncInfo.StatepointStackSlots.size() && + assert(AllocatedStackSlots.size() == + Builder.FuncInfo.StatepointStackSlots.size() && "Broken invariant"); for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) { if (!AllocatedStackSlots.test(NextSlotToAllocate)) { const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; - if (MFI->getObjectSize(FI) == SpillSize) { + if (MFI.getObjectSize(FI) == SpillSize) { AllocatedStackSlots.set(NextSlotToAllocate); + // TODO: Is ValueType the right thing to use here? return Builder.DAG.getFrameIndex(FI, ValueType); } } @@ -101,9 +102,13 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); - MFI->markAsStatepointSpillSlotObjectIndex(FI); + MFI.markAsStatepointSpillSlotObjectIndex(FI); Builder.FuncInfo.StatepointStackSlots.push_back(FI); + AllocatedStackSlots.resize(AllocatedStackSlots.size()+1, true); + assert(AllocatedStackSlots.size() == + Builder.FuncInfo.StatepointStackSlots.size() && + "Broken invariant"); StatepointMaxSlotsRequired = std::max<unsigned long>( StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size()); @@ -350,9 +355,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // vary since we spill vectors of pointers too). At some point we // can consider allowing spills of smaller values to larger slots // (i.e. change the '==' in the assert below to a '>='). - auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - assert((MFI->getObjectSize(Index) * 8) == - Incoming.getValueType().getSizeInBits() && + MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); #endif @@ -370,7 +374,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, /// Lower a single value incoming to a statepoint node. This value can be /// either a deopt value or a gc value, the handling is the same. We special /// case constants and allocas, then fall back to spilling if required. -static void lowerIncomingStatepointValue(SDValue Incoming, +static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { SDValue Chain = Builder.getRoot(); @@ -389,6 +393,14 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // relocate the address of the alloca itself?) Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), Incoming.getValueType())); + } else if (LiveInOnly) { + // If this value is live in (not live-on-return, or live-through), we can + // treat it the same way patchpoint treats it's "live in" values. We'll + // end up folding some of these into stack references, but they'll be + // handled by the register allocator. Note that we do not have the notion + // of a late use so these values might be placed in registers which are + // clobbered by the call. This is fine for live-in. + Ops.push_back(Incoming); } else { // Otherwise, locate a spill slot and explicitly spill it so it // can be found by the runtime later. 
We currently do not support @@ -439,19 +451,38 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, "non gc managed derived pointer found in statepoint"); } } + assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!"); } else { assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!"); assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!"); } #endif + // Figure out what lowering strategy we're going to use for each part + // Note: Is is conservatively correct to lower both "live-in" and "live-out" + // as "live-through". A "live-through" variable is one which is "live-in", + // "live-out", and live throughout the lifetime of the call (i.e. we can find + // it from any PC within the transitive callee of the statepoint). In + // particular, if the callee spills callee preserved registers we may not + // be able to find a value placed in that register during the call. This is + // fine for live-out, but not for live-through. If we were willing to make + // assumptions about the code generator producing the callee, we could + // potentially allow live-through values in callee saved registers. + const bool LiveInDeopt = + SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn; + + auto isGCValue =[&](const Value *V) { + return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V); + }; + // Before we actually start lowering (and allocating spill slots for values), // reserve any stack slots which we judge to be profitable to reuse for a // particular value. This is purely an optimization over the code below and // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. for (const Value *V : SI.DeoptState) { - reservePreviousStackSlotForValue(V, Builder); + if (!LiveInDeopt || isGCValue(V)) + reservePreviousStackSlotForValue(V, Builder); } for (unsigned i = 0; i < SI.Bases.size(); ++i) { reservePreviousStackSlotForValue(SI.Bases[i], Builder); @@ -468,7 +499,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // what type of values are contained within. for (const Value *V : SI.DeoptState) { SDValue Incoming = Builder.getValue(V); - lowerIncomingStatepointValue(Incoming, Ops, Builder); + const bool LiveInValue = LiveInDeopt && !isGCValue(V); + lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder); } // Finally, go ahead and lower all the gc arguments. There's no prefixed @@ -478,10 +510,12 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // (base[0], ptr[0], base[1], ptr[1], ...) for (unsigned i = 0; i < SI.Bases.size(); ++i) { const Value *Base = SI.Bases[i]; - lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder); + lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false, + Ops, Builder); const Value *Ptr = SI.Ptrs[i]; - lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder); + lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false, + Ops, Builder); } // If there are any explicit spill slots passed to the statepoint, record @@ -889,7 +923,7 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { #ifndef NDEBUG // Consistency check - // We skip this check for relocates not in the same basic block as thier + // We skip this check for relocates not in the same basic block as their // statepoint. 
It would be too expensive to preserve validation info through // different basic blocks. if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 806646f..690f0d2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -216,7 +216,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : - (VT == MVT::f128) ? RTLIB::UO_F64 : RTLIB::UO_PPCF128; + (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : (VT == MVT::f64) ? RTLIB::OEQ_F64 : (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; @@ -418,6 +418,58 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, return false; } +bool +TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, + unsigned OpIdx, + const APInt &Demanded, + DAGCombinerInfo &DCI) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Op = User->getOperand(OpIdx); + APInt KnownZero, KnownOne; + + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, + *this, 0, true)) + return false; + + + // Old will not always be the same as Op. For example: + // + // Demanded = 0xffffff + // Op = i64 truncate (i32 and x, 0xffffff) + // In this case simplify demand bits will want to replace the 'and' node + // with the value 'x', which will give us: + // Old = i32 and x, 0xffffff + // New = x + if (Old.hasOneUse()) { + // For the one use case, we just commit the change. + DCI.CommitTargetLoweringOpt(*this); + return true; + } + + // If Old has more than one use then it must be Op, because the + // AssumeSingleUse flag is not propogated to recursive calls of + // SimplifyDemanded bits, so the only node with multiple use that + // it will attempt to combine will be opt. + assert(Old == Op); + + SmallVector <SDValue, 4> NewOps; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + if (i == OpIdx) { + NewOps.push_back(New); + continue; + } + NewOps.push_back(User->getOperand(i)); + } + DAG.UpdateNodeOperands(User, NewOps); + // Op has less users now, so we may be able to perform additional combines + // with it. + DCI.AddToWorklist(Op.getNode()); + // User's operands have been updated, so we may be able to do new combines + // with it. + DCI.AddToWorklist(User); + return true; +} + /// Look at Op. At this point, we know that only the DemandedMask bits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -430,9 +482,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, TargetLoweringOpt &TLO, - unsigned Depth) const { + unsigned Depth, + bool AssumeSingleUse) const { unsigned BitWidth = DemandedMask.getBitWidth(); - assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && + assert(Op.getScalarValueSizeInBits() == BitWidth && "Mask size mismatches value type size!"); APInt NewMask = DemandedMask; SDLoc dl(Op); @@ -442,7 +495,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero = KnownOne = APInt(BitWidth, 0); // Other users may use these bits. 
- if (!Op.getNode()->hasOneUse()) { + if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. @@ -468,22 +521,63 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); KnownZero = ~KnownOne; return false; // Don't fall through, will infinitely loop. + case ISD::BUILD_VECTOR: + // Collect the known bits that are shared by every constant vector element. + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + for (SDValue SrcOp : Op->ops()) { + if (!isa<ConstantSDNode>(SrcOp)) { + // We can only handle all constant values - bail out with no known bits. + KnownZero = KnownOne = APInt(BitWidth, 0); + return false; + } + KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue(); + KnownZero2 = ~KnownOne2; + + // BUILD_VECTOR can implicitly truncate sources, we must handle this. + if (KnownOne2.getBitWidth() != BitWidth) { + assert(KnownOne2.getBitWidth() > BitWidth && + KnownZero2.getBitWidth() > BitWidth && + "Expected BUILD_VECTOR implicit truncation"); + KnownOne2 = KnownOne2.trunc(BitWidth); + KnownZero2 = KnownZero2.trunc(BitWidth); + } + + // Known bits are the values that are shared by every element. + // TODO: support per-element known bits. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + } + return false; // Don't fall through, will infinitely loop. case ISD::AND: // If the RHS is a constant, check to see if the LHS would be zero without // using the bits from the RHS. Below, we use knowledge about the RHS to // simplify the LHS, here we're using information from the LHS to simplify // the RHS. if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + SDValue Op0 = Op.getOperand(0); APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth); + TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) - return TLO.CombineTo(Op, Op.getOperand(0)); + return TLO.CombineTo(Op, Op0); + // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) return true; + + // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its + // constant, but if this 'and' is only clearing bits that were just set by + // the xor, then this 'and' can be eliminated by shrinking the mask of + // the xor. For example, for a 32-bit X: + // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 + if (isBitwiseNot(Op0) && Op0.hasOneUse() && + LHSOne == ~RHSC->getAPIntValue()) { + SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), + Op0.getOperand(0), Op.getOperand(1)); + return TLO.CombineTo(Op, Xor); + } } if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, @@ -599,10 +693,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the RHS is a constant, see if we can simplify it. // for XOR, we prefer to force bits to 1 if they will make a -1. - // if we can't force bits, try to shrink constant + // If we can't force bits, try to shrink the constant. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt Expanded = C->getAPIntValue() | (~NewMask); - // if we can expand it to have all bits set, do it + // If we can expand it to have all bits set, do it. 
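The new "and of a bitwise-not" case above rests on an ordinary integer identity: when the AND mask only clears bits that the XOR with -1 just set, the AND can be folded into the XOR. A minimal check of the concrete pattern quoted in the comment, assuming a 32-bit unsigned x (plain C++, not SelectionDAG nodes):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Tests[] = {0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t X : Tests) {
    // and (xor (srl X, 31), -1), 1  -->  xor (srl X, 31), 1
    uint32_t Original = (~(X >> 31)) & 1u;
    uint32_t Folded   = (X >> 31) ^ 1u;
    assert(Original == Folded);
  }
  return 0;
}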
if (Expanded.isAllOnesValue()) { if (Expanded != C->getAPIntValue()) { EVT VT = Op.getValueType(); @@ -610,7 +704,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getConstant(Expanded, dl, VT)); return TLO.CombineTo(Op, New); } - // if it already has all the bits set, nothing to change + // If it already has all the bits set, nothing to change // but don't shrink either! } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { return true; @@ -823,7 +917,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // demand the input sign bit. APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); + InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -866,9 +960,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); // If we only care about the highest bit, don't bother shifting right. if (MsbMask == NewMask) { - unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + unsigned ShAmt = ExVT.getScalarSizeInBits(); SDValue InOp = Op.getOperand(0); - unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits(); + unsigned VTBits = Op->getValueType(0).getScalarSizeInBits(); bool AlreadySignExtended = TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; // However if the input is already sign extended we expect the sign @@ -892,17 +986,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - ExVT.getScalarType().getSizeInBits()); + BitWidth - ExVT.getScalarSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - ExVT.getScalarType().getSizeInBits()) & + ExVT.getScalarSizeInBits()) & NewMask; // Since the sign extended bits are demanded, we know that the sign @@ -919,8 +1013,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) - return TLO.CombineTo(Op, - TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); + return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg( + Op.getOperand(0), dl, ExVT.getScalarType())); if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; @@ -957,8 +1051,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::ZERO_EXTEND: { - unsigned OperandBitWidth = - Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt InMask = NewMask.trunc(OperandBitWidth); // If none of the top bits are demanded, convert this into an any_extend. 
@@ -980,7 +1073,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarType().getSizeInBits(); + unsigned InBits = InVT.getScalarSizeInBits(); APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); APInt NewBits = ~InMask & NewMask; @@ -1020,8 +1113,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::ANY_EXTEND: { - unsigned OperandBitWidth = - Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -1034,8 +1126,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::TRUNCATE: { // Simplify the input, using demanded bit information, and compute the known // zero/one bits live out. - unsigned OperandBitWidth = - Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt TruncMask = NewMask.zext(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -1109,7 +1200,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!TLO.LegalOperations() && !Op.getValueType().isVector() && !Op.getOperand(0).getValueType().isVector() && - NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + NewMask == APInt::getSignBit(Op.getValueSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); @@ -1120,10 +1211,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); - unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits(); + unsigned OpVTSizeInBits = Op.getValueSizeInBits(); if (!OpVTLegal && OpVTSizeInBits > 32) Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); - unsigned ShVal = Op.getValueType().getSizeInBits()-1; + unsigned ShVal = Op.getValueSizeInBits() - 1; SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, Op.getValueType(), @@ -1139,16 +1230,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMask.countLeadingZeros()); if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1)) - return true; - if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1)) - return true; - // See if the operation should be performed at a smaller bit width. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + KnownOne2, TLO, Depth+1) || + SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1) || + // See if the operation should be performed at a smaller bit width. + TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) { + const SDNodeFlags *Flags = Op.getNode()->getFlags(); + if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) { + // Disable the nsw and nuw flags. 
We can no longer guarantee that we + // won't wrap after simplification. + SDNodeFlags NewFlags = *Flags; + NewFlags.setNoSignedWrap(false); + NewFlags.setNoUnsignedWrap(false); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1), + &NewFlags); + return TLO.CombineTo(Op, NewOp); + } return true; + } + LLVM_FALLTHROUGH; } - // FALL THROUGH default: // Just use computeKnownBits to compute output bits. TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); @@ -1214,11 +1316,11 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!BV) return false; - BitVector UndefElements; - CN = BV->getConstantSplatNode(&UndefElements); - // Only interested in constant splats, and we don't try to handle undef - // elements in identifying boolean constants. - if (!CN || UndefElements.none()) + // Only interested in constant splats, we don't care about undef + // elements in identifying boolean constants and getConstantSplatNode + // returns NULL if all ops are undef; + CN = BV->getConstantSplatNode(); + if (!CN) return false; } @@ -1254,11 +1356,11 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (!BV) return false; - BitVector UndefElements; - CN = BV->getConstantSplatNode(&UndefElements); - // Only interested in constant splats, and we don't try to handle undef - // elements in identifying boolean constants. - if (!CN || UndefElements.none()) + // Only interested in constant splats, we don't care about undef + // elements in identifying boolean constants and getConstantSplatNode + // returns NULL if all ops are undef; + CN = BV->getConstantSplatNode(); + if (!CN) return false; } @@ -1390,7 +1492,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + ShAmt == Log2_32(N0.getValueSizeInBits())) { if ((C1 == 0) == (Cond == ISD::SETEQ)) { // (srl (ctlz x), 5) == 0 -> X != 0 // (srl (ctlz x), 5) != 1 -> X != 0 @@ -1412,8 +1514,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, CTPOP = N0.getOperand(0); if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && - (N0 == CTPOP || N0.getValueType().getSizeInBits() > - Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { + (N0 == CTPOP || + N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) { EVT CTVT = CTPOP.getValueType(); SDValue CTOp = CTPOP.getOperand(0); @@ -1478,6 +1580,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); + if (MinBits == 1 && C1 == 1) + // Invert the condition. + return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } @@ -1530,7 +1636,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; if (!Lod->isVolatile() && Lod->isUnindexed()) { - unsigned origWidth = N0.getValueType().getSizeInBits(); + unsigned origWidth = N0.getValueSizeInBits(); unsigned maskWidth = origWidth; // We can narrow (e.g.) 16-bit extending loads on 32-bit target to // 8 bits, but have to be careful... 
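Both the (srl (ctlz x), 5) == 0 fold above and the lowerCmpEqZeroToCtlzSrl helper added later in this diff lean on the same fact: for a 32-bit value, the leading-zero count reaches 32 only when the value is zero, so shifting it right by 5 yields exactly the x == 0 predicate. A small check, assuming the GCC/Clang __builtin_clz builtin (which is undefined at zero, hence the wrapper):

#include <cassert>
#include <cstdint>

// ctlz with defined behaviour at zero (the builtin alone is undefined there).
static unsigned clz32(uint32_t X) { return X ? __builtin_clz(X) : 32; }

int main() {
  const uint32_t Tests[] = {0u, 1u, 2u, 0x80000000u, 0xffffffffu};
  for (uint32_t X : Tests) {
    // (srl (ctlz x), 5) == 0  <->  x != 0: only x == 0 pushes ctlz to 32.
    assert(((clz32(X) >> 5) == 0) == (X != 0));
    // Equivalently, (srl (ctlz x), 5) is the 0/1 value of (x == 0), which is
    // what the later CTLZ+SRL lowering materializes.
    assert((clz32(X) >> 5) == (X == 0 ? 1u : 0u));
  }
  return 0;
}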
@@ -1577,7 +1683,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the LHS is a ZERO_EXTEND, perform the comparison on the input. if (N0.getOpcode() == ISD::ZERO_EXTEND) { - unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); + unsigned InSize = N0.getOperand(0).getValueSizeInBits(); // If the comparison constant has bits in the upper part, the // zero-extended value could never match. @@ -2297,7 +2403,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(Op); return; } - // fall through + LLVM_FALLTHROUGH; case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 's': { // Relocatable Constant @@ -2946,7 +3052,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, DAG.getConstant(magics.m, dl, VT)).getNode(), 1); else - return SDValue(); // No mulhu or equvialent + return SDValue(); // No mulhu or equivalent Created->push_back(Q.getNode()); @@ -2987,108 +3093,190 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { // Legalization Utilities //===----------------------------------------------------------------------===// -bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, - SelectionDAG &DAG, SDValue LL, SDValue LH, - SDValue RL, SDValue RH) const { - EVT VT = N->getValueType(0); - SDLoc dl(N); - - bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT); - bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT); - bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); - bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); - if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { - unsigned OuterBitSize = VT.getSizeInBits(); - unsigned InnerBitSize = HiLoVT.getSizeInBits(); - unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); - unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); - - // LL, LH, RL, and RH must be either all NULL or all set to a value. 
- assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || - (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); - - if (!LL.getNode() && !RL.getNode() && - isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { - LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0)); - RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1)); - } - - if (!LL.getNode()) - return false; +bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, + SDValue LHS, SDValue RHS, + SmallVectorImpl<SDValue> &Result, + EVT HiLoVT, SelectionDAG &DAG, + MulExpansionKind Kind, SDValue LL, + SDValue LH, SDValue RL, SDValue RH) const { + assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI || + Opcode == ISD::SMUL_LOHI); + + bool HasMULHS = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::MULHS, HiLoVT); + bool HasMULHU = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::MULHU, HiLoVT); + bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); + bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) || + isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); + + if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI) + return false; - APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); - if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && - DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { - // The inputs are both zero-extended. - if (HasUMUL_LOHI) { - // We can emit a umul_lohi. - Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, - RL); - Hi = SDValue(Lo.getNode(), 1); - return true; - } - if (HasMULHU) { - // We can emit a mulhu+mul. - Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); - return true; - } + unsigned OuterBitSize = VT.getScalarSizeInBits(); + unsigned InnerBitSize = HiLoVT.getScalarSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(LHS); + unsigned RHSSB = DAG.ComputeNumSignBits(RHS); + + // LL, LH, RL, and RH must be either all NULL or all set to a value. + assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || + (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); + + SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT); + auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi, + bool Signed) -> bool { + if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) { + Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R); + Hi = SDValue(Lo.getNode(), 1); + return true; } - if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { - // The input values are both sign-extended. - if (HasSMUL_LOHI) { - // We can emit a smul_lohi. - Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, - RL); - Hi = SDValue(Lo.getNode(), 1); - return true; - } - if (HasMULHS) { - // We can emit a mulhs+mul. - Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); - Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL); - return true; - } + if ((Signed && HasMULHS) || (!Signed && HasMULHU)) { + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R); + Hi = DAG.getNode(Signed ? 
ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R); + return true; } + return false; + }; - if (!LH.getNode() && !RH.getNode() && - isOperationLegalOrCustom(ISD::SRL, VT) && - isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { - auto &DL = DAG.getDataLayout(); - unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); - SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL)); - LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); - LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); - RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); - RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); - } + SDValue Lo, Hi; - if (!LH.getNode()) - return false; + if (!LL.getNode() && !RL.getNode() && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS); + RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS); + } + + if (!LL.getNode()) + return false; - if (HasUMUL_LOHI) { - // Lo,Hi = umul LHS, RHS. - SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, - DAG.getVTList(HiLoVT, HiLoVT), LL, RL); - Lo = UMulLOHI; - Hi = UMulLOHI.getValue(1); - RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(LHS, HighMask) && + DAG.MaskedValueIsZero(RHS, HighMask)) { + // The inputs are both zero-extended. + if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) { + Result.push_back(Lo); + Result.push_back(Hi); + if (Opcode != ISD::MUL) { + SDValue Zero = DAG.getConstant(0, dl, HiLoVT); + Result.push_back(Zero); + Result.push_back(Zero); + } return true; } - if (HasMULHU) { - Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); - RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + } + + if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize && + RHSSB > InnerBitSize) { + // The input values are both sign-extended. + // TODO non-MUL case? + if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) { + Result.push_back(Lo); + Result.push_back(Hi); return true; } } - return false; + + unsigned ShiftAmount = OuterBitSize - InnerBitSize; + EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout()); + if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) { + // FIXME getShiftAmountTy does not always return a sensible result when VT + // is an illegal type, and so the type may be too small to fit the shift + // amount. Override it with i32. The shift will have to be legalized. 
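At bottom, the rewritten expansion is the schoolbook decomposition of a wide multiply into the half-width partial products LL*RL, LL*RH, LH*RL and LH*RH, shifted into place and summed; the DAG code above additionally threads the carries through ADDC/ADDE. A standalone sketch of just that arithmetic, building a 32x32->64 product from 16-bit halves and checking it against the native multiply (illustrative only, not the DAG expansion itself):

#include <cassert>
#include <cstdint>

// Build the 64-bit product of two 32-bit values from 16x16->32 partial
// products, mirroring the LL/LH/RL/RH split used by expandMUL_LOHI.
static uint64_t mulFromHalves(uint32_t L, uint32_t R) {
  uint32_t LL = L & 0xffff, LH = L >> 16;
  uint32_t RL = R & 0xffff, RH = R >> 16;
  uint64_t Lo   = (uint64_t)LL * RL; // contributes to bits [0, 32)
  uint64_t Mid1 = (uint64_t)LL * RH; // contributes to bits [16, 48)
  uint64_t Mid2 = (uint64_t)LH * RL; // contributes to bits [16, 48)
  uint64_t Hi   = (uint64_t)LH * RH; // contributes to bits [32, 64)
  // The true sum equals L * R and fits in 64 bits, so no addition wraps.
  return Lo + (Mid1 << 16) + (Mid2 << 16) + (Hi << 32);
}

int main() {
  const uint32_t Tests[] = {0u, 1u, 0xffffu, 0x10000u, 0x12345678u, 0xffffffffu};
  for (uint32_t A : Tests)
    for (uint32_t B : Tests)
      assert(mulFromHalves(A, B) == (uint64_t)A * B);
  return 0;
}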
+ ShiftAmountTy = MVT::i32; + } + SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy); + + if (!LH.getNode() && !RH.getNode() && + isOperationLegalOrCustom(ISD::SRL, VT) && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift); + LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); + RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift); + RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); + } + + if (!LH.getNode()) + return false; + + if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false)) + return false; + + Result.push_back(Lo); + + if (Opcode == ISD::MUL) { + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + Result.push_back(Hi); + return true; + } + + // Compute the full width result. + auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue { + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); + Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + return DAG.getNode(ISD::OR, dl, VT, Lo, Hi); + }; + + SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi); + if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false)) + return false; + + // This is effectively the add part of a multiply-add of half-sized operands, + // so it cannot overflow. + Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi)); + + if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false)) + return false; + + Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next, + Merge(Lo, Hi)); + + SDValue Carry = Next.getValue(1); + Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); + Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift); + + if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI)) + return false; + + SDValue Zero = DAG.getConstant(0, dl, HiLoVT); + Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero, + Carry); + Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi)); + + if (Opcode == ISD::SMUL_LOHI) { + SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL)); + Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT); + + NextSub = DAG.getNode(ISD::SUB, dl, VT, Next, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL)); + Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT); + } + + Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); + Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift); + Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next)); + return true; +} + +bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, MulExpansionKind Kind, + SDValue LL, SDValue LH, SDValue RL, + SDValue RH) const { + SmallVector<SDValue, 2> Result; + bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N, + N->getOperand(0), N->getOperand(1), Result, HiLoVT, + DAG, Kind, LL, LH, RL, RH); + if (Ok) { + assert(Result.size() == 2); + Lo = Result[0]; + Hi = Result[1]; + } + return Ok; } bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, @@ -3190,7 +3378,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, } SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); - SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals); + SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); return DAG.getMergeValues({ Value, NewChain }, SL); } @@ -3518,6 +3706,81 @@ SDValue 
TargetLowering::expandUnalignedStore(StoreSDNode *ST, return Result; } +SDValue +TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, + const SDLoc &DL, EVT DataVT, + SelectionDAG &DAG, + bool IsCompressedMemory) const { + SDValue Increment; + EVT AddrVT = Addr.getValueType(); + EVT MaskVT = Mask.getValueType(); + assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() && + "Incompatible types of Data and Mask"); + if (IsCompressedMemory) { + // Incrementing the pointer according to number of '1's in the mask. + EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits()); + SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask); + if (MaskIntVT.getSizeInBits() < 32) { + MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg); + MaskIntVT = MVT::i32; + } + + // Count '1's with POPCNT. + Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg); + Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT); + // Scale is an element size in bytes. + SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL, + AddrVT); + Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); + } else + Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT); + + return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); +} + +static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, + SDValue Idx, + EVT VecVT, + const SDLoc &dl) { + if (isa<ConstantSDNode>(Idx)) + return Idx; + + EVT IdxVT = Idx.getValueType(); + unsigned NElts = VecVT.getVectorNumElements(); + if (isPowerOf2_32(NElts)) { + APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), + Log2_32(NElts)); + return DAG.getNode(ISD::AND, dl, IdxVT, Idx, + DAG.getConstant(Imm, dl, IdxVT)); + } + + return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, + DAG.getConstant(NElts - 1, dl, IdxVT)); +} + +SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, + SDValue VecPtr, EVT VecVT, + SDValue Index) const { + SDLoc dl(Index); + // Make sure the index type is big enough to compute in. + Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout())); + + EVT EltVT = VecVT.getVectorElementType(); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltVT.getSizeInBits() && + "Converting bits to bytes lost precision"); + + Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl); + + EVT IdxVT = Index.getValueType(); + + Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, + DAG.getConstant(EltSize, dl, IdxVT)); + return DAG.getNode(ISD::ADD, dl, IdxVT, Index, VecPtr); +} + //===----------------------------------------------------------------------===// // Implementation of Emulated TLS Model //===----------------------------------------------------------------------===// @@ -3550,11 +3813,36 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. // At last for X86 targets, maybe good for other targets too? - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setAdjustsStack(true); // Is this only for X86 target? - MFI->setHasCalls(true); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setAdjustsStack(true); // Is this only for X86 target? 
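Two helpers introduced in this hunk reduce to simple pointer arithmetic: IncrementMemoryAddress advances the address by popcount(mask) elements for a compressing store (or by the whole vector width otherwise), and getVectorElementPointer clamps a dynamic index before scaling it by the element size. A plain-integer sketch of both follows; the names and parameters are illustrative rather than the LLVM signatures, and it assumes the GCC/Clang __builtin_popcount:

#include <cassert>
#include <cstdint>

// Address after a masked store: a compressed store only advances past the
// elements that were actually written, one per set mask bit.
static uint64_t nextAddress(uint64_t Addr, uint32_t Mask, unsigned EltBytes,
                            unsigned NumElts, bool Compressed) {
  uint64_t Elts = Compressed ? (unsigned)__builtin_popcount(Mask) : NumElts;
  return Addr + Elts * EltBytes;
}

// Clamp a dynamic element index into [0, NumElts) before computing the
// element address, so an out-of-range index cannot address past the vector.
static uint64_t elementAddress(uint64_t VecAddr, uint64_t Idx,
                               unsigned NumElts, unsigned EltBytes) {
  bool PowerOf2 = NumElts != 0 && (NumElts & (NumElts - 1)) == 0;
  uint64_t Clamped = PowerOf2 ? (Idx & (NumElts - 1))
                              : (Idx < NumElts - 1 ? Idx : NumElts - 1);
  return VecAddr + Clamped * EltBytes;
}

int main() {
  // A compressing store of a 4 x i32 vector under mask 0b0101 writes 2 elements.
  assert(nextAddress(0x1000, 0x5, /*EltBytes=*/4, /*NumElts=*/4, true) == 0x1008);
  // Index 7 into a 4-element vector is clamped to element 3.
  assert(elementAddress(0x2000, 7, 4, 4) == 0x200c);
  return 0;
}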
+ MFI.setHasCalls(true); assert((GA->getOffset() == 0) && "Emulated TLS must have zero offset in GlobalAddressSDNode"); return CallResult.first; } + +SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, + SelectionDAG &DAG) const { + assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node."); + if (!isCtlzFast()) + return SDValue(); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + SDLoc dl(Op); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (C->isNullValue() && CC == ISD::SETEQ) { + EVT VT = Op.getOperand(0).getValueType(); + SDValue Zext = Op.getOperand(0); + if (VT.bitsLT(MVT::i32)) { + VT = MVT::i32; + Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); + } + unsigned Log2b = Log2_32(VT.getSizeInBits()); + SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); + SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, + DAG.getConstant(Log2b, dl, MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); + } + } + return SDValue(); +} diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 1efc440..ff7d205 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" using namespace llvm; @@ -81,121 +82,6 @@ ShadowStackGCLowering::ShadowStackGCLowering() initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry()); } -namespace { -/// EscapeEnumerator - This is a little algorithm to find all escape points -/// from a function so that "finally"-style code can be inserted. In addition -/// to finding the existing return and unwind instructions, it also (if -/// necessary) transforms any call instructions into invokes and sends them to -/// a landing pad. -/// -/// It's wrapped up in a state machine using the same transform C# uses for -/// 'yield return' enumerators, This transform allows it to be non-allocating. -class EscapeEnumerator { - Function &F; - const char *CleanupBBName; - - // State. - int State; - Function::iterator StateBB, StateE; - IRBuilder<> Builder; - -public: - EscapeEnumerator(Function &F, const char *N = "cleanup") - : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} - - IRBuilder<> *Next() { - switch (State) { - default: - return nullptr; - - case 0: - StateBB = F.begin(); - StateE = F.end(); - State = 1; - - case 1: - // Find all 'return', 'resume', and 'unwind' instructions. - while (StateBB != StateE) { - BasicBlock *CurBB = &*StateBB++; - - // Branches and invokes do not escape, only unwind, resume, and return - // do. - TerminatorInst *TI = CurBB->getTerminator(); - if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) - continue; - - Builder.SetInsertPoint(TI); - return &Builder; - } - - State = 2; - - // Find all 'call' instructions. - SmallVector<Instruction *, 16> Calls; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE; - ++II) - if (CallInst *CI = dyn_cast<CallInst>(II)) - if (!CI->getCalledFunction() || - !CI->getCalledFunction()->getIntrinsicID()) - Calls.push_back(CI); - - if (Calls.empty()) - return nullptr; - - // Create a cleanup block. 
- LLVMContext &C = F.getContext(); - BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); - Type *ExnTy = - StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr); - if (!F.hasPersonalityFn()) { - Constant *PersFn = F.getParent()->getOrInsertFunction( - "__gcc_personality_v0", - FunctionType::get(Type::getInt32Ty(C), true)); - F.setPersonalityFn(PersFn); - } - LandingPadInst *LPad = - LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB); - LPad->setCleanup(true); - ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); - - // Transform the 'call' instructions into 'invoke's branching to the - // cleanup block. Go in reverse order to make prettier BB names. - SmallVector<Value *, 16> Args; - for (unsigned I = Calls.size(); I != 0;) { - CallInst *CI = cast<CallInst>(Calls[--I]); - - // Split the basic block containing the function call. - BasicBlock *CallBB = CI->getParent(); - BasicBlock *NewBB = CallBB->splitBasicBlock( - CI->getIterator(), CallBB->getName() + ".cont"); - - // Remove the unconditional branch inserted at the end of CallBB. - CallBB->getInstList().pop_back(); - NewBB->getInstList().remove(CI); - - // Create a new invoke instruction. - Args.clear(); - CallSite CS(CI); - Args.append(CS.arg_begin(), CS.arg_end()); - - InvokeInst *II = - InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args, - CI->getName(), CallBB); - II->setCallingConv(CI->getCallingConv()); - II->setAttributes(CI->getAttributes()); - CI->replaceAllUsesWith(II); - delete CI; - } - - Builder.SetInsertPoint(RI); - return &Builder; - } - } -}; -} - - Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { // doInitialization creates the abstract type of this value. Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp index d361a6c..4837495 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -199,9 +199,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { - return "Shrink Wrapping analysis"; - } + StringRef getPassName() const override { return "Shrink Wrapping analysis"; } /// \brief Perform the shrink-wrapping analysis and update /// the MachineFrameInfo attached to \p MF with the results. @@ -256,8 +254,8 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, /// \brief Helper function to find the immediate (post) dominator. 
template <typename ListOfBBs, typename DominanceAnalysis> -MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, - DominanceAnalysis &Dom) { +static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, + DominanceAnalysis &Dom) { MachineBasicBlock *IDom = &Block; for (MachineBasicBlock *BB : BBs) { IDom = Dom.findNearestCommonDominator(IDom, BB); @@ -521,9 +519,9 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { << ' ' << Save->getName() << "\nRestore: " << Restore->getNumber() << ' ' << Restore->getName() << '\n'); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setSavePoint(Save); - MFI->setRestorePoint(Restore); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setSavePoint(Save); + MFI.setRestorePoint(Restore); ++NumCandidates; return false; } diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index ce01c5f..209bbe5 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -58,7 +58,7 @@ public: bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override {} - const char *getPassName() const override { + StringRef getPassName() const override { return "SJLJ Exception Handling preparation"; } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 07be24b..1c6a84e 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -381,9 +381,59 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const { } #endif +LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM, + LiveInterval &LI) { + for (LiveInterval::SubRange &S : LI.subranges()) + if (S.LaneMask == LM) + return S; + llvm_unreachable("SubRange for this mask not found"); +} + +void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) { + if (!LI.hasSubRanges()) { + LI.createDeadDef(VNI); + return; + } + + SlotIndex Def = VNI->def; + if (Original) { + // If we are transferring a def from the original interval, make sure + // to only update the subranges for which the original subranges had + // a def at this location. + for (LiveInterval::SubRange &S : LI.subranges()) { + auto &PS = getSubRangeForMask(S.LaneMask, Edit->getParent()); + VNInfo *PV = PS.getVNInfoAt(Def); + if (PV != nullptr && PV->def == Def) + S.createDeadDef(Def, LIS.getVNInfoAllocator()); + } + } else { + // This is a new def: either from rematerialization, or from an inserted + // copy. Since rematerialization can regenerate a definition of a sub- + // register, we need to check which subranges need to be updated. 
+ const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def); + assert(DefMI != nullptr); + LaneBitmask LM; + for (const MachineOperand &DefOp : DefMI->defs()) { + unsigned R = DefOp.getReg(); + if (R != LI.reg) + continue; + if (unsigned SR = DefOp.getSubReg()) + LM |= TRI.getSubRegIndexLaneMask(SR); + else { + LM = MRI.getMaxLaneMaskForVReg(R); + break; + } + } + for (LiveInterval::SubRange &S : LI.subranges()) + if ((S.LaneMask & LM).any()) + S.createDeadDef(Def, LIS.getVNInfoAllocator()); + } +} + VNInfo *SplitEditor::defValue(unsigned RegIdx, const VNInfo *ParentVNI, - SlotIndex Idx) { + SlotIndex Idx, + bool Original) { assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); @@ -392,28 +442,28 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // Create a new value. VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); + bool Force = LI->hasSubRanges(); + ValueForcePair FP(Force ? nullptr : VNI, Force); // Use insert for lookup, so we can add missing values with a second lookup. std::pair<ValueMap::iterator, bool> InsP = - Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), - ValueForcePair(VNI, false))); + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), FP)); - // This was the first time (RegIdx, ParentVNI) was mapped. - // Keep it as a simple def without any liveness. - if (InsP.second) + // This was the first time (RegIdx, ParentVNI) was mapped, and it is not + // forced. Keep it as a simple def without any liveness. + if (!Force && InsP.second) return VNI; // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { - SlotIndex Def = OldVNI->def; - LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI)); - // No longer a simple mapping. Switch to a complex, non-forced mapping. - InsP.first->second = ValueForcePair(); + addDeadDef(*LI, OldVNI, Original); + + // No longer a simple mapping. Switch to a complex mapping. If the + // interval has subranges, make it a forced mapping. + InsP.first->second = ValueForcePair(nullptr, Force); } // This is a complex mapping, add liveness for VNI - SlotIndex Def = VNI->def; - LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); - + addDeadDef(*LI, VNI, Original); return VNI; } @@ -431,9 +481,8 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. - SlotIndex Def = VNI->def; - LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); - LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); + addDeadDef(LIS.getInterval(Edit->get(RegIdx)), VNI, false); + // Mark as complex mapped, forced. 
VFP = ValueForcePair(nullptr, true); } @@ -455,13 +504,18 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, unsigned Original = VRM.getOriginal(Edit->get(RegIdx)); LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); - LiveRangeEdit::Remat RM(ParentVNI); - RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); - if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) { - Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); - ++NumRemats; - } else { + bool DidRemat = false; + if (OrigVNI) { + LiveRangeEdit::Remat RM(ParentVNI); + RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); + if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); + ++NumRemats; + DidRemat = true; + } + } + if (!DidRemat) { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); @@ -472,7 +526,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, } // Define the value in Reg. - return defValue(RegIdx, ParentVNI, Def); + return defValue(RegIdx, ParentVNI, Def, false); } /// Create a new virtual register and live interval. @@ -621,7 +675,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { } VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB, - MBB.SkipPHIsAndLabels(MBB.begin())); + MBB.SkipPHIsLabelsAndDebug(MBB.begin())); RegAssign.insert(Start, VNI->def, OpenIdx); DEBUG(dump()); return VNI->def; @@ -944,14 +998,15 @@ bool SplitEditor::transferValues() { } // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. - DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); - LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); + DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx + << '(' << PrintReg(Edit->get(RegIdx)) << ')'); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); // Check for a simply defined value that can be blitted directly. ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); if (VNInfo *VNI = VFP.getPointer()) { DEBUG(dbgs() << ':' << VNI->id); - LR.addSegment(LiveInterval::Segment(Start, End, VNI)); + LI.addSegment(LiveInterval::Segment(Start, End, VNI)); Start = End; continue; } @@ -975,7 +1030,7 @@ bool SplitEditor::transferValues() { // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { - VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? @@ -995,7 +1050,7 @@ bool SplitEditor::transferValues() { if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); - VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well. @@ -1003,10 +1058,10 @@ bool SplitEditor::transferValues() { // This block needs a live-in value. The last block covered may not // be live-out. 
if (End < BlockEnd) - LRC.addLiveInBlock(LR, MDT[&*MBB], End); + LRC.addLiveInBlock(LI, MDT[&*MBB], End); else { // Live-through, and we don't know the value. - LRC.addLiveInBlock(LR, MDT[&*MBB]); + LRC.addLiveInBlock(LI, MDT[&*MBB]); LRC.setLiveOutValue(&*MBB, nullptr); } } @@ -1025,42 +1080,90 @@ bool SplitEditor::transferValues() { return Skipped; } +static bool removeDeadSegment(SlotIndex Def, LiveRange &LR) { + const LiveRange::Segment *Seg = LR.getSegmentContaining(Def); + if (Seg == nullptr) + return true; + if (Seg->end != Def.getDeadSlot()) + return false; + // This is a dead PHI. Remove it. + LR.removeSegment(*Seg, true); + return true; +} + +void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC, + LiveRange &LR, LaneBitmask LM, + ArrayRef<SlotIndex> Undefs) { + for (MachineBasicBlock *P : B.predecessors()) { + SlotIndex End = LIS.getMBBEndIdx(P); + SlotIndex LastUse = End.getPrevSlot(); + // The predecessor may not have a live-out value. That is OK, like an + // undef PHI operand. + LiveInterval &PLI = Edit->getParent(); + // Need the cast because the inputs to ?: would otherwise be deemed + // "incompatible": SubRange vs LiveInterval. + LiveRange &PSR = !LM.all() ? getSubRangeForMask(LM, PLI) + : static_cast<LiveRange&>(PLI); + if (PSR.liveAt(LastUse)) + LRC.extend(LR, End, /*PhysReg=*/0, Undefs); + } +} + void SplitEditor::extendPHIKillRanges() { // Extend live ranges to be live-out for successor PHI values. - for (const VNInfo *PHIVNI : Edit->getParent().valnos) { - if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) - continue; - unsigned RegIdx = RegAssign.lookup(PHIVNI->def); - LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); - - // Check whether PHI is dead. - const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def); - assert(Segment != nullptr && "Missing segment for VNI"); - if (Segment->end == PHIVNI->def.getDeadSlot()) { - // This is a dead PHI. Remove it. - LR.removeSegment(*Segment, true); + + // Visit each PHI def slot in the parent live interval. If the def is dead, + // remove it. Otherwise, extend the live interval to reach the end indexes + // of all predecessor blocks. + + LiveInterval &ParentLI = Edit->getParent(); + for (const VNInfo *V : ParentLI.valnos) { + if (V->isUnused() || !V->isPHIDef()) continue; - } + unsigned RegIdx = RegAssign.lookup(V->def); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); LiveRangeCalc &LRC = getLRCalc(RegIdx); - MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex End = LIS.getMBBEndIdx(*PI); - SlotIndex LastUse = End.getPrevSlot(); - // The predecessor may not have a live-out value. That is OK, like an - // undef PHI operand. 
- if (Edit->getParent().liveAt(LastUse)) { - assert(RegAssign.lookup(LastUse) == RegIdx && - "Different register assignment in phi predecessor"); - LRC.extend(LR, End); - } + MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def); + if (!removeDeadSegment(V->def, LI)) + extendPHIRange(B, LRC, LI, LaneBitmask::getAll(), /*Undefs=*/{}); + } + + SmallVector<SlotIndex, 4> Undefs; + LiveRangeCalc SubLRC; + + for (LiveInterval::SubRange &PS : ParentLI.subranges()) { + for (const VNInfo *V : PS.valnos) { + if (V->isUnused() || !V->isPHIDef()) + continue; + unsigned RegIdx = RegAssign.lookup(V->def); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); + LiveInterval::SubRange &S = getSubRangeForMask(PS.LaneMask, LI); + if (removeDeadSegment(V->def, S)) + continue; + + MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def); + SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + Undefs.clear(); + LI.computeSubRangeUndefs(Undefs, PS.LaneMask, MRI, *LIS.getSlotIndexes()); + extendPHIRange(B, SubLRC, S, PS.LaneMask, Undefs); } } } /// rewriteAssigned - Rewrite all uses of Edit->getReg(). void SplitEditor::rewriteAssigned(bool ExtendRanges) { + struct ExtPoint { + ExtPoint(const MachineOperand &O, unsigned R, SlotIndex N) + : MO(O), RegIdx(R), Next(N) {} + MachineOperand MO; + unsigned RegIdx; + SlotIndex Next; + }; + + SmallVector<ExtPoint,4> ExtPoints; + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()), RE = MRI.reg_end(); RI != RE;) { MachineOperand &MO = *RI; @@ -1082,8 +1185,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); - LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); - MO.setReg(LI->reg); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); + MO.setReg(LI.reg); DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' << Idx << ':' << RegIdx << '\t' << *MI); @@ -1095,7 +1198,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { if (MO.isDef()) { if (!MO.getSubReg() && !MO.isEarlyClobber()) continue; - // We may wan't to extend a live range for a partial redef, or for a use + // We may want to extend a live range for a partial redef, or for a use // tied to an early clobber. Idx = Idx.getPrevSlot(); if (!Edit->getParent().liveAt(Idx)) @@ -1103,7 +1206,53 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { } else Idx = Idx.getRegSlot(true); - getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot()); + SlotIndex Next = Idx.getNextSlot(); + if (LI.hasSubRanges()) { + // We have to delay extending subranges until we have seen all operands + // defining the register. This is because a <def,read-undef> operand + // will create an "undef" point, and we cannot extend any subranges + // until all of them have been accounted for. + if (MO.isUse()) + ExtPoints.push_back(ExtPoint(MO, RegIdx, Next)); + } else { + LiveRangeCalc &LRC = getLRCalc(RegIdx); + LRC.extend(LI, Next, 0, ArrayRef<SlotIndex>()); + } + } + + for (ExtPoint &EP : ExtPoints) { + LiveInterval &LI = LIS.getInterval(Edit->get(EP.RegIdx)); + assert(LI.hasSubRanges()); + + LiveRangeCalc SubLRC; + unsigned Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg(); + LaneBitmask LM = Sub != 0 ? 
TRI.getSubRegIndexLaneMask(Sub) + : MRI.getMaxLaneMaskForVReg(Reg); + for (LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & LM).none()) + continue; + // The problem here can be that the new register may have been created + // for a partially defined original register. For example: + // %vreg827:subreg_hireg<def,read-undef> = ... + // ... + // %vreg828<def> = COPY %vreg827 + if (S.empty()) + continue; + SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + SmallVector<SlotIndex, 4> Undefs; + LI.computeSubRangeUndefs(Undefs, S.LaneMask, MRI, *LIS.getSlotIndexes()); + SubLRC.extend(S, EP.Next, 0, Undefs); + } + } + + for (unsigned R : *Edit) { + LiveInterval &LI = LIS.getInterval(R); + if (!LI.hasSubRanges()) + continue; + LI.clear(); + LI.removeEmptySubRanges(); + LIS.constructMainRangeFromSubranges(LI); } } @@ -1146,7 +1295,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); - defValue(RegIdx, ParentVNI, ParentVNI->def); + defValue(RegIdx, ParentVNI, ParentVNI->def, true); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. @@ -1182,8 +1331,9 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { deleteRematVictims(); // Get rid of unused values and set phi-kill flags. - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) { - LiveInterval &LI = LIS.getInterval(*I); + for (unsigned Reg : *Edit) { + LiveInterval &LI = LIS.getInterval(Reg); + LI.removeEmptySubRanges(); LI.RenumberValues(); } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index a968494..a75738a 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -325,12 +325,30 @@ private: return LRCalc[SpillMode != SM_Partition && RegIdx != 0]; } + /// Find a subrange corresponding to the lane mask @p LM in the live + /// interval @p LI. The interval @p LI is assumed to contain such a subrange. + /// This function is used to find corresponding subranges between the + /// original interval and the new intervals. + LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI); + + /// Add a segment to the interval LI for the value number VNI. If LI has + /// subranges, corresponding segments will be added to them as well, but + /// with newly created value numbers. If Original is true, dead def will + /// only be added a subrange of LI if the corresponding subrange of the + /// original interval has a def at this index. Otherwise, all subranges + /// of LI will be updated. + void addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original); + /// defValue - define a value in RegIdx from ParentVNI at Idx. /// Idx does not have to be ParentVNI->def, but it must be contained within /// ParentVNI's live range in ParentLI. The new value is added to the value - /// map. + /// map. The value being defined may either come from rematerialization + /// (or an inserted copy), or it may be coming from the original interval. + /// The parameter Original should be true in the latter case, otherwise + /// it should be false. /// Return the new LI value. 
- VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx); + VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx, + bool Original); /// forceRecompute - Force the live range of ParentVNI in RegIdx to be /// recomputed by LiveRangeCalc::extend regardless of the number of defs. @@ -368,6 +386,15 @@ private: /// Return true if any ranges were skipped. bool transferValues(); + /// Live range @p LR corresponding to the lane Mask @p LM has a live + /// PHI def at the beginning of block @p B. Extend the range @p LR of + /// all predecessor values that reach this def. If @p LR is a subrange, + /// the array @p Undefs is the set of all locations where it is undefined + /// via <def,read-undef> in other subranges for the same register. + void extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC, + LiveRange &LR, LaneBitmask LM, + ArrayRef<SlotIndex> Undefs); + /// extendPHIKillRanges - Extend the ranges of all values killed by original /// parent PHIDefs. void extendPHIKillRanges(); diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index 87cd470..89c4b57 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -778,10 +778,9 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { unsigned FixedInstr = 0; unsigned FixedMemOp = 0; unsigned FixedDbg = 0; - MachineModuleInfo *MMI = &MF->getMMI(); // Remap debug information that refers to stack slots. - for (auto &VI : MMI->getVariableDbgInfo()) { + for (auto &VI : MF->getVariableDbgInfo()) { if (!VI.Var) continue; if (SlotRemap.count(VI.Slot)) { @@ -980,7 +979,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { << "********** Function: " << ((const Value*)Func.getFunction())->getName() << '\n'); MF = &Func; - MFI = MF->getFrameInfo(); + MFI = &MF->getFrameInfo(); Indexes = &getAnalysis<SlotIndexes>(); SP = &getAnalysis<StackProtector>(); BlockLiveness.clear(); diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 87e4eb6..a5ef7c8 100644 --- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" @@ -64,7 +63,7 @@ public: MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } /// \brief Calculate the liveness information for the given machine function. @@ -113,7 +112,7 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { ++NumStackMapFuncVisited; // Skip this function if there are no patchpoints to process. - if (!MF.getFrameInfo()->hasPatchPoint()) { + if (!MF.getFrameInfo().hasPatchPoint()) { ++NumStackMapFuncSkipped; return false; } @@ -126,7 +125,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { // For all basic blocks in the function. for (auto &MBB : MF) { DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); - LiveRegs.init(TRI); + LiveRegs.init(*TRI); // FIXME: This should probably be addLiveOuts(). 
LiveRegs.addLiveOutsNoPristines(MBB); bool HasStackMap = false; diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index d91bb80..9b7dd400 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -30,16 +30,20 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" static cl::opt<int> StackMapVersion( - "stackmap-version", cl::init(1), - cl::desc("Specify the stackmap encoding version (default = 1)")); + "stackmap-version", cl::init(2), + cl::desc("Specify the stackmap encoding version (default = 2)")); const char *StackMaps::WSMP = "Stack Maps: "; +StackMapOpers::StackMapOpers(const MachineInstr *MI) + : MI(MI) { + assert(getVarIdx() <= MI->getNumOperands() && + "invalid stackmap definition"); +} + PatchPointOpers::PatchPointOpers(const MachineInstr *MI) : MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - !MI->getOperand(0).isImplicit()), - IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == - CallingConv::AnyReg) { + !MI->getOperand(0).isImplicit()) { #ifndef NDEBUG unsigned CheckStartIdx = 0, e = MI->getNumOperands(); while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && @@ -70,7 +74,7 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { } StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { - if (StackMapVersion != 1) + if (StackMapVersion != 2) llvm_unreachable("Unsupported stackmap version!"); } @@ -272,8 +276,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { } LiveOuts.erase( - std::remove_if(LiveOuts.begin(), LiveOuts.end(), - [](const LiveOutReg &LO) { return LO.Reg == 0; }), + remove_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; }), LiveOuts.end()); return LiveOuts; @@ -332,20 +335,26 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), std::move(LiveOuts)); - // Record the stack size of the current function. - const MachineFrameInfo *MFI = AP.MF->getFrameInfo(); + // Record the stack size of the current function and update callsite count. + const MachineFrameInfo &MFI = AP.MF->getFrameInfo(); const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo(); bool HasDynamicFrameSize = - MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF)); - FnStackSize[AP.CurrentFnSym] = - HasDynamicFrameSize ? UINT64_MAX : MFI->getStackSize(); + MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF)); + uint64_t FrameSize = HasDynamicFrameSize ? 
UINT64_MAX : MFI.getStackSize(); + + auto CurrentIt = FnInfos.find(AP.CurrentFnSym); + if (CurrentIt != FnInfos.end()) + CurrentIt->second.RecordCount++; + else + FnInfos.insert(std::make_pair(AP.CurrentFnSym, FunctionInfo(FrameSize))); } void StackMaps::recordStackMap(const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap"); - int64_t ID = MI.getOperand(0).getImm(); - recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), 2), + StackMapOpers opers(&MI); + const int64_t ID = MI.getOperand(PatchPointOpers::IDPos).getImm(); + recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), opers.getVarIdx()), MI.operands_end()); } @@ -353,8 +362,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint"); PatchPointOpers opers(&MI); - int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); - + const int64_t ID = opers.getID(); auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx()); recordStackMapOpers(MI, ID, MOI, MI.operands_end(), opers.isAnyReg() && opers.hasDef()); @@ -363,7 +371,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { // verify anyregcc auto &Locations = CSInfos.back().Locations; if (opers.isAnyReg()) { - unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm(); + unsigned NArgs = opers.getNumCallArgs(); for (unsigned i = 0, e = (opers.hasDef() ? NArgs + 1 : NArgs); i != e; ++i) assert(Locations[i].Type == Location::Register && "anyreg arg must be in reg."); @@ -384,7 +392,7 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) { /// Emit the stackmap header. /// /// Header { -/// uint8 : Stack Map Version (currently 1) +/// uint8 : Stack Map Version (currently 2) /// uint8 : Reserved (expected to be 0) /// uint16 : Reserved (expected to be 0) /// } @@ -398,8 +406,8 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) { OS.EmitIntValue(0, 2); // Reserved. // Num functions. - DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n'); - OS.EmitIntValue(FnStackSize.size(), 4); + DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n'); + OS.EmitIntValue(FnInfos.size(), 4); // Num constants. DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n'); OS.EmitIntValue(ConstPool.size(), 4); @@ -413,15 +421,18 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) { /// StkSizeRecord[NumFunctions] { /// uint64 : Function Address /// uint64 : Stack Size +/// uint64 : Record Count /// } void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { // Function Frame records. DEBUG(dbgs() << WSMP << "functions:\n"); - for (auto const &FR : FnStackSize) { + for (auto const &FR : FnInfos) { DEBUG(dbgs() << WSMP << "function addr: " << FR.first - << " frame size: " << FR.second); + << " frame size: " << FR.second.StackSize + << " callsite count: " << FR.second.RecordCount << '\n'); OS.EmitSymbolValue(FR.first, 8); - OS.EmitIntValue(FR.second, 8); + OS.EmitIntValue(FR.second.StackSize, 8); + OS.EmitIntValue(FR.second.RecordCount, 8); } } @@ -522,7 +533,7 @@ void StackMaps::serializeToStackMapSection() { // Bail out if there's no stack map data. 
assert((!CSInfos.empty() || ConstPool.empty()) && "Expected empty constant pool too!"); - assert((!CSInfos.empty() || FnStackSize.empty()) && + assert((!CSInfos.empty() || FnInfos.empty()) && "Expected empty function record too!"); if (CSInfos.empty()) return; diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index 89868e4..c2c010a 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -50,7 +50,7 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp", cl::init(true), cl::Hidden); char StackProtector::ID = 0; -INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", +INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors", false, true) FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { @@ -236,11 +236,6 @@ bool StackProtector::RequiresStackProtector() { for (const Instruction &I : BB) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { if (AI->isArrayAllocation()) { - // SSP-Strong: Enable protectors for any call to alloca, regardless - // of size. - if (Strong) - return true; - if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index d996714..234b204 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -381,7 +381,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { I != E; ++I) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; - int FirstSS, SecondSS; if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS && FirstSS != -1) { @@ -392,12 +391,18 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { } MachineBasicBlock::iterator NextMI = std::next(I); - if (NextMI == MBB->end()) continue; + MachineBasicBlock::iterator ProbableLoadMI = I; unsigned LoadReg = 0; unsigned StoreReg = 0; if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS))) continue; + // Skip the ...pseudo debugging... instructions between a load and store. 
+ while ((NextMI != E) && NextMI->isDebugValue()) { + ++NextMI; + ++I; + } + if (NextMI == E) continue; if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS))) continue; if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; @@ -407,7 +412,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) { ++NumDead; - toErase.push_back(&*I); + toErase.push_back(&*ProbableLoadMI); } toErase.push_back(&*NextMI); @@ -428,7 +433,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { << "********** Function: " << MF.getName() << '\n'; }); - MFI = MF.getFrameInfo(); + MFI = &MF.getFrameInfo(); TII = MF.getSubtarget().getInstrInfo(); LS = &getAnalysis<LiveStacks>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 2b1fb12..e2377d8 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -47,13 +47,12 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; - auto MMI = getAnalysisIfAvailable<MachineModuleInfo>(); auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - Duplicator.initMF(MF, MMI, MBPI); + Duplicator.initMF(MF, MBPI, /* LayoutMode */ false); bool MadeChange = false; - while (Duplicator.tailDuplicateBlocks(MF)) + while (Duplicator.tailDuplicateBlocks()) MadeChange = true; return MadeChange; diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp index 847a093..7709236 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" @@ -40,12 +41,20 @@ STATISTIC(NumTailDupRemoved, STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumAddedPHIs, "Number of phis added"); +namespace llvm { + // Heuristic for tail duplication. 
static cl::opt<unsigned> TailDuplicateSize( "tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden); +cl::opt<unsigned> TailDupIndirectBranchSize( + "tail-dup-indirect-size", + cl::desc("Maximum instructions to consider tail duplicating blocks that " + "end with indirect branches."), cl::init(20), + cl::Hidden); + static cl::opt<bool> TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -54,18 +63,20 @@ static cl::opt<bool> static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); -namespace llvm { - -void TailDuplicator::initMF(MachineFunction &MF, const MachineModuleInfo *MMIin, - const MachineBranchProbabilityInfo *MBPIin) { - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); - MMI = MMIin; +void TailDuplicator::initMF(MachineFunction &MFin, + const MachineBranchProbabilityInfo *MBPIin, + bool LayoutModeIn, unsigned TailDupSizeIn) { + MF = &MFin; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + MRI = &MF->getRegInfo(); + MMI = &MF->getMMI(); MBPI = MBPIin; + TailDupSize = TailDupSizeIn; assert(MBPI != nullptr && "Machine Branch Probability Info required"); + LayoutMode = LayoutModeIn; PreRegAlloc = MRI->isSSA(); } @@ -78,10 +89,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { while (MI != MBB->end()) { if (!MI->isPHI()) break; - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); - PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; + for (MachineBasicBlock *PredBB : Preds) { bool Found = false; for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); @@ -119,21 +127,31 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } /// Tail duplicate the block and cleanup. -bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, - MachineBasicBlock *MBB) { +/// \p IsSimple - return value of isSimpleBB +/// \p MBB - block to be duplicated +/// \p ForcedLayoutPred - If non-null, treat this block as the layout +/// predecessor, instead of using the ordering in MF +/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of +/// all Preds that received a copy of \p MBB. +/// \p RemovalCallback - if non-null, called just before MBB is deleted. +bool TailDuplicator::tailDuplicateAndUpdate( + bool IsSimple, MachineBasicBlock *MBB, + MachineBasicBlock *ForcedLayoutPred, + SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds, + llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) { // Save the successors list. SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), MBB->succ_end()); SmallVector<MachineBasicBlock *, 8> TDBBs; SmallVector<MachineInstr *, 16> Copies; - if (!tailDuplicate(MF, IsSimple, MBB, TDBBs, Copies)) + if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies)) return false; ++NumTails; SmallVector<MachineInstr *, 8> NewPHIs; - MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + MachineSSAUpdater SSAUpdate(*MF, &NewPHIs); // TailBB's immediate successors are now successors of those predecessors // which duplicated TailBB. Add the predecessors as sources to the PHI @@ -145,7 +163,7 @@ bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, // If it is dead, remove it. 
if (isDead) { NumTailDupRemoved += MBB->size(); - removeDeadBlock(MBB); + removeDeadBlock(MBB, RemovalCallback); ++NumDeadBlocks; } @@ -216,21 +234,24 @@ bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, if (NewPHIs.size()) NumAddedPHIs += NewPHIs.size(); + if (DuplicatedPreds) + *DuplicatedPreds = std::move(TDBBs); + return true; } /// Look for small blocks that are unconditionally branched to and do not fall /// through. Tail-duplicate their instructions into their predecessors to /// eliminate (dynamic) branches. -bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) { +bool TailDuplicator::tailDuplicateBlocks() { bool MadeChange = false; if (PreRegAlloc && TailDupVerify) { DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); - VerifyPHIs(MF, true); + VerifyPHIs(*MF, true); } - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E;) { + for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) { MachineBasicBlock *MBB = &*I++; if (NumTails == TailDupLimit) @@ -238,14 +259,14 @@ bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) { bool IsSimple = isSimpleBB(MBB); - if (!shouldTailDuplicate(MF, IsSimple, *MBB)) + if (!shouldTailDuplicate(IsSimple, *MBB)) continue; - MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB); + MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr); } if (PreRegAlloc && TailDupVerify) - VerifyPHIs(MF, false); + VerifyPHIs(*MF, false); return MadeChange; } @@ -334,10 +355,9 @@ void TailDuplicator::processPHI( /// the source operands due to earlier PHI translation. void TailDuplicator::duplicateInstruction( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, - MachineFunction &MF, DenseMap<unsigned, RegSubRegPair> &LocalVRMap, const DenseSet<unsigned> &UsedByPhi) { - MachineInstr *NewMI = TII->duplicate(*MI, MF); + MachineInstr *NewMI = TII->duplicate(*MI, *MF); if (PreRegAlloc) { for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -421,18 +441,14 @@ void TailDuplicator::updateSuccessorsPHIs( MachineBasicBlock *FromBB, bool isDead, SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallSetVector<MachineBasicBlock *, 8> &Succs) { - for (SmallSetVector<MachineBasicBlock *, 8>::iterator SI = Succs.begin(), - SE = Succs.end(); - SI != SE; ++SI) { - MachineBasicBlock *SuccBB = *SI; - for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); - II != EE; ++II) { - if (!II->isPHI()) + for (MachineBasicBlock *SuccBB : Succs) { + for (MachineInstr &MI : *SuccBB) { + if (!MI.isPHI()) break; - MachineInstrBuilder MIB(*FromBB->getParent(), II); + MachineInstrBuilder MIB(*FromBB->getParent(), MI); unsigned Idx = 0; - for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { - MachineOperand &MO = II->getOperand(i + 1); + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + MachineOperand &MO = MI.getOperand(i + 1); if (MO.getMBB() == FromBB) { Idx = i; break; @@ -440,17 +456,17 @@ void TailDuplicator::updateSuccessorsPHIs( } assert(Idx != 0); - MachineOperand &MO0 = II->getOperand(Idx); + MachineOperand &MO0 = MI.getOperand(Idx); unsigned Reg = MO0.getReg(); if (isDead) { // Folded into the previous BB. // There could be duplicate phi source entries. FIXME: Should sdisel // or earlier pass fixed this? 
- for (unsigned i = II->getNumOperands() - 2; i != Idx; i -= 2) { - MachineOperand &MO = II->getOperand(i + 1); + for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) { + MachineOperand &MO = MI.getOperand(i + 1); if (MO.getMBB() == FromBB) { - II->RemoveOperand(i + 1); - II->RemoveOperand(i); + MI.RemoveOperand(i + 1); + MI.RemoveOperand(i); } } } else @@ -474,8 +490,8 @@ void TailDuplicator::updateSuccessorsPHIs( unsigned SrcReg = LI->second[j].second; if (Idx != 0) { - II->getOperand(Idx).setReg(SrcReg); - II->getOperand(Idx + 1).setMBB(SrcBB); + MI.getOperand(Idx).setReg(SrcReg); + MI.getOperand(Idx + 1).setMBB(SrcBB); Idx = 0; } else { MIB.addReg(SrcReg).addMBB(SrcBB); @@ -486,8 +502,8 @@ void TailDuplicator::updateSuccessorsPHIs( for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = TDBBs[j]; if (Idx != 0) { - II->getOperand(Idx).setReg(Reg); - II->getOperand(Idx + 1).setMBB(SrcBB); + MI.getOperand(Idx).setReg(Reg); + MI.getOperand(Idx + 1).setMBB(SrcBB); Idx = 0; } else { MIB.addReg(Reg).addMBB(SrcBB); @@ -495,19 +511,20 @@ void TailDuplicator::updateSuccessorsPHIs( } } if (Idx != 0) { - II->RemoveOperand(Idx + 1); - II->RemoveOperand(Idx); + MI.RemoveOperand(Idx + 1); + MI.RemoveOperand(Idx); } } } } /// Determine if it is profitable to duplicate this block. -bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, - bool IsSimple, +bool TailDuplicator::shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB) { - // Only duplicate blocks that end with unconditional branches. - if (TailBB.canFallThrough()) + // When doing tail-duplication during layout, the block ordering is in flux, + // so canFallThrough returns a result based on incorrect information and + // should just be ignored. + if (!LayoutMode && TailBB.canFallThrough()) return false; // Don't try to tail-duplicate single-block loops. @@ -518,12 +535,24 @@ bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; - if (TailDuplicateSize.getNumOccurrences() == 0 && - // FIXME: Use Function::optForSize(). - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) + if (TailDupSize == 0 && + TailDuplicateSize.getNumOccurrences() == 0 && + MF->getFunction()->optForSize()) MaxDuplicateCount = 1; - else + else if (TailDupSize == 0) MaxDuplicateCount = TailDuplicateSize; + else + MaxDuplicateCount = TailDupSize; + + // If the block to be duplicated ends in an unanalyzable fallthrough, don't + // duplicate it. + // A similar check is necessary in MachineBlockPlacement to make sure pairs of + // blocks with unanalyzable fallthrough get layed out contiguously. + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; + SmallVector<MachineOperand, 4> PredCond; + if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) && + TailBB.canFallThrough()) + return false; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there @@ -536,7 +565,7 @@ bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, HasIndirectbr = TailBB.back().isIndirectBranch(); if (HasIndirectbr && PreRegAlloc) - MaxDuplicateCount = 20; + MaxDuplicateCount = TailDupIndirectBranchSize; // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. 
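For readers following the shouldTailDuplicate() changes above, the sketch below restates the new duplication-limit selection in plain C++. It is an illustration only: the parameter names (SizeFlagOnCmdLine, OptForSize, and so on) are stand-ins for the cl::opt occurrence check and the Function::optForSize() query used in the real code, and the indirect-branch limit defaults to 20 as in the diff.

// Simplified model of the limit selection in shouldTailDuplicate():
//  - an explicit TailDupSize passed to initMF() wins,
//  - otherwise -tail-dup-size (default 2), unless the function is optimized
//    for size and the flag was not given on the command line, in which case 1,
//  - blocks ending in indirect branches get the larger
//    -tail-dup-indirect-size limit before register allocation.
unsigned pickMaxDuplicateCount(unsigned TailDupSize,          // from initMF()
                               unsigned TailDuplicateSizeOpt, // -tail-dup-size
                               bool SizeFlagOnCmdLine,
                               bool OptForSize, bool PreRegAlloc,
                               bool HasIndirectBr,
                               unsigned TailDupIndirectBranchSize = 20) {
  unsigned MaxDuplicateCount;
  if (TailDupSize == 0 && !SizeFlagOnCmdLine && OptForSize)
    MaxDuplicateCount = 1;
  else if (TailDupSize == 0)
    MaxDuplicateCount = TailDuplicateSizeOpt;
  else
    MaxDuplicateCount = TailDupSize;

  if (HasIndirectBr && PreRegAlloc)
    MaxDuplicateCount = TailDupIndirectBranchSize;
  return MaxDuplicateCount;
}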
@@ -631,7 +660,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) return false; if (!PredCond.empty()) @@ -649,11 +678,7 @@ bool TailDuplicator::duplicateSimpleBB( SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); bool Changed = false; - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); - PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - + for (MachineBasicBlock *PredBB : Preds) { if (PredBB->hasEHPadSuccessor()) continue; @@ -662,7 +687,7 @@ bool TailDuplicator::duplicateSimpleBB( MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) continue; Changed = true; @@ -670,7 +695,7 @@ bool TailDuplicator::duplicateSimpleBB( << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); - MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); + MachineBasicBlock *NextBB = PredBB->getNextNode(); // Make PredFBB explicit. if (PredCond.empty()) @@ -700,7 +725,7 @@ bool TailDuplicator::duplicateSimpleBB( if (PredTBB == NextBB && PredFBB == nullptr) PredTBB = nullptr; - TII->RemoveBranch(*PredBB); + TII->removeBranch(*PredBB); if (!PredBB->isSuccessor(NewTarget)) PredBB->replaceSuccessor(TailBB, NewTarget); @@ -710,17 +735,40 @@ bool TailDuplicator::duplicateSimpleBB( } if (PredTBB) - TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); TDBBs.push_back(PredBB); } return Changed; } +bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB) { + // EH edges are ignored by analyzeBranch. + if (PredBB->succ_size() > 1) + return false; + + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) + return false; + if (!PredCond.empty()) + return false; + return true; +} + /// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. -bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, - MachineBasicBlock *TailBB, +/// \p IsSimple result of isSimpleBB +/// \p TailBB Block to be duplicated. +/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor +/// instead of the previous block in MF's order. +/// \p TDBBs A vector to keep track of all blocks tail-duplicated +/// into. +/// \p Copies A vector of copy instructions inserted. Used later to +/// walk all the inserted copies and remove redundant ones. 
+bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, + MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl<MachineBasicBlock *> &TDBBs, SmallVectorImpl<MachineInstr *> &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); @@ -737,25 +785,20 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, bool Changed = false; SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); - PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - + for (MachineBasicBlock *PredBB : Preds) { assert(TailBB != PredBB && "Single-block loop should have been rejected earlier!"); - // EH edges are ignored by AnalyzeBranch. - if (PredBB->succ_size() > 1) - continue; - MachineBasicBlock *PredTBB, *PredFBB; - SmallVector<MachineOperand, 4> PredCond; - if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) - continue; - if (!PredCond.empty()) + if (!canTailDuplicate(TailBB, PredBB)) continue; + // Don't duplicate into a fall-through predecessor (at least for now). - if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + bool IsLayoutSuccessor = false; + if (ForcedLayoutPred) + IsLayoutSuccessor = (ForcedLayoutPred == PredBB); + else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + IsLayoutSuccessor = true; + if (IsLayoutSuccessor) continue; DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB @@ -764,7 +807,7 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, TDBBs.push_back(PredBB); // Remove PredBB's unconditional branch. - TII->RemoveBranch(*PredBB); + TII->removeBranch(*PredBB); // Clone the contents of TailBB into PredBB. DenseMap<unsigned, RegSubRegPair> LocalVRMap; @@ -782,13 +825,15 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. - duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); + duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); } } appendCopies(PredBB, CopyInfos, Copies); // Simplify - TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch @@ -796,10 +841,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); - for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), - E = TailBB->succ_end(); - I != E; ++I) - PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); + for (MachineBasicBlock *Succ : TailBB->successors()) + PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); Changed = true; ++NumTailDups; @@ -808,17 +851,27 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. 
- MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); + MachineBasicBlock *PrevBB = ForcedLayoutPred; + if (!PrevBB) + PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by - // AnalyzeBranch. + // analyzeBranch. if (PrevBB->succ_size() == 1 && - !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && - PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && + // Layout preds are not always CFG preds. Check. + *PrevBB->succ_begin() == TailBB && + !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) && + PriorCond.empty() && + (!PriorTBB || PriorTBB == TailBB) && + TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { DEBUG(dbgs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); + // There may be a branch to the layout successor. This is unlikely but it + // happens. The correct thing to do is to remove the branch before + // duplicating the instructions in all cases. + TII->removeBranch(*PrevBB); if (PreRegAlloc) { DenseMap<unsigned, RegSubRegPair> LocalVRMap; SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; @@ -837,11 +890,12 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); - duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); + duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } appendCopies(PrevBB, CopyInfos, Copies); } else { + TII->removeBranch(*PrevBB); // No PHIs to worry about, just splice the instructions over. PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); } @@ -874,11 +928,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, // What we do here is introduce a copy in 3 of the register defined by the // phi, just like when we are duplicating 2 into 3, but we don't copy any // real instructions or remove the 3 -> 2 edge from the phi in 2. - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); - PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) + for (MachineBasicBlock *PredBB : Preds) { + if (is_contained(TDBBs, PredBB)) continue; // EH edges @@ -917,10 +968,15 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB, /// Remove the specified dead machine basic block from the function, updating /// the CFG. -void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) { +void TailDuplicator::removeDeadBlock( + MachineBasicBlock *MBB, + llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + if (RemovalCallback) + (*RemovalCallback)(MBB); + // Remove all successors. while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end() - 1); diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index cac7e63..f082add 100644 --- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -40,7 +40,7 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { /// is overridden for some targets. 
int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); // By default, assume all frame indices are referenced via whatever @@ -48,13 +48,13 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, // something different. FrameReg = RI->getFrameRegister(MF); - return MFI->getObjectOffset(FI) + MFI->getStackSize() - - getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); + return MFI.getObjectOffset(FI) + MFI.getStackSize() - + getOffsetOfLocalArea() + MFI.getOffsetAdjustment(); } bool TargetFrameLowering::needsFrameIndexResolution( const MachineFunction &MF) const { - return MF.getFrameInfo()->hasStackObjects(); + return MF.getFrameInfo().hasStackObjects(); } void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, @@ -84,7 +84,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, return; // Functions which call __builtin_unwind_init get all their registers saved. - bool CallsUnwindInit = MF.getMMI().callsUnwindInit(); + bool CallsUnwindInit = MF.callsUnwindInit(); const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index e7330c6..01f91b9 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -84,8 +84,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), strlen(MAI.getSeparatorString())) == 0) { atInsnStart = true; - } else if (strncmp(Str, MAI.getCommentString(), - strlen(MAI.getCommentString())) == 0) { + } else if (strncmp(Str, MAI.getCommentString().data(), + MAI.getCommentString().size()) == 0) { // Stop counting as an instruction after a comment until the next // separator. atInsnStart = false; @@ -119,7 +119,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // If MBB isn't immediately before MBB, insert a branch to it. if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) - InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL); + insertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL); MBB->addSuccessor(NewDest); } @@ -437,13 +437,20 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, const TargetInstrInfo &TII) { unsigned StartIdx = 0; switch (MI.getOpcode()) { - case TargetOpcode::STACKMAP: - StartIdx = 2; // Skip ID, nShadowBytes. + case TargetOpcode::STACKMAP: { + // StackMapLiveValues are foldable + StartIdx = StackMapOpers(&MI).getVarIdx(); break; + } case TargetOpcode::PATCHPOINT: { - // For PatchPoint, the call args are not foldable. - PatchPointOpers opers(&MI); - StartIdx = opers.getVarIdx(); + // For PatchPoint, the call args are not foldable (even if reported in the + // stackmap e.g. via anyregcc). + StartIdx = PatchPointOpers(&MI).getVarIdx(); + break; + } + case TargetOpcode::STATEPOINT: { + // For statepoints, fold deopt and gc arguments, but not call arguments. 
+ StartIdx = StatepointOpers(&MI).getVarIdx(); break; } default: @@ -467,7 +474,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) { MachineOperand &MO = MI.getOperand(i); - if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) { + if (is_contained(Ops, i)) { unsigned SpillSize; unsigned SpillOffset; // Compute the spill slot size and offset. @@ -508,10 +515,36 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, assert(MBB && "foldMemoryOperand needs an inserted instruction"); MachineFunction &MF = *MBB->getParent(); + // If we're not folding a load into a subreg, the size of the load is the + // size of the spill slot. But if we are, we need to figure out what the + // actual load size is. + int64_t MemSize = 0; + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + + if (Flags & MachineMemOperand::MOStore) { + MemSize = MFI.getObjectSize(FI); + } else { + for (unsigned Idx : Ops) { + int64_t OpSize = MFI.getObjectSize(FI); + + if (auto SubReg = MI.getOperand(Idx).getSubReg()) { + unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg); + if (SubRegSize > 0 && !(SubRegSize % 8)) + OpSize = SubRegSize / 8; + } + + MemSize = std::max(MemSize, OpSize); + } + } + + assert(MemSize && "Did not expect a zero-sized stack slot"); + MachineInstr *NewMI = nullptr; if (MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) { + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) { // Fold stackmap/patchpoint. NewMI = foldPatchpoint(MF, MI, Ops, FI, *this); if (NewMI) @@ -530,10 +563,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, assert((!(Flags & MachineMemOperand::MOLoad) || NewMI->mayLoad()) && "Folded a use to a non-load!"); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI), + MachinePointerInfo::getFixedStack(MF, FI), Flags, MemSize, MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); @@ -550,7 +582,6 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, const MachineOperand &MO = MI.getOperand(1 - Ops[0]); MachineBasicBlock::iterator Pos = MI; - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (Flags == MachineMemOperand::MOStore) storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); @@ -792,7 +823,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, int FrameIndex = 0; if ((MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) && + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) && isLoadFromStackSlot(LoadMI, FrameIndex)) { // Fold stackmap/patchpoint. NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); @@ -844,7 +876,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( // simple, and a common case. int FrameIdx = 0; if (isLoadFromStackSlot(MI, FrameIdx) && - MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) + MF.getFrameInfo().isImmutableObjectIndex(FrameIdx)) return true; // Avoid instructions obviously unsafe for remat. @@ -857,7 +889,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( return false; // Avoid instructions which load from potentially varying memory. 
- if (MI.mayLoad() && !MI.isInvariantLoad(AA)) + if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume @@ -875,7 +907,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI.isConstantPhysReg(Reg, MF)) + if (!MRI.isConstantPhysReg(Reg)) return false; } else { // A physreg def. We can't remat it. @@ -1091,35 +1123,6 @@ int TargetInstrInfo::computeDefOperandLatency( return -1; } -unsigned TargetInstrInfo::computeOperandLatency( - const InstrItineraryData *ItinData, const MachineInstr &DefMI, - unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const { - - int DefLatency = computeDefOperandLatency(ItinData, DefMI); - if (DefLatency >= 0) - return DefLatency; - - assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - - int OperLatency = 0; - if (UseMI) - OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, *UseMI, UseIdx); - else { - unsigned DefClass = DefMI.getDesc().getSchedClass(); - OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); - } - if (OperLatency >= 0) - return OperLatency; - - // No operand latency was found. - unsigned InstrLatency = getInstrLatency(ItinData, DefMI); - - // Expected latency is the max of the stage latency and itinerary props. - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); - return InstrLatency; -} - bool TargetInstrInfo::getRegSequenceInputs( const MachineInstr &MI, unsigned DefIdx, SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index 6d3fe8c..003311b 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -44,6 +45,14 @@ static cl::opt<bool> JumpIsExpensiveOverride( cl::desc("Do not create extra branches to split comparison logic."), cl::Hidden); +static cl::opt<unsigned> MinimumJumpTableEntries + ("min-jump-table-entries", cl::init(4), cl::Hidden, + cl::desc("Set minimum number of entries to use a jump table.")); + +static cl::opt<unsigned> MaximumJumpTableSize + ("max-jump-table-size", cl::init(0), cl::Hidden, + cl::desc("Set maximum size of jump tables; zero for no limit.")); + // Although this default value is arbitrary, it is not random. It is assumed // that a condition that evaluates the same way by a higher percentage than this // is best represented as control flow. 
Therefore, the default value N should be @@ -352,6 +361,11 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::MEMCPY] = "memcpy"; Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -488,12 +502,10 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize"; } -/// InitLibcallCallingConvs - Set default libcall CallingConvs. -/// +/// Set default libcall CallingConvs. static void InitLibcallCallingConvs(CallingConv::ID *CCs) { - for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { - CCs[i] = CallingConv::C; - } + for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) + CCs[LC] = CallingConv::C; } /// getFPEXT - Return the FPEXT_*_* value for the given types, or @@ -756,6 +768,24 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { return UNKNOWN_LIBCALL; } +RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMCPY_ELEMENT_ATOMIC_1; + case 2: + return MEMCPY_ELEMENT_ATOMIC_2; + case 4: + return MEMCPY_ELEMENT_ATOMIC_4; + case 8: + return MEMCPY_ELEMENT_ATOMIC_8; + case 16: + return MEMCPY_ELEMENT_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } + +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. /// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { @@ -804,10 +834,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { = MaxStoresPerMemmoveOptSize = 4; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; - SelectIsExpensive = false; HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; - FsqrtIsCheap = false; JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; MaskAndBranchFoldingIsLegal = false; @@ -825,7 +853,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { PrefLoopAlignment = 0; GatherAllAliasesMaxDepth = 6; MinStackArgumentAlignment = 1; - MinimumJumpTableEntries = 4; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary. MaxAtomicSizeInBitsSupported = 1024; @@ -956,15 +983,11 @@ EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, return getScalarShiftAmountTy(DL, LHSTy); } -/// canOpTrap - Returns true if the operation can trap for the value type. -/// VT must be a legal type. 
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { assert(isTypeLegal(VT)); switch (Op) { default: return false; - case ISD::FDIV: - case ISD::FREM: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: @@ -1177,7 +1200,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, MachineBasicBlock *MBB) const { MachineInstr *MI = &InitialMI; MachineFunction &MF = *MI->getParent()->getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); // We're handling multiple types of operands here: // PATCHPOINT MetaArgs - live-in, read only, direct @@ -1402,7 +1425,7 @@ void TargetLoweringBase::computeRegisterProperties( MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. - if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() && + if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; @@ -1754,9 +1777,41 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, } } +Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, + bool UseTLS) const { + // compiler-rt provides a variable with a magic name. Targets that do not + // link with compiler-rt may also provide such a variable. + Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr"; + auto UnsafeStackPtr = + dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar)); + + Type *StackPtrTy = Type::getInt8PtrTy(M->getContext()); + + if (!UnsafeStackPtr) { + auto TLSModel = UseTLS ? + GlobalValue::InitialExecTLSModel : + GlobalValue::NotThreadLocal; + // The global variable is not defined yet, define it ourselves. + // We use the initial-exec TLS model because we do not support the + // variable living anywhere other than in the main executable. + UnsafeStackPtr = new GlobalVariable( + *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, + UnsafeStackPtrVar, nullptr, TLSModel); + } else { + // The variable exists, check its type and attributes. + if (UnsafeStackPtr->getValueType() != StackPtrTy) + report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type"); + if (UseTLS != UnsafeStackPtr->isThreadLocal()) + report_fatal_error(Twine(UnsafeStackPtrVar) + " must " + + (UseTLS ? "" : "not ") + "be thread-local"); + } + return UnsafeStackPtr; +} + Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const { if (!TM.getTargetTriple().isAndroid()) - return nullptr; + return getDefaultSafeStackPointerLocation(IRB, true); // Android provides a libc function to retrieve the address of the current // thread's unsafe stack pointer. 
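As a rough illustration of what getDefaultSafeStackPointerLocation() above expects to find, the declaration below shows a runtime-provided unsafe-stack pointer that would pass its checks: a pointer-typed, thread-local global with the magic name. This is a sketch of the contract only, not the actual compiler-rt definition; whether a given runtime defines the symbol exactly this way is an assumption.

// Hypothetical runtime-side definition matching the checks in the diff:
// the symbol must have void* type and, when UseTLS is true, be thread-local
// (the code requests the initial-exec TLS model when it has to create the
// global itself).
extern "C" {
__thread void *__safestack_unsafe_stack_ptr = nullptr;
}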
@@ -1818,9 +1873,7 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const { if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { Module &M = *IRB.GetInsertBlock()->getParent()->getParent(); PointerType *PtrTy = Type::getInt8PtrTy(M.getContext()); - auto Guard = cast<GlobalValue>(M.getOrInsertGlobal("__guard_local", PtrTy)); - Guard->setVisibility(GlobalValue::HiddenVisibility); - return Guard; + return M.getOrInsertGlobal("__guard_local", PtrTy); } return nullptr; } @@ -1840,3 +1893,207 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { return nullptr; } + +unsigned TargetLoweringBase::getMinimumJumpTableEntries() const { + return MinimumJumpTableEntries; +} + +void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) { + MinimumJumpTableEntries = Val; +} + +unsigned TargetLoweringBase::getMaximumJumpTableSize() const { + return MaximumJumpTableSize; +} + +void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { + MaximumJumpTableSize = Val; +} + +//===----------------------------------------------------------------------===// +// Reciprocal Estimates +//===----------------------------------------------------------------------===// + +/// Get the reciprocal estimate attribute string for a function that will +/// override the target defaults. +static StringRef getRecipEstimateForFunc(MachineFunction &MF) { + const Function *F = MF.getFunction(); + StringRef RecipAttrName = "reciprocal-estimates"; + if (!F->hasFnAttribute(RecipAttrName)) + return StringRef(); + + return F->getFnAttribute(RecipAttrName).getValueAsString(); +} + +/// Construct a string for the given reciprocal operation of the given type. +/// This string should match the corresponding option to the front-end's +/// "-mrecip" flag assuming those strings have been passed through in an +/// attribute string. For example, "vec-divf" for a division of a vXf32. +static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { + std::string Name = VT.isVector() ? "vec-" : ""; + + Name += IsSqrt ? "sqrt" : "div"; + + // TODO: Handle "half" or other float types? + if (VT.getScalarType() == MVT::f64) { + Name += "d"; + } else { + assert(VT.getScalarType() == MVT::f32 && + "Unexpected FP type for reciprocal estimate"); + Name += "f"; + } + + return Name; +} + +/// Return the character position and value (a single numeric character) of a +/// customized refinement operation in the input string if it exists. Return +/// false if there is no customized refinement step count. +static bool parseRefinementStep(StringRef In, size_t &Position, + uint8_t &Value) { + const char RefStepToken = ':'; + Position = In.find(RefStepToken); + if (Position == StringRef::npos) + return false; + + StringRef RefStepString = In.substr(Position + 1); + // Allow exactly one numeric character for the additional refinement + // step parameter. + if (RefStepString.size() == 1) { + char RefStepChar = RefStepString[0]; + if (RefStepChar >= '0' && RefStepChar <= '9') { + Value = RefStepChar - '0'; + return true; + } + } + report_fatal_error("Invalid refinement step for -recip."); +} + +/// For the input attribute string, return one of the ReciprocalEstimate enum +/// status values (enabled, disabled, or not specified) for this operation on +/// the specified data type. 
+static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { + if (Override.empty()) + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + + SmallVector<StringRef, 4> OverrideVector; + SplitString(Override, OverrideVector, ","); + unsigned NumArgs = OverrideVector.size(); + + // Check if "all", "none", or "default" was specified. + if (NumArgs == 1) { + // Look for an optional setting of the number of refinement steps needed + // for this type of reciprocal operation. + size_t RefPos; + uint8_t RefSteps; + if (parseRefinementStep(Override, RefPos, RefSteps)) { + // Split the string for further processing. + Override = Override.substr(0, RefPos); + } + + // All reciprocal types are enabled. + if (Override == "all") + return TargetLoweringBase::ReciprocalEstimate::Enabled; + + // All reciprocal types are disabled. + if (Override == "none") + return TargetLoweringBase::ReciprocalEstimate::Disabled; + + // Target defaults for enablement are used. + if (Override == "default") + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + } + + // The attribute string may omit the size suffix ('f'/'d'). + std::string VTName = getReciprocalOpName(IsSqrt, VT); + std::string VTNameNoSize = VTName; + VTNameNoSize.pop_back(); + static const char DisabledPrefix = '!'; + + for (StringRef RecipType : OverrideVector) { + size_t RefPos; + uint8_t RefSteps; + if (parseRefinementStep(RecipType, RefPos, RefSteps)) + RecipType = RecipType.substr(0, RefPos); + + // Ignore the disablement token for string matching. + bool IsDisabled = RecipType[0] == DisabledPrefix; + if (IsDisabled) + RecipType = RecipType.substr(1); + + if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) + return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled + : TargetLoweringBase::ReciprocalEstimate::Enabled; + } + + return TargetLoweringBase::ReciprocalEstimate::Unspecified; +} + +/// For the input attribute string, return the customized refinement step count +/// for this operation on the specified data type. If the step count does not +/// exist, return the ReciprocalEstimate enum value for unspecified. +static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { + if (Override.empty()) + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + + SmallVector<StringRef, 4> OverrideVector; + SplitString(Override, OverrideVector, ","); + unsigned NumArgs = OverrideVector.size(); + + // Check if "all", "default", or "none" was specified. + if (NumArgs == 1) { + // Look for an optional setting of the number of refinement steps needed + // for this type of reciprocal operation. + size_t RefPos; + uint8_t RefSteps; + if (!parseRefinementStep(Override, RefPos, RefSteps)) + return TargetLoweringBase::ReciprocalEstimate::Unspecified; + + // Split the string for further processing. + Override = Override.substr(0, RefPos); + assert(Override != "none" && + "Disabled reciprocals, but specifed refinement steps?"); + + // If this is a general override, return the specified number of steps. + if (Override == "all" || Override == "default") + return RefSteps; + } + + // The attribute string may omit the size suffix ('f'/'d'). 
+ std::string VTName = getReciprocalOpName(IsSqrt, VT); + std::string VTNameNoSize = VTName; + VTNameNoSize.pop_back(); + + for (StringRef RecipType : OverrideVector) { + size_t RefPos; + uint8_t RefSteps; + if (!parseRefinementStep(RecipType, RefPos, RefSteps)) + continue; + + RecipType = RecipType.substr(0, RefPos); + if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) + return RefSteps; + } + + return TargetLoweringBase::ReciprocalEstimate::Unspecified; +} + +int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT, + MachineFunction &MF) const { + return getOpEnabled(true, VT, getRecipEstimateForFunc(MF)); +} + +int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT, + MachineFunction &MF) const { + return getOpEnabled(false, VT, getRecipEstimateForFunc(MF)); +} + +int TargetLoweringBase::getSqrtRefinementSteps(EVT VT, + MachineFunction &MF) const { + return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF)); +} + +int TargetLoweringBase::getDivRefinementSteps(EVT VT, + MachineFunction &MF) const { + return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF)); +} diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 5f814c9..eb2a28f 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -50,14 +50,14 @@ using namespace dwarf; //===----------------------------------------------------------------------===// MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( - const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + const GlobalValue *GV, const TargetMachine &TM, MachineModuleInfo *MMI) const { unsigned Encoding = getPersonalityEncoding(); if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect) return getContext().getOrCreateSymbol(StringRef("DW.ref.") + - TM.getSymbol(GV, Mang)->getName()); + TM.getSymbol(GV)->getName()); if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr) - return TM.getSymbol(GV, Mang); + return TM.getSymbol(GV); report_fatal_error("We do not support this DWARF encoding yet!"); } @@ -84,20 +84,19 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( } const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( - const GlobalValue *GV, unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, MachineModuleInfo *MMI, - MCStreamer &Streamer) const { + const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { if (Encoding & dwarf::DW_EH_PE_indirect) { MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); - MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM); + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM); // Add information about the stub reference to ELFMMI so that the stub // gets emitted by the asmprinter. 
MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { - MCSymbol *Sym = TM.getSymbol(GV, Mang); + MCSymbol *Sym = TM.getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -106,8 +105,8 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } - return TargetLoweringObjectFile:: - getTTypeGlobalReference(GV, Encoding, Mang, TM, MMI, Streamer); + return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM, + MMI, Streamer); } static SectionKind @@ -152,6 +151,11 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { static unsigned getELFSectionType(StringRef Name, SectionKind K) { + // Use SHT_NOTE for section whose name starts with ".note" to allow + // emitting ELF notes from C variable declaration. + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77609 + if (Name.startswith(".note")) + return ELF::SHT_NOTE; if (Name == ".init_array") return ELF::SHT_INIT_ARRAY; @@ -177,6 +181,9 @@ static unsigned getELFSectionFlags(SectionKind K) { if (K.isText()) Flags |= ELF::SHF_EXECINSTR; + if (K.isExecuteOnly()) + Flags |= ELF::SHF_ARM_PURECODE; + if (K.isWriteable()) Flags |= ELF::SHF_WRITE; @@ -205,16 +212,15 @@ static const Comdat *getELFComdat(const GlobalValue *GV) { } MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { - StringRef SectionName = GV->getSection(); + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + StringRef SectionName = GO->getSection(); // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); - if (const Comdat *C = getELFComdat(GV)) { + if (const Comdat *C = getELFComdat(GO)) { Group = C->getName(); Flags |= ELF::SHF_GROUP; } @@ -243,7 +249,7 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { } static MCSectionELF * -selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, +selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags, unsigned *NextUniqueID) { @@ -271,7 +277,7 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, } StringRef Group = ""; - if (const Comdat *C = getELFComdat(GV)) { + if (const Comdat *C = getELFComdat(GO)) { Flags |= ELF::SHF_GROUP; Group = C->getName(); } @@ -282,8 +288,8 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, // We also need alignment here. // FIXME: this is getting the alignment of the character, not the // alignment of the global! - unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment( - cast<GlobalVariable>(GV)); + unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GO)); std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; Name = SizeSpec + utostr(Align); @@ -293,25 +299,31 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, } else { Name = getSectionPrefixForGlobal(Kind); } - // FIXME: Extend the section prefix to include hotness catagories such as .hot - // or .unlikely for functions. 
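A minimal sketch of what the SHT_NOTE change in getELFSectionType() above enables (compiler-specific attribute syntax; the payload here is illustrative only, not a well-formed ELF note record):

// With the change above, a global whose explicit section name starts with
// ".note" is emitted with section type SHT_NOTE instead of SHT_PROGBITS,
// matching GCC behaviour (see the bugzilla link in the added comment).
__attribute__((section(".note.example"), used))
static const char ExampleNote[] = "illustrative payload only";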
+ + if (const auto *F = dyn_cast<Function>(GO)) { + const auto &OptionalPrefix = F->getSectionPrefix(); + if (OptionalPrefix) + Name += *OptionalPrefix; + } if (EmitUniqueSection && UniqueSectionNames) { Name.push_back('.'); - TM.getNameWithPrefix(Name, GV, Mang, true); + TM.getNameWithPrefix(Name, GO, Mang, true); } unsigned UniqueID = MCContext::GenericSectionID; if (EmitUniqueSection && !UniqueSectionNames) { UniqueID = *NextUniqueID; (*NextUniqueID)++; } + // Use 0 as the unique ID for execute-only text + if (Kind.isExecuteOnly()) + UniqueID = 0; return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, EntrySize, Group, UniqueID); } MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { unsigned Flags = getELFSectionFlags(Kind); // If we have -ffunction-section or -fdata-section then we should emit the @@ -323,14 +335,14 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( else EmitUniqueSection = TM.getDataSections(); } - EmitUniqueSection |= GV->hasComdat(); + EmitUniqueSection |= GO->hasComdat(); - return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM, + return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags, &NextUniqueID); } MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( - const Function &F, Mangler &Mang, const TargetMachine &TM) const { + const Function &F, const TargetMachine &TM) const { // If the function can be removed, produce a unique section so that // the table doesn't prevent the removal. const Comdat *C = F.getComdat(); @@ -339,7 +351,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( return ReadOnlySection; return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(), - Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC, + getMangler(), TM, EmitUniqueSection, ELF::SHF_ALLOC, &NextUniqueID); } @@ -423,7 +435,7 @@ MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( } const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( - const GlobalValue *LHS, const GlobalValue *RHS, Mangler &Mang, + const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const { // We may only use a PLT-relative relocation to refer to unnamed_addr // functions. 
@@ -437,22 +449,28 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( return nullptr; return MCBinaryExpr::createSub( - MCSymbolRefExpr::create(TM.getSymbol(LHS, Mang), PLTRelativeVariantKind, + MCSymbolRefExpr::create(TM.getSymbol(LHS), PLTRelativeVariantKind, getContext()), - MCSymbolRefExpr::create(TM.getSymbol(RHS, Mang), getContext()), - getContext()); + MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext()); } void TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { UseInitArray = UseInitArray_; - if (!UseInitArray) + MCContext &Ctx = getContext(); + if (!UseInitArray) { + StaticCtorSection = Ctx.getELFSection(".ctors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + + StaticDtorSection = Ctx.getELFSection(".dtors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); return; + } - StaticCtorSection = getContext().getELFSection( - ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); - StaticDtorSection = getContext().getELFSection( - ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); + StaticCtorSection = Ctx.getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | ELF::SHF_ALLOC); + StaticDtorSection = Ctx.getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | ELF::SHF_ALLOC); } //===----------------------------------------------------------------------===// @@ -464,11 +482,28 @@ TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() SupportIndirectSymViaGOTPCRel = true; } +void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFile::Initialize(Ctx, TM); + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorSection = Ctx.getMachOSection("__TEXT", "__constructor", 0, + SectionKind::getData()); + StaticDtorSection = Ctx.getMachOSection("__TEXT", "__destructor", 0, + SectionKind::getData()); + } else { + StaticCtorSection = Ctx.getMachOSection("__DATA", "__mod_init_func", + MachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getData()); + StaticDtorSection = Ctx.getMachOSection("__DATA", "__mod_term_func", + MachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getData()); + } +} + /// emitModuleFlags - Perform code emission for module flags. -void TargetLoweringObjectFileMachO:: -emitModuleFlags(MCStreamer &Streamer, - ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - Mangler &Mang, const TargetMachine &TM) const { +void TargetLoweringObjectFileMachO::emitModuleFlags( + MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + const TargetMachine &TM) const { unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; MDNode *LinkerOptions = nullptr; @@ -542,23 +577,22 @@ static void checkMachOComdat(const GlobalValue *GV) { } MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // Parse the section specifier and create it if valid. StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; bool TAAParsed; - checkMachOComdat(GV); + checkMachOComdat(GO); std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + MCSectionMachO::ParseSectionSpecifier(GO->getSection(), Segment, Section, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { // If invalid, report the error with report_fatal_error. 
- report_fatal_error("Global variable '" + GV->getName() + + report_fatal_error("Global variable '" + GO->getName() + "' has an invalid section specifier '" + - GV->getSection() + "': " + ErrorCode + "."); + GO->getSection() + "': " + ErrorCode + "."); } // Get the section. @@ -575,7 +609,7 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Global variable '" + GV->getName() + + report_fatal_error("Global variable '" + GO->getName() + "' section type or attributes does not match previous" " section specifier"); } @@ -584,20 +618,19 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( } MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { - checkMachOComdat(GV); + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + checkMachOComdat(GO); // Handle thread local data. if (Kind.isThreadBSS()) return TLSBSSSection; if (Kind.isThreadData()) return TLSDataSection; if (Kind.isText()) - return GV->isWeakForLinker() ? TextCoalSection : TextSection; + return GO->isWeakForLinker() ? TextCoalSection : TextSection; // If this is weak/linkonce, put this in a coalescable section, either in text // or data depending on if it is writable. - if (GV->isWeakForLinker()) { + if (GO->isWeakForLinker()) { if (Kind.isReadOnly()) return ConstTextCoalSection; return DataCoalSection; @@ -605,21 +638,21 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - GV->getParent()->getDataLayout().getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + GO->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GO)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. - if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - GV->getParent()->getDataLayout().getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() && + GO->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GO)) < 32) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or // 'L' can be merged, so we only try merging GVs with private linkage. - if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) { + if (GO->hasPrivateLinkage() && Kind.isMergeableConst()) { if (Kind.isMergeableConst4()) return FourByteConstantSection; if (Kind.isMergeableConst8()) @@ -670,23 +703,21 @@ MCSection *TargetLoweringObjectFileMachO::getSectionForConstant( } const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( - const GlobalValue *GV, unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, MachineModuleInfo *MMI, - MCStreamer &Streamer) const { + const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { // The mach-o version of this method defaults to returning a stub reference. 
if (Encoding & DW_EH_PE_indirect) { MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - MCSymbol *SSym = - getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM); + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { - MCSymbol *Sym = TM.getSymbol(GV, Mang); + MCSymbol *Sym = TM.getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -695,24 +726,24 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } - return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, Mang, - TM, MMI, Streamer); + return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM, + MMI, Streamer); } MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( - const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + const GlobalValue *GV, const TargetMachine &TM, MachineModuleInfo *MMI) const { // The mach-o version of this method defaults to returning a stub reference. MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM); + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { - MCSymbol *Sym = TM.getSymbol(GV, Mang); + MCSymbol *Sym = TM.getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -793,13 +824,16 @@ static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, } void TargetLoweringObjectFileMachO::getNameWithPrefix( - SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + SmallVectorImpl<char> &OutName, const GlobalValue *GV, const TargetMachine &TM) const { - SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM); - bool CannotUsePrivateLabel = - !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection); - Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); + bool CannotUsePrivateLabel = true; + if (auto *GO = GV->getBaseObject()) { + SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM); + const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM); + CannotUsePrivateLabel = + !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection); + } + getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); } //===----------------------------------------------------------------------===// @@ -886,22 +920,21 @@ static int getSelectionForCOFF(const GlobalValue *GV) { } MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind, TM); - StringRef Name = GV->getSection(); + StringRef Name = GO->getSection(); StringRef COMDATSymName = ""; - if (GV->hasComdat()) { - Selection = getSelectionForCOFF(GV); + if (GO->hasComdat()) { + Selection = 
getSelectionForCOFF(GO); const GlobalValue *ComdatGV; if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) - ComdatGV = getComdatGVForCOFF(GV); + ComdatGV = getComdatGVForCOFF(GO); else - ComdatGV = GV; + ComdatGV = GO; if (!ComdatGV->hasPrivateLinkage()) { - MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + MCSymbol *Sym = TM.getSymbol(ComdatGV); COMDATSymName = Sym->getName(); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; } else { @@ -926,8 +959,7 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { } MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( - const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const { + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // If we have -ffunction-sections then we should emit the global value to a // uniqued section specifically for it. bool EmitUniquedSection; @@ -936,32 +968,32 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( else EmitUniquedSection = TM.getDataSections(); - if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) { + if ((EmitUniquedSection && !Kind.isCommon()) || GO->hasComdat()) { const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind, TM); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - int Selection = getSelectionForCOFF(GV); + int Selection = getSelectionForCOFF(GO); if (!Selection) Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; const GlobalValue *ComdatGV; - if (GV->hasComdat()) - ComdatGV = getComdatGVForCOFF(GV); + if (GO->hasComdat()) + ComdatGV = getComdatGVForCOFF(GO); else - ComdatGV = GV; + ComdatGV = GO; unsigned UniqueID = MCContext::GenericSectionID; if (EmitUniquedSection) UniqueID = NextUniqueID++; if (!ComdatGV->hasPrivateLinkage()) { - MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + MCSymbol *Sym = TM.getSymbol(ComdatGV); StringRef COMDATSymName = Sym->getName(); return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection, UniqueID); } else { SmallString<256> TmpData; - Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true); + getMangler().getNameWithPrefix(TmpData, GO, /*CannotUsePrivateLabel=*/true); return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, Selection, UniqueID); } @@ -986,7 +1018,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( } void TargetLoweringObjectFileCOFF::getNameWithPrefix( - SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + SmallVectorImpl<char> &OutName, const GlobalValue *GV, const TargetMachine &TM) const { bool CannotUsePrivateLabel = false; if (GV->hasPrivateLinkage() && @@ -994,11 +1026,11 @@ void TargetLoweringObjectFileCOFF::getNameWithPrefix( (isa<GlobalVariable>(GV) && TM.getDataSections()))) CannotUsePrivateLabel = true; - Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); + getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); } MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( - const Function &F, Mangler &Mang, const TargetMachine &TM) const { + const Function &F, const TargetMachine &TM) const { // If the function can be removed, produce a unique section so that // the table doesn't prevent the removal. 
const Comdat *C = F.getComdat(); @@ -1010,7 +1042,7 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( if (F.hasPrivateLinkage()) return ReadOnlySection; - MCSymbol *Sym = TM.getSymbol(&F, Mang); + MCSymbol *Sym = TM.getSymbol(&F); StringRef COMDATSymName = Sym->getName(); SectionKind Kind = SectionKind::getReadOnly(); @@ -1023,10 +1055,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); } -void TargetLoweringObjectFileCOFF:: -emitModuleFlags(MCStreamer &Streamer, - ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - Mangler &Mang, const TargetMachine &TM) const { +void TargetLoweringObjectFileCOFF::emitModuleFlags( + MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + const TargetMachine &TM) const { MDNode *LinkerOptions = nullptr; for (const auto &MFE : ModuleFlags) { @@ -1052,6 +1083,31 @@ emitModuleFlags(MCStreamer &Streamer, } } +void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFile::Initialize(Ctx, TM); + const Triple &T = TM.getTargetTriple(); + if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { + StaticCtorSection = + Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + StaticDtorSection = + Ctx.getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + } else { + StaticCtorSection = Ctx.getCOFFSection( + ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getData()); + StaticDtorSection = Ctx.getCOFFSection( + ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getData()); + } +} + MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getContext().getAssociativeCOFFSection( @@ -1065,7 +1121,7 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( } void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( - raw_ostream &OS, const GlobalValue *GV, const Mangler &Mang) const { + raw_ostream &OS, const GlobalValue *GV) const { if (!GV->hasDLLExportStorageClass() || GV->isDeclaration()) return; @@ -1079,14 +1135,14 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) { std::string Flag; raw_string_ostream FlagOS(Flag); - Mang.getNameWithPrefix(FlagOS, GV, false); + getMangler().getNameWithPrefix(FlagOS, GV, false); FlagOS.flush(); if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix()) OS << Flag.substr(1); else OS << Flag; } else { - Mang.getNameWithPrefix(OS, GV, false); + getMangler().getNameWithPrefix(OS, GV, false); } if (!GV->getValueType()->isFunctionTy()) { diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 8d2048f..b6da8e0 100644 --- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -29,7 +29,7 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { // Check to see if we should eliminate non-leaf frame pointers. 
if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf")) - return MF.getFrameInfo()->hasCalls(); + return MF.getFrameInfo().hasCalls(); return false; } diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp index b8c8209..e7ea2b4 100644 --- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -38,8 +38,8 @@ using namespace llvm; -static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, - cl::desc("Disable Post Regalloc")); +static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden, + cl::desc("Disable Post Regalloc Scheduler")); static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, cl::desc("Disable branch folding")); static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, @@ -98,6 +98,14 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); +static cl::opt<int> EnableGlobalISelAbort( + "global-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"global\" instruction selection " + "fails to lower/select an instruction: 0 disable the abort, " + "1 enable the abort, and " + "2 disable the abort but emit a diagnostic on failure"), + cl::init(1)); + // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). @@ -121,8 +129,7 @@ static cl::opt<CFLAAType> UseCFLAA( clEnumValN(CFLAAType::Andersen, "anders", "Enable inclusion-based CFL-AA"), clEnumValN(CFLAAType::Both, "both", - "Enable both variants of CFL-AA"), - clEnumValEnd)); + "Enable both variants of CFL-AA"))); /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. @@ -150,7 +157,7 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, static IdentifyingPassPtr overridePass(AnalysisID StandardID, IdentifyingPassPtr TargetID) { if (StandardID == &PostRASchedulerID) - return applyDisable(TargetID, DisablePostRA); + return applyDisable(TargetID, DisablePostRASched); if (StandardID == &BranchFolderPassID) return applyDisable(TargetID, DisableBranchFold); @@ -252,8 +259,7 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr), - StopAfter(nullptr), Started(true), Stopped(false), + : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), DisableVerify(false), EnableTailMerge(true) { @@ -347,6 +353,8 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { if (StartBefore == PassID) Started = true; + if (StopBefore == PassID) + Stopped = true; if (Started && !Stopped) { std::string Banner; // Construct banner message before PM->add() as that may delete the pass. @@ -469,12 +477,17 @@ void TargetPassConfig::addIRPasses() { if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining) addPass(createPartiallyInlineLibCallsPass()); + + // Insert calls to mcount-like functions. 
+ addPass(createCountingFunctionInserterPass()); } /// Turn exception handling constructs into something the code generators can /// handle. void TargetPassConfig::addPassesToHandleExceptions() { - switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { + const MCAsmInfo *MCAI = TM->getMCAsmInfo(); + assert(MCAI && "No MCAsmInfo"); + switch (MCAI->getExceptionHandlingType()) { case ExceptionHandling::SjLj: // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, @@ -483,7 +496,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. addPass(createSjLjEHPreparePass()); - // FALLTHROUGH + LLVM_FALLTHROUGH; case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: addPass(createDwarfEHPass(TM)); @@ -557,9 +570,6 @@ void TargetPassConfig::addISelPrepare() { void TargetPassConfig::addMachinePasses() { AddingMachinePasses = true; - if (TM->Options.EnableIPRA) - addPass(createRegUsageInfoPropPass()); - // Insert a machine instr printer pass after the specified pass. if (!StringRef(PrintMachineInstrs.getValue()).equals("") && !StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) { @@ -575,6 +585,9 @@ void TargetPassConfig::addMachinePasses() { // Print the instruction selected machine code... printAndVerify("After Instruction Selection"); + if (TM->Options.EnableIPRA) + addPass(createRegUsageInfoPropPass()); + // Expand pseudo-instructions emitted by ISel. addPass(&ExpandISelPseudosID); @@ -886,3 +899,14 @@ void TargetPassConfig::addBlockPlacement() { addPass(&MachineBlockPlacementStatsID); } } + +//===---------------------------------------------------------------------===// +/// GlobalISel Configuration +//===---------------------------------------------------------------------===// +bool TargetPassConfig::isGlobalISelAbortEnabled() const { + return EnableGlobalISelAbort == 1; +} + +bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const { + return EnableGlobalISelAbort == 2; +} diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp index e1d90cb..cd50c5b 100644 --- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -30,8 +30,8 @@ using namespace llvm; TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, const char *const *SRINames, - const unsigned *SRILaneMasks, - unsigned SRICoveringLanes) + const LaneBitmask *SRILaneMasks, + LaneBitmask SRICoveringLanes) : InfoDesc(ID), SubRegIndexNames(SRINames), SubRegIndexLaneMasks(SRILaneMasks), RegClassBegin(RCB), RegClassEnd(RCE), @@ -40,6 +40,36 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, TargetRegisterInfo::~TargetRegisterInfo() {} +void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg) + const { + for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI) + RegisterSet.set(*AI); +} + +bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, + ArrayRef<MCPhysReg> Exceptions) const { + // Check that all super registers of reserved regs are reserved as well. 
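A hedged sketch of how the -global-isel-abort values might be consumed through the two TargetPassConfig hooks added above (hypothetical helper; the include paths and the exact fallback reporting are assumptions, not part of this patch):

#include "llvm/CodeGen/Passes.h"          // TargetPassConfig is declared here in this tree
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper, not part of this patch.
static void handleGISelFailure(const llvm::TargetPassConfig &TPC,
                               const char *Msg) {
  if (TPC.isGlobalISelAbortEnabled())                // -global-isel-abort=1
    llvm::report_fatal_error(Msg);
  if (TPC.reportDiagnosticWhenGlobalISelFallback())  // -global-isel-abort=2
    llvm::errs() << "warning: GlobalISel fallback: " << Msg << "\n";
  // With -global-isel-abort=0 the fallback stays silent.
}

On the command line this corresponds to something like "llc -global-isel -global-isel-abort=2 file.ll", which keeps compiling but reports each instruction-selection fallback.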
+ BitVector Checked(getNumRegs()); + for (int Reg = RegisterSet.find_first(); Reg>=0; + Reg = RegisterSet.find_next(Reg)) { + if (Checked[Reg]) + continue; + for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) { + if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) { + dbgs() << "Error: Super register " << PrintReg(*SR, this) + << " of reserved register " << PrintReg(Reg, this) + << " is not reserved.\n"; + return false; + } + + // We transitively check superregs. So we can remember this for later + // to avoid compiletime explosion in deep register hierarchies. + Checked.set(*SR); + } + } + return true; +} + namespace llvm { Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, @@ -97,12 +127,6 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { }); } -Printable PrintLaneMask(LaneBitmask LaneMask) { - return Printable([LaneMask](raw_ostream &OS) { - OS << format("%08X", LaneMask); - }); -} - } // End of llvm namespace /// getAllocatableClass - Return the maximal subclass of the given register @@ -354,7 +378,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, // Check that Phys is in the allocation order. We shouldn't heed hints // from VirtReg's register class if they aren't in the allocation order. The // target probably has a reason for removing the register. - if (std::find(Order.begin(), Order.end(), Phys) == Order.end()) + if (!is_contained(Order, Phys)) return; // All clear, tell the register allocator to prefer this register. @@ -367,11 +391,11 @@ bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { bool TargetRegisterInfo::needsStackRealignment( const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const Function *F = MF.getFunction(); unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) { if (canRealignStack(MF)) diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index 022e912..83e52d3 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -144,7 +144,7 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { unsigned UseIdx = 0; for (unsigned i = 0; i != UseOperIdx; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.readsReg()) + if (MO.isReg() && MO.readsReg() && !MO.isDef()) ++UseIdx; } return UseIdx; diff --git a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp new file mode 100644 index 0000000..c74707d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -0,0 +1,54 @@ +//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file describes the general parts of a Subtarget. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +//--------------------------------------------------------------------------- +// TargetSubtargetInfo Class +// +TargetSubtargetInfo::TargetSubtargetInfo( + const Triple &TT, StringRef CPU, StringRef FS, + ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD, + const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, + const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, + const InstrStage *IS, const unsigned *OC, const unsigned *FP) + : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { +} + +TargetSubtargetInfo::~TargetSubtargetInfo() {} + +bool TargetSubtargetInfo::enableAtomicExpand() const { + return true; +} + +bool TargetSubtargetInfo::enableMachineScheduler() const { + return false; +} + +bool TargetSubtargetInfo::enableJoinGlobalCopies() const { + return enableMachineScheduler(); +} + +bool TargetSubtargetInfo::enableRALocalReassignment( + CodeGenOpt::Level OptLevel) const { + return true; +} + +bool TargetSubtargetInfo::enablePostRAScheduler() const { + return getSchedModel().PostRAScheduler; +} + +bool TargetSubtargetInfo::useAA() const { + return false; +} diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 8feb18b..0f1b2ed 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -109,7 +109,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, unsigned Dist); - bool commuteInstruction(MachineInstr *MI, + bool commuteInstruction(MachineInstr *MI, unsigned DstIdx, unsigned RegBIdx, unsigned RegCIdx, unsigned Dist); bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); @@ -651,6 +651,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, /// Commute a two-address instruction and update the basic block, distance map, /// and live variables if needed. Return true if it is successful. bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, + unsigned DstIdx, unsigned RegBIdx, unsigned RegCIdx, unsigned Dist) { @@ -671,7 +672,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, // Update source register map. unsigned FromRegC = getMappedReg(RegC, SrcRegMap); if (FromRegC) { - unsigned RegA = MI->getOperand(0).getReg(); + unsigned RegA = MI->getOperand(DstIdx).getReg(); SrcRegMap[RegA] = FromRegC; } @@ -1171,6 +1172,9 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, unsigned BaseOpIdx, bool BaseOpKilled, unsigned Dist) { + if (!MI->isCommutable()) + return false; + unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); unsigned OpsNum = MI->getDesc().getNumOperands(); @@ -1180,7 +1184,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, // and OtherOpIdx are commutable, it does not really search for // other commutable operands and does not change the values of passed // variables. 
- if (OtherOpIdx == BaseOpIdx || + if (OtherOpIdx == BaseOpIdx || !MI->getOperand(OtherOpIdx).isReg() || !TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx)) continue; @@ -1199,7 +1203,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, } // If it's profitable to commute, try to do so. - if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) { + if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx, + Dist)) { ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; @@ -1567,14 +1572,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (!IsEarlyClobber) { // Replace other (un-tied) uses of regB with LastCopiedReg. for (MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(LastCopiedReg); - MO.setSubReg(0); + MO.setSubReg(MO.getSubReg()); } } } diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 501e01c..c2db56a 100644 --- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -40,7 +40,7 @@ using namespace llvm; static bool eliminateUnreachableBlock(Function &F) { - SmallPtrSet<BasicBlock*, 8> Reachable; + df_iterator_default_set<BasicBlock*> Reachable; // Mark all reachable blocks. for (BasicBlock *BB : depth_first_ext(&F, Reachable)) @@ -130,7 +130,7 @@ void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { } bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { - SmallPtrSet<MachineBasicBlock*, 8> Reachable; + df_iterator_default_set<MachineBasicBlock*> Reachable; bool ModifiedPHI = false; MMI = getAnalysisIfAvailable<MachineModuleInfo>(); diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 8a3a032..0d506d6 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -73,8 +73,8 @@ void VirtRegMap::grow() { } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { - int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + int SS = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); ++NumSpillSlots; return SS; } @@ -110,7 +110,7 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); assert((SS >= 0 || - (SS >= MF->getFrameInfo()->getObjectIndexBegin())) && + (SS >= MF->getFrameInfo().getObjectIndexBegin())) && "illegal fixed frame index"); Virt2StackSlotMap[virtReg] = SS; } @@ -177,7 +177,7 @@ public: bool runOnMachineFunction(MachineFunction&) override; MachineFunctionProperties getSetProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::AllVRegsAllocated); + MachineFunctionProperties::Property::NoVRegs); } }; } // end anonymous namespace @@ -266,7 +266,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, SlotIndex MBBBegin = MBBI->first; // Advance all subrange iterators so that their end position is just // behind MBBBegin (or the iterator is at the end). 
- LaneBitmask LaneMask = 0; + LaneBitmask LaneMask; for (auto &RangeIterPair : SubRanges) { const LiveInterval::SubRange *SR = RangeIterPair.first; LiveInterval::const_iterator &SRI = RangeIterPair.second; @@ -277,7 +277,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, if (SRI->start <= MBBBegin) LaneMask |= SR->LaneMask; } - if (LaneMask == 0) + if (LaneMask.none()) continue; MachineBasicBlock *MBB = MBBI->second; MBB->addLiveIn(PhysReg, LaneMask); @@ -338,10 +338,11 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { assert(LI.liveAt(BaseIndex) && "Reads of completely dead register should be marked undef already"); unsigned SubRegIdx = MO.getSubReg(); + assert(SubRegIdx != 0 && LI.hasSubRanges()); LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); // See if any of the relevant subregister liveranges is defined at this point. for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex)) + if ((SR.LaneMask & UseMask).any() && SR.liveAt(BaseIndex)) return false; } return true; diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index 041fb7b..568720c 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -62,7 +62,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; - const char *getPassName() const override { + StringRef getPassName() const override { return "Windows exception handling preparation"; } @@ -521,7 +521,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn, if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) { // Create the entry for this cleanup with the appropriate handler - // properties. Finaly and fault handlers are distinguished by arity. + // properties. Finally and fault handlers are distinguished by arity. ClrHandlerType HandlerType = (Cleanup->getNumArgOperands() ? ClrHandlerType::Fault : ClrHandlerType::Finally); @@ -708,7 +708,7 @@ void WinEHPrepare::demotePHIsOnFunclets(Function &F) { void WinEHPrepare::cloneCommonBlocks(Function &F) { // We need to clone all blocks which belong to multiple funclets. Values are - // remapped throughout the funclet to propogate both the new instructions + // remapped throughout the funclet to propagate both the new instructions // *and* the new basic blocks themselves. for (auto &Funclets : FuncletBlocks) { BasicBlock *FuncletPadBB = Funclets.first; @@ -1202,8 +1202,12 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot, Goto->setSuccessor(0, PHIBlock); CatchRet->setSuccessor(NewBlock); // Update the color mapping for the newly split edge. + // Grab a reference to the ColorVector to be inserted before getting the + // reference to the vector we are copying because inserting the new + // element in BlockColors might cause the map to be reallocated. + ColorVector &ColorsForNewBlock = BlockColors[NewBlock]; ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock]; - BlockColors[NewBlock] = ColorsForPHIBlock; + ColorsForNewBlock = ColorsForPHIBlock; for (BasicBlock *FuncletPad : ColorsForPHIBlock) FuncletBlocks[FuncletPad].push_back(NewBlock); // Treat the new block as incoming for load insertion. 
diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp index 1f95708..63bd762 100644 --- a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -34,7 +34,82 @@ struct XRayInstrumentation : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override; + +private: + // Replace the original RET instruction with the exit sled code ("patchable + // ret" pseudo-instruction), so that at runtime XRay can replace the sled + // with a code jumping to XRay trampoline, which calls the tracing handler + // and, in the end, issues the RET instruction. + // This is the approach to go on CPUs which have a single RET instruction, + // like x86/x86_64. + void replaceRetWithPatchableRet(MachineFunction &MF, + const TargetInstrInfo *TII); + + // Prepend the original return instruction with the exit sled code ("patchable + // function exit" pseudo-instruction), preserving the original return + // instruction just after the exit sled code. + // This is the approach to go on CPUs which have multiple options for the + // return instruction, like ARM. For such CPUs we can't just jump into the + // XRay trampoline and issue a single return instruction there. We rather + // have to call the trampoline and return from it to the original return + // instruction of the function being instrumented. + void prependRetWithPatchableExit(MachineFunction &MF, + const TargetInstrInfo *TII); }; +} // anonymous namespace + +void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, + const TargetInstrInfo *TII) +{ + // We look for *all* terminators and returns, then replace those with + // PATCHABLE_RET instructions. + SmallVector<MachineInstr *, 4> Terminators; + for (auto &MBB : MF) { + for (auto &T : MBB.terminators()) { + unsigned Opc = 0; + if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { + // Replace return instructions with: + // PATCHABLE_RET <Opcode>, <Operand>... + Opc = TargetOpcode::PATCHABLE_RET; + } + if (TII->isTailCall(T)) { + // Treat the tail call as a return instruction, which has a + // different-looking sled than the normal return case. + Opc = TargetOpcode::PATCHABLE_TAIL_CALL; + } + if (Opc != 0) { + auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc)) + .addImm(T.getOpcode()); + for (auto &MO : T.operands()) + MIB.addOperand(MO); + Terminators.push_back(&T); + } + } + } + + for (auto &I : Terminators) + I->eraseFromParent(); +} + +void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, + const TargetInstrInfo *TII) +{ + for (auto &MBB : MF) { + for (auto &T : MBB.terminators()) { + unsigned Opc = 0; + if (T.isReturn()) { + Opc = TargetOpcode::PATCHABLE_FUNCTION_EXIT; + } + if (TII->isTailCall(T)) { + Opc = TargetOpcode::PATCHABLE_TAIL_CALL; + } + if (Opc != 0) { + // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT or + // PATCHABLE_TAIL_CALL . + BuildMI(MBB, T, T.getDebugLoc(),TII->get(Opc)); + } + } + } } bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { @@ -54,39 +129,43 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { return false; // Function is too small. } + // We look for the first non-empty MachineBasicBlock, so that we can insert + // the function instrumentation in the appropriate place. + auto MBI = + find_if(MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); }); + if (MBI == MF.end()) + return false; // The function is empty. 
+ + auto *TII = MF.getSubtarget().getInstrInfo(); + auto &FirstMBB = *MBI; + auto &FirstMI = *FirstMBB.begin(); + + if (!MF.getSubtarget().isXRaySupported()) { + FirstMI.emitError("An attempt to perform XRay instrumentation for an" + " unsupported target."); + return false; + } + // FIXME: Do the loop triviality analysis here or in an earlier pass. // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the // MachineFunction. - auto &FirstMBB = *MF.begin(); - auto &FirstMI = *FirstMBB.begin(); - auto *TII = MF.getSubtarget().getInstrInfo(); BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); - // Then we look for *all* terminators and returns, then replace those with - // PATCHABLE_RET instructions. - SmallVector<MachineInstr *, 4> Terminators; - for (auto &MBB : MF) { - for (auto &T : MBB.terminators()) { - // FIXME: Handle tail calls here too? - if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { - // Replace return instructions with: - // PATCHABLE_RET <Opcode>, <Operand>... - auto MIB = BuildMI(MBB, T, T.getDebugLoc(), - TII->get(TargetOpcode::PATCHABLE_RET)) - .addImm(T.getOpcode()); - for (auto &MO : T.operands()) - MIB.addOperand(MO); - Terminators.push_back(&T); - break; - } - } + switch (MF.getTarget().getTargetTriple().getArch()) { + case Triple::ArchType::arm: + case Triple::ArchType::thumb: + case Triple::ArchType::aarch64: + // For the architectures which don't have a single return instruction + prependRetWithPatchableExit(MF, TII); + break; + default: + // For the architectures that have a single return instruction (such as + // RETQ on x86_64). + replaceRetWithPatchableRet(MF, TII); + break; } - - for (auto &I : Terminators) - I->eraseFromParent(); - return true; } |
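A hedged usage sketch for the XRay instrumentation pass above: the early-exit checks of runOnMachineFunction() (in the part of the function not shown in this hunk) are driven by function attributes, so a front end opts a function in roughly as follows, assuming the conventional XRay attribute names (an assumption; they do not appear in this diff):

#include "llvm/IR/Function.h"

// Hypothetical helper, not part of this patch; attribute names are assumed.
static void markForXRay(llvm::Function &F) {
  // Force entry/exit sleds regardless of the instruction-count threshold.
  F.addFnAttr("function-instrument", "xray-always");
  // Or instrument only functions with at least this many instructions:
  // F.addFnAttr("xray-instruction-threshold", "200");
}

As the switch over getArch() above shows, targets with a single return instruction (such as x86_64) get their returns rewritten into PATCHABLE_RET, while arm, thumb and aarch64 keep the original return and have a PATCHABLE_FUNCTION_EXIT prepended instead.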