diff options
Diffstat (limited to 'lib/CodeGen')
124 files changed, 17318 insertions, 9905 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6d12581..8bc5ef9 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,16 +18,25 @@ #include "llvm/Module.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,12 +50,17 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), cl::init(cl::BOU_UNSET)); char AsmPrinter::ID = 0; -AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm, - const TargetAsmInfo *T, bool VDef) +AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm, + const MCAsmInfo *T, bool VDef) : MachineFunctionPass(&ID), FunctionNumber(0), O(o), - TM(tm), TAI(T), TRI(tm.getRegisterInfo()), - IsInTextSection(false), LastMI(0), LastFn(0), Counter(~0U), - PrevDLT(0, ~0U, ~0U) { + TM(tm), MAI(T), TRI(tm.getRegisterInfo()), + + OutContext(*new MCContext()), + // FIXME: Pass instprinter to streamer. 
+ OutStreamer(*createAsmStreamer(OutContext, O, *T, 0)), + + LastMI(0), LastFn(0), Counter(~0U), + PrevDLT(0, 0, ~0U, ~0U) { DW = 0; MMI = 0; switch (AsmVerbose) { case cl::BOU_UNSET: VerboseAsm = VDef; break; @@ -59,188 +73,124 @@ AsmPrinter::~AsmPrinter() { for (gcp_iterator I = GCMetadataPrinters.begin(), E = GCMetadataPrinters.end(); I != E; ++I) delete I->second; -} - -/// SwitchToTextSection - Switch to the specified text section of the executable -/// if we are not already in it! -/// -void AsmPrinter::SwitchToTextSection(const char *NewSection, - const GlobalValue *GV) { - std::string NS; - if (GV && GV->hasSection()) - NS = TAI->getSwitchToSectionDirective() + GV->getSection(); - else - NS = NewSection; - // If we're already in this section, we're done. - if (CurrentSection == NS) return; - - // Close the current section, if applicable. - if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) - O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; - - CurrentSection = NS; - - if (!CurrentSection.empty()) - O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n'; - - IsInTextSection = true; + delete &OutStreamer; + delete &OutContext; } -/// SwitchToDataSection - Switch to the specified data section of the executable -/// if we are not already in it! -/// -void AsmPrinter::SwitchToDataSection(const char *NewSection, - const GlobalValue *GV) { - std::string NS; - if (GV && GV->hasSection()) - NS = TAI->getSwitchToSectionDirective() + GV->getSection(); - else - NS = NewSection; - - // If we're already in this section, we're done. - if (CurrentSection == NS) return; - - // Close the current section, if applicable. 
- if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) - O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; - - CurrentSection = NS; - - if (!CurrentSection.empty()) - O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n'; - - IsInTextSection = false; +TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { + return TM.getTargetLowering()->getObjFileLowering(); } -/// SwitchToSection - Switch to the specified section of the executable if we -/// are not already in it! -void AsmPrinter::SwitchToSection(const Section* NS) { - const std::string& NewSection = NS->getName(); - - // If we're already in this section, we're done. - if (CurrentSection == NewSection) return; - - // Close the current section, if applicable. - if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) - O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; - - // FIXME: Make CurrentSection a Section* in the future - CurrentSection = NewSection; - CurrentSection_ = NS; - - if (!CurrentSection.empty()) { - // If section is named we need to switch into it via special '.section' - // directive and also append funky flags. Otherwise - section name is just - // some magic assembler directive. - if (NS->isNamed()) - O << TAI->getSwitchToSectionDirective() - << CurrentSection - << TAI->getSectionFlags(NS->getFlags()); - else - O << CurrentSection; - O << TAI->getDataSectionStartSuffix() << '\n'; - } - - IsInTextSection = (NS->getFlags() & SectionFlags::Code); +/// getCurrentSection() - Return the current section we are emitting to. 
+const MCSection *AsmPrinter::getCurrentSection() const { + return OutStreamer.getCurrentSection(); } + void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<GCModuleInfo>(); + if (VerboseAsm) + AU.addRequired<MachineLoopInfo>(); } bool AsmPrinter::doInitialization(Module &M) { - Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix()); + // Initialize TargetLoweringObjectFile. + const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) + .Initialize(OutContext, TM); - if (TAI->doesAllowQuotesInName()) + Mang = new Mangler(M, MAI->getGlobalPrefix(), MAI->getPrivateGlobalPrefix(), + MAI->getLinkerPrivateGlobalPrefix()); + + if (MAI->doesAllowQuotesInName()) Mang->setUseQuotes(true); + + if (MAI->doesAllowNameToStartWithDigit()) + Mang->setSymbolsCanStartWithDigit(true); - GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); - assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + // Allow the target to emit any magic that it wants at the start of the file. + EmitStartOfAsmFile(M); - if (TAI->hasSingleParameterDotFile()) { + if (MAI->hasSingleParameterDotFile()) { /* Very minimal debug info. It is ignored if we emit actual - debug info. If we don't, this at helps the user find where + debug info. If we don't, this at least helps the user find where a function came from. 
*/ O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n"; } + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) - MP->beginAssembly(O, *this, *TAI); + MP->beginAssembly(O, *this, *MAI); if (!M.getModuleInlineAsm().empty()) - O << TAI->getCommentString() << " Start of file scope inline assembly\n" + O << MAI->getCommentString() << " Start of file scope inline assembly\n" << M.getModuleInlineAsm() - << '\n' << TAI->getCommentString() + << '\n' << MAI->getCommentString() << " End of file scope inline assembly\n"; - SwitchToDataSection(""); // Reset back to no section. - - if (TAI->doesSupportDebugInformation() || - TAI->doesSupportExceptionHandling()) { - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - if (MMI) - MMI->AnalyzeModule(M); - DW = getAnalysisIfAvailable<DwarfWriter>(); - if (DW) - DW->BeginModule(&M, MMI, O, this, TAI); - } + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + if (MMI) + MMI->AnalyzeModule(M); + DW = getAnalysisIfAvailable<DwarfWriter>(); + if (DW) + DW->BeginModule(&M, MMI, O, this, MAI); return false; } bool AsmPrinter::doFinalization(Module &M) { + // Emit global variables. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + PrintGlobalVariable(I); + // Emit final debug information. - if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) DW->EndModule(); // If the target wants to know about weak references, print them all. - if (TAI->getWeakRefDirective()) { + if (MAI->getWeakRefDirective()) { // FIXME: This is not lazy, it would be nice to only print weak references // to stuff that is actually used. 
Note that doing so would require targets // to notice uses in operands (due to constant exprs etc). This should // happen with the MC stuff eventually. - SwitchToDataSection(""); // Print out module-level global variables here. for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (I->hasExternalWeakLinkage()) - O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n'; + O << MAI->getWeakRefDirective() << Mang->getMangledName(I) << '\n'; } - for (Module::const_iterator I = M.begin(), E = M.end(); - I != E; ++I) { + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { if (I->hasExternalWeakLinkage()) - O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n'; + O << MAI->getWeakRefDirective() << Mang->getMangledName(I) << '\n'; } } - if (TAI->getSetDirective()) { - if (!M.alias_empty()) - SwitchToSection(TAI->getTextSection()); - + if (MAI->getSetDirective()) { O << '\n'; for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { - std::string Name = Mang->getValueName(I); - std::string Target; + std::string Name = Mang->getMangledName(I); const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal()); - Target = Mang->getValueName(GV); + std::string Target = Mang->getMangledName(GV); - if (I->hasExternalLinkage() || !TAI->getWeakRefDirective()) + if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) O << "\t.globl\t" << Name << '\n'; else if (I->hasWeakLinkage()) - O << TAI->getWeakRefDirective() << Name << '\n'; + O << MAI->getWeakRefDirective() << Name << '\n'; else if (!I->hasLocalLinkage()) - assert(0 && "Invalid alias linkage"); + llvm_unreachable("Invalid alias linkage"); printVisibility(Name, I->getVisibility()); - O << TAI->getSetDirective() << ' ' << Name << ", " << Target << '\n'; + O << MAI->getSetDirective() << ' ' << Name << ", " << Target << '\n'; } } @@ -248,45 +198,43 @@ bool AsmPrinter::doFinalization(Module &M) { assert(MI && 
"AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) - MP->finishAssembly(O, *this, *TAI); + MP->finishAssembly(O, *this, *MAI); // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) - if (TAI->getNonexecutableStackDirective()) - O << TAI->getNonexecutableStackDirective() << '\n'; + if (MAI->getNonexecutableStackDirective()) + O << MAI->getNonexecutableStackDirective() << '\n'; + + // Allow the target to emit any magic that it wants at the end of the file, + // after everything else has gone out. + EmitEndOfAsmFile(M); + delete Mang; Mang = 0; DW = 0; MMI = 0; + + OutStreamer.Finish(); return false; } -const std::string & -AsmPrinter::getCurrentFunctionEHName(const MachineFunction *MF, - std::string &Name) const { - assert(MF && "No machine function?"); - Name = MF->getFunction()->getName(); - if (Name.empty()) - Name = Mang->getValueName(MF->getFunction()); - Name = Mang->makeNameProper(TAI->getEHGlobalPrefix() + - Name + ".eh", TAI->getGlobalPrefix()); - return Name; -} - void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { // What's my mangled name? - CurrentFnName = Mang->getValueName(MF.getFunction()); + CurrentFnName = Mang->getMangledName(MF.getFunction()); IncrementFunctionNumber(); + + if (VerboseAsm) + LI = &getAnalysis<MachineLoopInfo>(); } namespace { // SectionCPs - Keep track the alignment, constpool entries per Section. 
struct SectionCPs { - const Section *S; + const MCSection *S; unsigned Alignment; SmallVector<unsigned, 4> CPEs; - SectionCPs(const Section *s, unsigned a) : S(s), Alignment(a) {}; + SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}; }; } @@ -303,9 +251,27 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { // the same section together to reduce amount of section switch statements. SmallVector<SectionCPs, 4> CPSections; for (unsigned i = 0, e = CP.size(); i != e; ++i) { - MachineConstantPoolEntry CPE = CP[i]; + const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - const Section* S = TAI->SelectSectionForMachineConst(CPE.getType()); + + SectionKind Kind; + switch (CPE.getRelocationInfo()) { + default: llvm_unreachable("Unknown section kind"); + case 2: Kind = SectionKind::getReadOnlyWithRel(); break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + case 4: Kind = SectionKind::getMergeableConst4(); break; + case 8: Kind = SectionKind::getMergeableConst8(); break; + case 16: Kind = SectionKind::getMergeableConst16();break; + default: Kind = SectionKind::getMergeableConst(); break; + } + } + + const MCSection *S = getObjFileLowering().getSectionForConstant(Kind); + // The number of sections are small, just do a linear search from the // last section to the first. bool Found = false; @@ -328,7 +294,7 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { // Now print stuff into the calculated sections. 
for (unsigned i = 0, e = CPSections.size(); i != e; ++i) { - SwitchToSection(CPSections[i].S); + OutStreamer.SwitchSection(CPSections[i].S); EmitAlignment(Log2_32(CPSections[i].Alignment)); unsigned Offset = 0; @@ -344,11 +310,12 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { const Type *Ty = CPE.getType(); Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); - O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' - << CPI << ":\t\t\t\t\t"; + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << CPI << ':'; if (VerboseAsm) { - O << TAI->getCommentString() << ' '; - WriteTypeSymbolic(O, CPE.getType(), 0); + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " constant "; + WriteTypeSymbolic(O, CPE.getType(), MF->getFunction()->getParent()); } O << '\n'; if (CPE.isMachineConstantPoolEntry()) @@ -373,20 +340,21 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, // the appropriate section. TargetLowering *LoweringInfo = TM.getTargetLowering(); - const char* JumpTableDataSection = TAI->getJumpTableDataSection(); const Function *F = MF.getFunction(); - unsigned SectionFlags = TAI->SectionFlagsForGlobal(F); bool JTInDiffSection = false; - if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) || - !JumpTableDataSection || - SectionFlags & SectionFlags::Linkonce) { + if (F->isWeakForLinker() || + (IsPic && !LoweringInfo->usesGlobalOffsetTable())) { // In PIC mode, we need to emit the jump table to the same section as the // function body itself, otherwise the label differences won't make sense. // We should also do if the section name is NULL or function is declared in // discardable section. - SwitchToSection(TAI->SectionForGlobal(F)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, + TM)); } else { - SwitchToDataSection(JumpTableDataSection); + // Otherwise, drop it in the readonly section. 
+ const MCSection *ReadOnlySection = + getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly()); + OutStreamer.SwitchSection(ReadOnlySection); JTInDiffSection = true; } @@ -402,21 +370,21 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, // the number of relocations the assembler will generate for the jump table. // Set directives are all printed before the jump table itself. SmallPtrSet<MachineBasicBlock*, 16> EmittedSets; - if (TAI->getSetDirective() && IsPic) + if (MAI->getSetDirective() && IsPic) for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) if (EmittedSets.insert(JTBBs[ii])) printPICJumpTableSetLabel(i, JTBBs[ii]); - // On some targets (e.g. darwin) we want to emit two consequtive labels + // On some targets (e.g. Darwin) we want to emit two consequtive labels // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (JTInDiffSection) { - if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) - O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0]) { + O << MAI->getLinkerPrivateGlobalPrefix() + << "JTI" << getFunctionNumber() << '_' << i << ":\n"; } - O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << i << ":\n"; for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { @@ -429,15 +397,15 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid) const { - bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + bool isPIC = TM.getRelocationModel() == Reloc::PIC_; // Use JumpTableDirective otherwise honor the entry size from the jump table // info. 
- const char *JTEntryDirective = TAI->getJumpTableDirective(); + const char *JTEntryDirective = MAI->getJumpTableDirective(isPIC); bool HadJTEntryDirective = JTEntryDirective != NULL; if (!HadJTEntryDirective) { JTEntryDirective = MJTI->getEntrySize() == 4 ? - TAI->getData32bitsDirective() : TAI->getData64bitsDirective(); + MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); } O << JTEntryDirective << ' '; @@ -447,20 +415,18 @@ void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, // emit the table entries as differences between two text section labels. // If we're emitting non-PIC code, then emit the entries as direct // references to the target basic blocks. - if (IsPic) { - if (TAI->getSetDirective()) { - O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << uid << "_set_" << MBB->getNumber(); - } else { - printBasicBlockLabel(MBB, false, false, false); - // If the arch uses custom Jump Table directives, don't calc relative to - // JT - if (!HadJTEntryDirective) - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" - << getFunctionNumber() << '_' << uid; - } + if (!isPIC) { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + } else if (MAI->getSetDirective()) { + O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << uid << "_set_" << MBB->getNumber(); } else { - printBasicBlockLabel(MBB, false, false, false); + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + // If the arch uses custom Jump Table directives, don't calc relative to + // JT + if (!HadJTEntryDirective) + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << uid; } } @@ -470,12 +436,12 @@ void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, /// do nothing and return false. bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getName() == "llvm.used") { - if (TAI->getUsedDirective() != 0) // No need to emit this at all. 
+ if (MAI->getUsedDirective() != 0) // No need to emit this at all. EmitLLVMUsedList(GV->getInitializer()); return true; } - // Ignore debug and non-emitted data. + // Ignore debug and non-emitted data. This handles llvm.compiler.used. if (GV->getSection() == "llvm.metadata" || GV->hasAvailableExternallyLinkage()) return true; @@ -487,14 +453,14 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { const TargetData *TD = TM.getTargetData(); unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { - SwitchToDataSection(TAI->getStaticCtorsSection()); + OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); EmitAlignment(Align, 0); EmitXXStructorList(GV->getInitializer()); return true; } if (GV->getName() == "llvm.global_dtors") { - SwitchToDataSection(TAI->getStaticDtorsSection()); + OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); EmitAlignment(Align, 0); EmitXXStructorList(GV->getInitializer()); return true; @@ -503,45 +469,20 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { return false; } -/// findGlobalValue - if CV is an expression equivalent to a single -/// global value, return that value. 
-const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) { - if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) - return GV; - else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { - const TargetData *TD = TM.getTargetData(); - unsigned Opcode = CE->getOpcode(); - switch (Opcode) { - case Instruction::GetElementPtr: { - const Constant *ptrVal = CE->getOperand(0); - SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); - if (TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], idxVec.size())) - return 0; - return findGlobalValue(ptrVal); - } - case Instruction::BitCast: - return findGlobalValue(CE->getOperand(0)); - default: - return 0; - } - } - return 0; -} - -/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each +/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each /// global in the specified llvm.used list for which emitUsedDirectiveFor /// is true, as being used with this directive. - void AsmPrinter::EmitLLVMUsedList(Constant *List) { - const char *Directive = TAI->getUsedDirective(); + const char *Directive = MAI->getUsedDirective(); // Should be an array of 'i8*'. ConstantArray *InitList = dyn_cast<ConstantArray>(List); if (InitList == 0) return; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - const GlobalValue *GV = findGlobalValue(InitList->getOperand(i)); - if (TAI->emitUsedDirectiveFor(GV, Mang)) { + const GlobalValue *GV = + dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); + if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) { O << Directive; EmitConstantValueOnly(InitList->getOperand(i)); O << '\n'; @@ -567,32 +508,6 @@ void AsmPrinter::EmitXXStructorList(Constant *List) { } } -/// getGlobalLinkName - Returns the asm/link name of of the specified -/// global variable. Should be overridden by each target asm printer to -/// generate the appropriate value. 
-const std::string &AsmPrinter::getGlobalLinkName(const GlobalVariable *GV, - std::string &LinkName) const { - if (isa<Function>(GV)) { - LinkName += TAI->getFunctionAddrPrefix(); - LinkName += Mang->getValueName(GV); - LinkName += TAI->getFunctionAddrSuffix(); - } else { - LinkName += TAI->getGlobalVarAddrPrefix(); - LinkName += Mang->getValueName(GV); - LinkName += TAI->getGlobalVarAddrSuffix(); - } - - return LinkName; -} - -/// EmitExternalGlobal - Emit the external reference to a global variable. -/// Should be overridden if an indirect reference should be used. -void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) { - std::string GLN; - O << getGlobalLinkName(GV, GLN); -} - - //===----------------------------------------------------------------------===// /// LEB 128 number encoding. @@ -646,8 +561,8 @@ void AsmPrinter::EOL() const { void AsmPrinter::EOL(const std::string &Comment) const { if (VerboseAsm && !Comment.empty()) { - O << '\t' - << TAI->getCommentString() + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' ' << Comment; } @@ -656,22 +571,72 @@ void AsmPrinter::EOL(const std::string &Comment) const { void AsmPrinter::EOL(const char* Comment) const { if (VerboseAsm && *Comment) { - O << '\t' - << TAI->getCommentString() + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' ' << Comment; } O << '\n'; } +static const char *DecodeDWARFEncoding(unsigned Encoding) { + switch (Encoding) { + case dwarf::DW_EH_PE_absptr: + return "absptr"; + case dwarf::DW_EH_PE_omit: + return "omit"; + case dwarf::DW_EH_PE_pcrel: + return "pcrel"; + case dwarf::DW_EH_PE_udata4: + return "udata4"; + case dwarf::DW_EH_PE_udata8: + return "udata8"; + case dwarf::DW_EH_PE_sdata4: + return "sdata4"; + case dwarf::DW_EH_PE_sdata8: + return "sdata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: + return "pcrel udata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: + return "pcrel sdata4"; + case 
dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: + return "pcrel udata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: + return "pcrel sdata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: + return "indirect pcrel udata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: + return "indirect pcrel sdata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: + return "indirect pcrel udata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: + return "indirect pcrel sdata8"; + } + + return 0; +} + +void AsmPrinter::EOL(const char *Comment, unsigned Encoding) const { + if (VerboseAsm && *Comment) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << ' ' + << Comment; + + if (const char *EncStr = DecodeDWARFEncoding(Encoding)) + O << " (" << EncStr << ')'; + } + O << '\n'; +} + /// EmitULEB128Bytes - Emit an assembler byte data directive to compose an /// unsigned leb128 value. void AsmPrinter::EmitULEB128Bytes(unsigned Value) const { - if (TAI->hasLEB128()) { + if (MAI->hasLEB128()) { O << "\t.uleb128\t" << Value; } else { - O << TAI->getData8bitsDirective(); + O << MAI->getData8bitsDirective(); PrintULEB128(Value); } } @@ -679,11 +644,11 @@ void AsmPrinter::EmitULEB128Bytes(unsigned Value) const { /// EmitSLEB128Bytes - print an assembler byte data directive to compose a /// signed leb128 value. void AsmPrinter::EmitSLEB128Bytes(int Value) const { - if (TAI->hasLEB128()) { + if (MAI->hasLEB128()) { O << "\t.sleb128\t" << Value; } else { - O << TAI->getData8bitsDirective(); + O << MAI->getData8bitsDirective(); PrintSLEB128(Value); } } @@ -691,29 +656,29 @@ void AsmPrinter::EmitSLEB128Bytes(int Value) const { /// EmitInt8 - Emit a byte directive and value. 
/// void AsmPrinter::EmitInt8(int Value) const { - O << TAI->getData8bitsDirective(); + O << MAI->getData8bitsDirective(); PrintHex(Value & 0xFF); } /// EmitInt16 - Emit a short directive and value. /// void AsmPrinter::EmitInt16(int Value) const { - O << TAI->getData16bitsDirective(); + O << MAI->getData16bitsDirective(); PrintHex(Value & 0xFFFF); } /// EmitInt32 - Emit a long directive and value. /// void AsmPrinter::EmitInt32(int Value) const { - O << TAI->getData32bitsDirective(); + O << MAI->getData32bitsDirective(); PrintHex(Value); } /// EmitInt64 - Emit a long long directive and value. /// void AsmPrinter::EmitInt64(uint64_t Value) const { - if (TAI->getData64bitsDirective()) { - O << TAI->getData64bitsDirective(); + if (MAI->getData64bitsDirective()) { + O << MAI->getData64bitsDirective(); PrintHex(Value); } else { if (TM.getTargetData()->isBigEndian()) { @@ -734,7 +699,7 @@ static inline char toOctal(int X) { /// printStringChar - Print a char, escaped if necessary. /// -static void printStringChar(raw_ostream &O, unsigned char C) { +static void printStringChar(formatted_raw_ostream &O, unsigned char C) { if (C == '"') { O << "\\\""; } else if (C == '\\') { @@ -766,11 +731,11 @@ void AsmPrinter::EmitString(const std::string &String) const { } void AsmPrinter::EmitString(const char *String, unsigned Size) const { - const char* AscizDirective = TAI->getAscizDirective(); + const char* AscizDirective = MAI->getAscizDirective(); if (AscizDirective) O << AscizDirective; else - O << TAI->getAsciiDirective(); + O << MAI->getAsciiDirective(); O << '\"'; for (unsigned i = 0; i < Size; ++i) printStringChar(O, String[i]); @@ -813,31 +778,26 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, NumBits = std::max(NumBits, ForcedAlignBits); if (NumBits == 0) return; // No need to emit alignment. 
- if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits; - O << TAI->getAlignDirective() << NumBits; - - unsigned FillValue = TAI->getTextAlignFillValue(); - UseFillExpr &= IsInTextSection && FillValue; - if (UseFillExpr) { - O << ','; - PrintHex(FillValue); - } - O << '\n'; + + unsigned FillValue = 0; + if (getCurrentSection()->getKind().isText()) + FillValue = MAI->getTextAlignFillValue(); + + OutStreamer.EmitValueToAlignment(1 << NumBits, FillValue, 1, 0); } - /// EmitZeros - Emit a block of zeros. /// void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const { if (NumZeros) { - if (TAI->getZeroDirective()) { - O << TAI->getZeroDirective() << NumZeros; - if (TAI->getZeroDirectiveSuffix()) - O << TAI->getZeroDirectiveSuffix(); + if (MAI->getZeroDirective()) { + O << MAI->getZeroDirective() << NumZeros; + if (MAI->getZeroDirectiveSuffix()) + O << MAI->getZeroDirectiveSuffix(); O << '\n'; } else { for (; NumZeros; --NumZeros) - O << TAI->getData8bitsDirective(AddrSpace) << "0\n"; + O << MAI->getData8bitsDirective(AddrSpace) << "0\n"; } } } @@ -851,22 +811,22 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { O << CI->getZExtValue(); } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { // This is a constant address for a global variable or function. Use the - // name of the variable or function as the address value, possibly - // decorating it with GlobalVarAddrPrefix/Suffix or - // FunctionAddrPrefix/Suffix (these all default to "" ) - if (isa<Function>(GV)) { - O << TAI->getFunctionAddrPrefix() - << Mang->getValueName(GV) - << TAI->getFunctionAddrSuffix(); - } else { - O << TAI->getGlobalVarAddrPrefix() - << Mang->getValueName(GV) - << TAI->getGlobalVarAddrSuffix(); - } + // name of the variable or function as the address value. 
+ O << Mang->getMangledName(GV); } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { const TargetData *TD = TM.getTargetData(); unsigned Opcode = CE->getOpcode(); switch (Opcode) { + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + llvm_unreachable("FIXME: Don't support this constant cast expr"); case Instruction::GetElementPtr: { // generate a symbolic expression for the byte address const Constant *ptrVal = CE->getOperand(0); @@ -891,17 +851,6 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { } break; } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - assert(0 && "FIXME: Don't yet support this kind of constant cast expr"); - break; case Instruction::BitCast: return EmitConstantValueOnly(CE->getOperand(0)); @@ -909,7 +858,8 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), + false/*ZExt*/); return EmitConstantValueOnly(Op); } @@ -922,16 +872,17 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { // We can emit the pointer value into this slot if the slot is an // integer slot greater or equal to the size of the pointer. 
- if (TD->getTypeAllocSize(Ty) >= TD->getTypeAllocSize(Op->getType())) + if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) return EmitConstantValueOnly(Op); O << "(("; EmitConstantValueOnly(Op); - APInt ptrMask = APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Ty)); + APInt ptrMask = + APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType())); SmallString<40> S; ptrMask.toStringUnsigned(S); - O << ") & " << S.c_str() << ')'; + O << ") & " << S.str() << ')'; break; } case Instruction::Add: @@ -966,17 +917,17 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { O << ')'; break; default: - assert(0 && "Unsupported operator!"); + llvm_unreachable("Unsupported operator!"); } } else { - assert(0 && "Unknown constant value!"); + llvm_unreachable("Unknown constant value!"); } } /// printAsCString - Print the specified array as a C compatible string, only if /// the predicate isString is true. /// -static void printAsCString(raw_ostream &O, const ConstantArray *CVA, +static void printAsCString(formatted_raw_ostream &O, const ConstantArray *CVA, unsigned LastElt) { assert(CVA->isString() && "Array is not string compatible!"); @@ -993,12 +944,12 @@ static void printAsCString(raw_ostream &O, const ConstantArray *CVA, /// void AsmPrinter::EmitString(const ConstantArray *CVA) const { unsigned NumElts = CVA->getNumOperands(); - if (TAI->getAscizDirective() && NumElts && + if (MAI->getAscizDirective() && NumElts && cast<ConstantInt>(CVA->getOperand(NumElts-1))->getZExtValue() == 0) { - O << TAI->getAscizDirective(); + O << MAI->getAscizDirective(); printAsCString(O, CVA, NumElts-1); } else { - O << TAI->getAsciiDirective(); + O << MAI->getAsciiDirective(); printAsCString(O, CVA, NumElts); } O << '\n'; @@ -1053,48 +1004,65 @@ void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace) { // FP Constants are printed as integer constants to avoid losing // precision... 
+ LLVMContext &Context = CFP->getContext(); const TargetData *TD = TM.getTargetData(); - if (CFP->getType() == Type::DoubleTy) { + if (CFP->getType()->isDoubleTy()) { double Val = CFP->getValueAPF().convertToDouble(); // for comment only uint64_t i = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - if (TAI->getData64bitsDirective(AddrSpace)) { - O << TAI->getData64bitsDirective(AddrSpace) << i; - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " double value: " << Val; + if (MAI->getData64bitsDirective(AddrSpace)) { + O << MAI->getData64bitsDirective(AddrSpace) << i; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " double " << Val; + } O << '\n'; } else if (TD->isBigEndian()) { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word of double " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word of double " << Val; + } O << '\n'; } else { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word of double " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); - 
if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word of double " << Val; + } O << '\n'; } return; - } else if (CFP->getType() == Type::FloatTy) { + } + + if (CFP->getType()->isFloatTy()) { float Val = CFP->getValueAPF().convertToFloat(); // for comment only - O << TAI->getData32bitsDirective(AddrSpace) + O << MAI->getData32bitsDirective(AddrSpace) << CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " float " << Val; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " float " << Val; + } O << '\n'; return; - } else if (CFP->getType() == Type::X86_FP80Ty) { + } + + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); @@ -1105,110 +1073,148 @@ void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP, DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); if (TD->isBigEndian()) { - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant halfword of ~" + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant halfword of x86_fp80 ~" << DoubleVal.convertToDouble(); + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) { + 
O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant halfword"; + } O << '\n'; } else { - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant halfword of ~" + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant halfword of x86_fp80 ~" << DoubleVal.convertToDouble(); + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next halfword"; + } 
O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant halfword"; + } O << '\n'; } - EmitZeros(TD->getTypeAllocSize(Type::X86_FP80Ty) - - TD->getTypeStoreSize(Type::X86_FP80Ty), AddrSpace); + EmitZeros(TD->getTypeAllocSize(Type::getX86_FP80Ty(Context)) - + TD->getTypeStoreSize(Type::getX86_FP80Ty(Context)), AddrSpace); return; - } else if (CFP->getType() == Type::PPC_FP128Ty) { + } + + if (CFP->getType()->isPPC_FP128Ty()) { // all long double variants are printed as hex // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); if (TD->isBigEndian()) { - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << 
MAI->getCommentString() + << " most significant word of ppc_fp128"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word"; + } O << '\n'; } else { - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word of ppc_fp128"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); - if (VerboseAsm) - O << 
'\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word"; + } O << '\n'; } return; - } else assert(0 && "Floating point constant type not handled"); + } else llvm_unreachable("Floating point constant type not handled"); } void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI, @@ -1229,29 +1235,37 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI, else Val = RawData[i]; - if (TAI->getData64bitsDirective(AddrSpace)) - O << TAI->getData64bitsDirective(AddrSpace) << Val << '\n'; + if (MAI->getData64bitsDirective(AddrSpace)) + O << MAI->getData64bitsDirective(AddrSpace) << Val << '\n'; else if (TD->isBigEndian()) { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant half of i64 " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant half of i64 " << Val; + } O << 
'\n'; } else { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant half of i64 " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant half of i64 " << Val; + } O << '\n'; } } @@ -1292,7 +1306,8 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { SmallString<40> S; CI->getValue().toStringUnsigned(S, 16); - O << "\t\t\t" << TAI->getCommentString() << " 0x" << S.c_str(); + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " 0x" << S.str(); } } O << '\n'; @@ -1300,7 +1315,7 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { // Target doesn't support this yet! - abort(); + llvm_unreachable("Target does not support EmitMachineConstantPoolValue"); } /// PrintSpecial - Print information related to the specified machine instr @@ -1311,10 +1326,10 @@ void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { /// for their own strange codes. 
void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { if (!strcmp(Code, "private")) { - O << TAI->getPrivateGlobalPrefix(); + O << MAI->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { if (VerboseAsm) - O << TAI->getCommentString(); + O << MAI->getCommentString(); } else if (!strcmp(Code, "uid")) { // Comparing the address of MI isn't sufficient, because machineinstrs may // be allocated to the same address across functions. @@ -1328,23 +1343,38 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { } O << Counter; } else { - cerr << "Unknown special formatter '" << Code + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Unknown special formatter '" << Code << "' for machine instr: " << *MI; - exit(1); + llvm_report_error(Msg.str()); } } /// processDebugLoc - Processes the debug information of each machine /// instruction's DebugLoc. -void AsmPrinter::processDebugLoc(DebugLoc DL) { - if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { +void AsmPrinter::processDebugLoc(const MachineInstr *MI, + bool BeforePrintingInsn) { + if (!MAI || !DW) + return; + DebugLoc DL = MI->getDebugLoc(); + if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { if (!DL.isUnknown()) { DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); - - if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) - printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, - DICompileUnit(CurDLT.CompileUnit))); - + if (BeforePrintingInsn) { + if (CurDLT.Scope != 0 && PrevDLT != CurDLT) { + unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, + CurDLT.Scope); + printLabel(L); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->SetDbgScopeBeginLabels(MI, L); +#endif + } else { +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->SetDbgScopeEndLabels(MI, 0); +#endif + } + } PrevDLT = CurDLT; } } @@ -1369,14 +1399,15 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { // If this asmstr is empty, just print 
the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { - O << TAI->getInlineAsmStart() << "\n\t" << TAI->getInlineAsmEnd() << '\n'; + O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t"; + O << MAI->getCommentString() << MAI->getInlineAsmEnd() << '\n'; return; } - O << TAI->getInlineAsmStart() << "\n\t"; + O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t"; // The variant of the current asmprinter. - int AsmPrinterVariant = TAI->getAssemblerDialect(); + int AsmPrinterVariant = MAI->getAssemblerDialect(); int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. @@ -1413,9 +1444,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { case '(': // $( -> same as GCC's { character. ++LastEmitted; // Consume '(' character. if (CurVariant != -1) { - cerr << "Nested variants found in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Nested variants found in inline asm string: '" + + std::string(AsmStr) + "'"); } CurVariant = 0; // We're in the first variant now. break; @@ -1450,9 +1480,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); if (StrEnd == 0) { - cerr << "Unterminated ${:foo} operand in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Unterminated ${:foo} operand in inline asm string: '" + + std::string(AsmStr) + "'"); } std::string Val(StrStart, StrEnd); @@ -1466,9 +1495,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { errno = 0; long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs. 
if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) { - cerr << "Bad $ operand number in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Bad $ operand number in inline asm string: '" + + std::string(AsmStr) + "'"); } LastEmitted = IDEnd; @@ -1480,9 +1508,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { if (*LastEmitted == ':') { ++LastEmitted; // Consume ':' character. if (*LastEmitted == 0) { - cerr << "Bad ${:} expression in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Bad ${:} expression in inline asm string: '" + + std::string(AsmStr) + "'"); } Modifier[0] = *LastEmitted; @@ -1490,17 +1517,15 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { } if (*LastEmitted != '}') { - cerr << "Bad ${} expression in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Bad ${} expression in inline asm string: '" + + std::string(AsmStr) + "'"); } ++LastEmitted; // Consume '}' character. } if ((unsigned)Val >= NumOperands-1) { - cerr << "Invalid $ operand number in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Invalid $ operand number in inline asm string: '" + + std::string(AsmStr) + "'"); } // Okay, we finally have a value number. Ask the target to print this @@ -1524,8 +1549,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { ++OpNo; // Skip over the ID number. 
if (Modifier[0]=='l') // labels are target independent - printBasicBlockLabel(MI->getOperand(OpNo).getMBB(), - false, false, false); + GetMBBSymbol(MI->getOperand(OpNo).getMBB() + ->getNumber())->print(O, MAI); else { AsmPrinter *AP = const_cast<AsmPrinter*>(this); if ((OpFlags & 7) == 4) { @@ -1538,25 +1563,28 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { } } if (Error) { - cerr << "Invalid operand found in inline asm: '" + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Invalid operand found in inline asm: '" << AsmStr << "'\n"; - MI->dump(); - exit(1); + MI->print(Msg); + llvm_report_error(Msg.str()); } } break; } } } - O << "\n\t" << TAI->getInlineAsmEnd() << '\n'; + O << "\n\t" << MAI->getCommentString() << MAI->getInlineAsmEnd(); } /// printImplicitDef - This method prints the specified machine instruction /// that is an implicit def. void AsmPrinter::printImplicitDef(const MachineInstr *MI) const { - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " implicit-def: " - << TRI->getAsmName(MI->getOperand(0).getReg()) << '\n'; + if (!VerboseAsm) return; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " implicit-def: " + << TRI->getName(MI->getOperand(0).getReg()); } /// printLabel - This method prints a local label used by debug and @@ -1566,17 +1594,7 @@ void AsmPrinter::printLabel(const MachineInstr *MI) const { } void AsmPrinter::printLabel(unsigned Id) const { - O << TAI->getPrivateGlobalPrefix() << "label" << Id << ":\n"; -} - -/// printDeclare - This method prints a local variable declaration used by -/// debug tables. -/// FIXME: It doesn't really print anything rather it inserts a DebugVariable -/// entry into dwarf table. 
-void AsmPrinter::printDeclare(const MachineInstr *MI) const { - unsigned FI = MI->getOperand(0).getIndex(); - GlobalValue *GV = MI->getOperand(1).getGlobal(); - DW->RecordVariable(cast<GlobalVariable>(GV), FI, MI); + O << MAI->getPrivateGlobalPrefix() << "label" << Id << ':'; } /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM @@ -1595,51 +1613,69 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } -/// printBasicBlockLabel - This method prints the label for the specified -/// MachineBasicBlock -void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB, - bool printAlign, - bool printColon, - bool printComment) const { - if (printAlign) { - unsigned Align = MBB->getAlignment(); - if (Align) - EmitAlignment(Log2_32(Align)); - } +MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BB" + << getFunctionNumber() << '_' << MBBID; + + return OutContext.GetOrCreateSymbol(Name.str()); +} + - O << TAI->getPrivateGlobalPrefix() << "BB" << getFunctionNumber() << '_' - << MBB->getNumber(); - if (printColon) +/// EmitBasicBlockStart - This method prints the label for the specified +/// MachineBasicBlock, an alignment (if present) and a comment describing +/// it if appropriate. 
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { + if (unsigned Align = MBB->getAlignment()) + EmitAlignment(Log2_32(Align)); + + if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) { + if (VerboseAsm) + O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':'; + } else { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); O << ':'; - if (printComment && MBB->getBasicBlock()) - O << '\t' << TAI->getCommentString() << ' ' - << MBB->getBasicBlock()->getNameStart(); + if (!VerboseAsm) + O << '\n'; + } + + if (VerboseAsm) { + if (const BasicBlock *BB = MBB->getBasicBlock()) + if (BB->hasName()) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, BB, /*PrintType=*/false); + } + + EmitComments(*MBB); + O << '\n'; + } } /// printPICJumpTableSetLabel - This method prints a set label for the /// specified MachineBasicBlock for a jumptable entry. void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, const MachineBasicBlock *MBB) const { - if (!TAI->getSetDirective()) + if (!MAI->getSetDirective()) return; - O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; - printBasicBlockLabel(MBB, false, false, false); - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '\n'; } void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, const MachineBasicBlock *MBB) const { - if (!TAI->getSetDirective()) + if (!MAI->getSetDirective()) return; - O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() << getFunctionNumber() << '_' << uid << '_' << uid2 << "_set_" << 
MBB->getNumber() << ','; - printBasicBlockLabel(MBB, false, false, false); - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2 << '\n'; } @@ -1648,73 +1684,51 @@ void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, void AsmPrinter::printDataDirective(const Type *type, unsigned AddrSpace) { const TargetData *TD = TM.getTargetData(); switch (type->getTypeID()) { + case Type::FloatTyID: case Type::DoubleTyID: + case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: + assert(0 && "Should have already output floating point constant."); + default: + assert(0 && "Can't handle printing this type of thing"); case Type::IntegerTyID: { unsigned BitWidth = cast<IntegerType>(type)->getBitWidth(); if (BitWidth <= 8) - O << TAI->getData8bitsDirective(AddrSpace); + O << MAI->getData8bitsDirective(AddrSpace); else if (BitWidth <= 16) - O << TAI->getData16bitsDirective(AddrSpace); + O << MAI->getData16bitsDirective(AddrSpace); else if (BitWidth <= 32) - O << TAI->getData32bitsDirective(AddrSpace); + O << MAI->getData32bitsDirective(AddrSpace); else if (BitWidth <= 64) { - assert(TAI->getData64bitsDirective(AddrSpace) && + assert(MAI->getData64bitsDirective(AddrSpace) && "Target cannot handle 64-bit constant exprs!"); - O << TAI->getData64bitsDirective(AddrSpace); + O << MAI->getData64bitsDirective(AddrSpace); } else { - assert(0 && "Target cannot handle given data directive width!"); + llvm_unreachable("Target cannot handle given data directive width!"); } break; } case Type::PointerTyID: if (TD->getPointerSize() == 8) { - assert(TAI->getData64bitsDirective(AddrSpace) && + assert(MAI->getData64bitsDirective(AddrSpace) && "Target cannot handle 64-bit pointer exprs!"); - O << TAI->getData64bitsDirective(AddrSpace); + O << MAI->getData64bitsDirective(AddrSpace); } else if 
(TD->getPointerSize() == 2) { - O << TAI->getData16bitsDirective(AddrSpace); + O << MAI->getData16bitsDirective(AddrSpace); } else if (TD->getPointerSize() == 1) { - O << TAI->getData8bitsDirective(AddrSpace); + O << MAI->getData8bitsDirective(AddrSpace); } else { - O << TAI->getData32bitsDirective(AddrSpace); + O << MAI->getData32bitsDirective(AddrSpace); } break; - case Type::FloatTyID: case Type::DoubleTyID: - case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - assert (0 && "Should have already output floating point constant."); - default: - assert (0 && "Can't handle printing this type of thing"); - break; } } -void AsmPrinter::printSuffixedName(const char *Name, const char *Suffix, - const char *Prefix) { - if (Name[0]=='\"') - O << '\"'; - O << TAI->getPrivateGlobalPrefix(); - if (Prefix) O << Prefix; - if (Name[0]=='\"') - O << '\"'; - if (Name[0]=='\"') - O << Name[1]; - else - O << Name; - O << Suffix; - if (Name[0]=='\"') - O << '\"'; -} - -void AsmPrinter::printSuffixedName(const std::string &Name, const char* Suffix) { - printSuffixedName(Name.c_str(), Suffix); -} - void AsmPrinter::printVisibility(const std::string& Name, unsigned Visibility) const { if (Visibility == GlobalValue::HiddenVisibility) { - if (const char *Directive = TAI->getHiddenDirective()) + if (const char *Directive = MAI->getHiddenDirective()) O << Directive << Name << '\n'; } else if (Visibility == GlobalValue::ProtectedVisibility) { - if (const char *Directive = TAI->getProtectedDirective()) + if (const char *Directive = MAI->getProtectedDirective()) O << Directive << Name << '\n'; } } @@ -1746,6 +1760,104 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { return GMP; } - cerr << "no GCMetadataPrinter registered for GC: " << Name << "\n"; - abort(); + errs() << "no GCMetadataPrinter registered for GC: " << Name << "\n"; + llvm_unreachable(0); +} + +/// EmitComments - Pretty-print comments for instructions +void 
AsmPrinter::EmitComments(const MachineInstr &MI) const { + assert(VerboseAsm && !MI.getDebugLoc().isUnknown()); + + DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc()); + + // Print source line info. + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " SrcLine "; + if (DLT.Scope) { + DICompileUnit CU(DLT.Scope); + if (!CU.isNull()) + O << CU.getFilename() << " "; + } + O << DLT.Line; + if (DLT.Col != 0) + O << ":" << DLT.Col; +} + +/// PrintChildLoopComment - Print comments about child loops within +/// the loop for this basic block, with nesting. +/// +static void PrintChildLoopComment(formatted_raw_ostream &O, + const MachineLoop *loop, + const MCAsmInfo *MAI, + int FunctionNumber) { + // Add child loop information + for(MachineLoop::iterator cl = loop->begin(), + clend = loop->end(); + cl != clend; + ++cl) { + MachineBasicBlock *Header = (*cl)->getHeader(); + assert(Header && "No header for loop"); + + O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + + O << MAI->getCommentString(); + O.indent(((*cl)->getLoopDepth()-1)*2) + << " Child Loop BB" << FunctionNumber << "_" + << Header->getNumber() << " Depth " << (*cl)->getLoopDepth(); + + PrintChildLoopComment(O, *cl, MAI, FunctionNumber); + } +} + +/// EmitComments - Pretty-print comments for basic blocks +void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const +{ + if (VerboseAsm) { + // Add loop depth information + const MachineLoop *loop = LI->getLoopFor(&MBB); + + if (loop) { + // Print a newline after bb# annotation. 
+ O << "\n"; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Loop Depth " << loop->getLoopDepth() + << '\n'; + + O.PadToColumn(MAI->getCommentColumn()); + + MachineBasicBlock *Header = loop->getHeader(); + assert(Header && "No header for loop"); + + if (Header == &MBB) { + O << MAI->getCommentString() << " Loop Header"; + PrintChildLoopComment(O, loop, MAI, getFunctionNumber()); + } + else { + O << MAI->getCommentString() << " Loop Header is BB" + << getFunctionNumber() << "_" << loop->getHeader()->getNumber(); + } + + if (loop->empty()) { + O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Inner Loop"; + } + + // Add parent loop information + for (const MachineLoop *CurLoop = loop->getParentLoop(); + CurLoop; + CurLoop = CurLoop->getParentLoop()) { + MachineBasicBlock *Header = CurLoop->getHeader(); + assert(Header && "No header for loop"); + + O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString(); + O.indent((CurLoop->getLoopDepth()-1)*2) + << " Inside Loop BB" << getFunctionNumber() << "_" + << Header->getNumber() << " Depth " << CurLoop->getLoopDepth(); + } + } + } } diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 01c431c..ecf0007 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -14,9 +14,10 @@ #include "DIE.h" #include "DwarfPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" -#include <ostream> +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -75,24 +76,24 @@ void DIEAbbrev::Emit(const AsmPrinter *Asm) const { } #ifndef NDEBUG -void DIEAbbrev::print(std::ostream &O) { +void DIEAbbrev::print(raw_ostream &O) { O << "Abbreviation @" - << std::hex << (intptr_t)this 
<< std::dec + << format("0x%lx", (long)(intptr_t)this) << " " << dwarf::TagString(Tag) << " " << dwarf::ChildrenString(ChildrenFlag) - << "\n"; + << '\n'; for (unsigned i = 0, N = Data.size(); i < N; ++i) { O << " " << dwarf::AttributeString(Data[i].getAttribute()) << " " << dwarf::FormEncodingString(Data[i].getForm()) - << "\n"; + << '\n'; } } -void DIEAbbrev::dump() { print(cerr); } +void DIEAbbrev::dump() { print(errs()); } #endif //===----------------------------------------------------------------------===// @@ -125,7 +126,7 @@ void DIE::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIE::print(std::ostream &O, unsigned IncIndent) { +void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; const std::string Indent(IndentCount, ' '); bool isBlock = Abbrev.getTag() == 0; @@ -133,7 +134,7 @@ void DIE::print(std::ostream &O, unsigned IncIndent) { if (!isBlock) { O << Indent << "Die: " - << "0x" << std::hex << (intptr_t)this << std::dec + << format("0x%lx", (long)(intptr_t)this) << ", Offset: " << Offset << ", Size: " << Size << "\n"; @@ -175,14 +176,14 @@ void DIE::print(std::ostream &O, unsigned IncIndent) { } void DIE::dump() { - print(cerr); + print(errs()); } #endif #ifndef NDEBUG void DIEValue::dump() { - print(cerr); + print(errs()); } #endif @@ -206,7 +207,7 @@ void DIEInteger::EmitValue(Dwarf *D, unsigned Form) const { case dwarf::DW_FORM_data8: Asm->EmitInt64(Integer); break; case dwarf::DW_FORM_udata: Asm->EmitULEB128Bytes(Integer); break; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128Bytes(Integer); break; - default: assert(0 && "DIE Value form not supported yet"); break; + default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -223,9 +224,9 @@ unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const { case dwarf::DW_FORM_data4: return sizeof(int32_t); case dwarf::DW_FORM_ref8: // Fall thru case dwarf::DW_FORM_data8: return sizeof(int64_t); - case dwarf::DW_FORM_udata: return 
TargetAsmInfo::getULEB128Size(Integer); - case dwarf::DW_FORM_sdata: return TargetAsmInfo::getSLEB128Size(Integer); - default: assert(0 && "DIE Value form not supported yet"); break; + case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + default: llvm_unreachable("DIE Value form not supported yet"); break; } return 0; } @@ -241,9 +242,9 @@ void DIEInteger::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEInteger::print(std::ostream &O) { +void DIEInteger::print(raw_ostream &O) { O << "Int: " << (int64_t)Integer - << " 0x" << std::hex << Integer << std::dec; + << format(" 0x%llx", (unsigned long long)Integer); } #endif @@ -268,7 +269,7 @@ void DIEString::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEString::print(std::ostream &O) { +void DIEString::print(raw_ostream &O) { O << "Str: \"" << Str << "\""; } #endif @@ -302,7 +303,7 @@ void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEDwarfLabel::print(std::ostream &O) { +void DIEDwarfLabel::print(raw_ostream &O) { O << "Lbl: "; Label.print(O); } @@ -337,7 +338,7 @@ void DIEObjectLabel::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEObjectLabel::print(std::ostream &O) { +void DIEObjectLabel::print(raw_ostream &O) { O << "Obj: " << Label; } #endif @@ -377,7 +378,7 @@ void DIESectionOffset::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIESectionOffset::print(std::ostream &O) { +void DIESectionOffset::print(raw_ostream &O) { O << "Off: "; Label.print(O); O << "-"; @@ -417,7 +418,7 @@ void DIEDelta::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEDelta::print(std::ostream &O) { +void DIEDelta::print(raw_ostream &O) { O << "Del: "; LabelHi.print(O); O << "-"; @@ -451,8 +452,8 @@ void DIEEntry::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEEntry::print(std::ostream &O) { - O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec; +void 
DIEEntry::print(raw_ostream &O) { + O << format("Die: 0x%lx", (long)(intptr_t)Entry); } #endif @@ -481,7 +482,7 @@ void DIEBlock::EmitValue(Dwarf *D, unsigned Form) const { case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; case dwarf::DW_FORM_block: Asm->EmitULEB128Bytes(Size); break; - default: assert(0 && "Improper form for block"); break; + default: llvm_unreachable("Improper form for block"); break; } const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData(); @@ -498,8 +499,8 @@ unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); - case dwarf::DW_FORM_block: return Size + TargetAsmInfo::getULEB128Size(Size); - default: assert(0 && "Improper form for block"); break; + case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); break; } return 0; } @@ -510,7 +511,7 @@ void DIEBlock::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEBlock::print(std::ostream &O) { +void DIEBlock::print(raw_ostream &O) { O << "Blk: "; DIE::print(O, 5); } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 5b60327..62b51ec 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -19,8 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/raw_ostream.h" -#include <iosfwd> +#include <vector> namespace llvm { class AsmPrinter; @@ -103,10 +102,7 @@ namespace llvm { void Emit(const AsmPrinter *Asm) const; #ifndef NDEBUG - void print(std::ostream *O) { - if (O) print(*O); - } - void print(std::ostream &O); + void print(raw_ostream &O); void dump(); #endif }; @@ -198,10 +194,7 @@ namespace llvm { void 
Profile(FoldingSetNodeID &ID) ; #ifndef NDEBUG - void print(std::ostream *O, unsigned IncIndent = 0) { - if (O) print(*O, IncIndent); - } - void print(std::ostream &O, unsigned IncIndent = 0); + void print(raw_ostream &O, unsigned IncIndent = 0); void dump(); #endif }; @@ -248,10 +241,7 @@ namespace llvm { static bool classof(const DIEValue *) { return true; } #ifndef NDEBUG - void print(std::ostream *O) { - if (O) print(*O); - } - virtual void print(std::ostream &O) = 0; + virtual void print(raw_ostream &O) = 0; void dump(); #endif }; @@ -297,7 +287,7 @@ namespace llvm { static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -329,7 +319,7 @@ namespace llvm { static bool classof(const DIEValue *S) { return S->getType() == isString; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -359,7 +349,7 @@ namespace llvm { static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -392,7 +382,7 @@ namespace llvm { } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -431,7 +421,7 @@ namespace llvm { } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -464,7 +454,7 @@ namespace llvm { static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -500,7 +490,7 @@ namespace llvm { static bool classof(const DIEValue *E) { return E->getType() == isEntry; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -544,7 +534,7 @@ namespace llvm { static bool classof(const DIEValue *E) { return E->getType() == 
isBlock; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 547140f..4394ec0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -10,16 +10,24 @@ // This file contains support for writing dwarf debug info into asm files. // //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "llvm/Module.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/Timer.h" -#include "llvm/System/Path.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/Timer.h" +#include "llvm/System/Path.h" using namespace llvm; static TimerGroup &getDwarfTimerGroup() { @@ -51,11 +59,13 @@ class VISIBILITY_HIDDEN CompileUnit { /// GVToDieMap - Tracks the mapping of unit level debug informaton /// variables to debug information entries. - std::map<GlobalVariable *, DIE *> GVToDieMap; + /// FIXME : Rename GVToDieMap -> NodeToDieMap + std::map<MDNode *, DIE *> GVToDieMap; /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton /// descriptors to debug information entries using a DIEEntry proxy. 
- std::map<GlobalVariable *, DIEEntry *> GVToDIEEntryMap; + /// FIXME : Rename + std::map<MDNode *, DIEEntry *> GVToDIEEntryMap; /// Globals - A map of globally visible named entities for this unit. /// @@ -84,12 +94,12 @@ public: /// getDieMapSlotFor - Returns the debug information entry map slot for the /// specified debug variable. - DIE *&getDieMapSlotFor(GlobalVariable *GV) { return GVToDieMap[GV]; } + DIE *&getDieMapSlotFor(MDNode *N) { return GVToDieMap[N]; } - /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for the - /// specified debug variable. - DIEEntry *&getDIEEntrySlotFor(GlobalVariable *GV) { - return GVToDIEEntryMap[GV]; + /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for + /// the specified debug variable. + DIEEntry *&getDIEEntrySlotFor(MDNode *N) { + return GVToDIEEntryMap[N]; } /// AddDie - Adds or interns the DIE to the compile unit. @@ -138,15 +148,18 @@ class VISIBILITY_HIDDEN DbgScope { // Either subprogram or block. unsigned StartLabelID; // Label ID of the beginning of scope. unsigned EndLabelID; // Label ID of the end of scope. + const MachineInstr *LastInsn; // Last instruction of this scope. + const MachineInstr *FirstInsn; // First instruction of this scope. SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope. SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope. SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs. - + // Private state for dump() mutable unsigned IndentLevel; public: DbgScope(DbgScope *P, DIDescriptor D) - : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), IndentLevel(0) {} + : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), LastInsn(0), + FirstInsn(0), IndentLevel(0) {} virtual ~DbgScope(); // Accessors. 
@@ -159,7 +172,10 @@ public: SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; } void setStartLabelID(unsigned S) { StartLabelID = S; } void setEndLabelID(unsigned E) { EndLabelID = E; } - + void setLastInsn(const MachineInstr *MI) { LastInsn = MI; } + const MachineInstr *getLastInsn() { return LastInsn; } + void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; } + const MachineInstr *getFirstInsn() { return FirstInsn; } /// AddScope - Add a scope to the scope. /// void AddScope(DbgScope *S) { Scopes.push_back(S); } @@ -172,6 +188,21 @@ public: /// void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); } + void FixInstructionMarkers() { + assert (getFirstInsn() && "First instruction is missing!"); + if (getLastInsn()) + return; + + // If a scope does not have an instruction to mark an end then use + // the end of last child scope. + SmallVector<DbgScope *, 4> &Scopes = getScopes(); + assert (!Scopes.empty() && "Inner most scope does not have last insn!"); + DbgScope *L = Scopes.back(); + if (!L->getLastInsn()) + L->FixInstructionMarkers(); + setLastInsn(L->getLastInsn()); + } + #ifndef NDEBUG void dump() const; #endif @@ -179,10 +210,10 @@ public: #ifndef NDEBUG void DbgScope::dump() const { - std::string Indent(IndentLevel, ' '); - - cerr << Indent; Desc.dump(); - cerr << " [" << StartLabelID << ", " << EndLabelID << "]\n"; + raw_ostream &err = errs(); + err.indent(IndentLevel); + Desc.dump(); + err << " [" << StartLabelID << ", " << EndLabelID << "]\n"; IndentLevel += 2; @@ -220,10 +251,10 @@ DbgScope::~DbgScope() { } // end llvm namespace -DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T) +DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T) : Dwarf(OS, A, T, "dbg"), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(), - ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionMap(), + ValuesSet(InitValuesSetSize), Values(), StringPool(), 
SectionSourceLines(), didInitial(false), shouldEmit(false), FunctionDbgScope(0), DebugTimer(0) { if (TimePassesIsEnabled) @@ -234,7 +265,7 @@ DwarfDebug::~DwarfDebug() { for (unsigned j = 0, M = Values.size(); j < M; ++j) delete Values[j]; - for (DenseMap<const GlobalVariable *, DbgScope *>::iterator + for (DenseMap<const MDNode *, DbgScope *>::iterator I = AbstractInstanceRootMap.begin(), E = AbstractInstanceRootMap.end(); I != E;++I) delete I->second; @@ -479,6 +510,27 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) { AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } + +/// AddSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) { + // If there is no compile unit specified, don't add a line #. + if (SP->getCompileUnit().isNull()) + return; + // If the line number is 0, don't add it. + if (SP->getLineNumber() == 0) + return; + + + unsigned Line = SP->getLineNumber(); + unsigned FileID = FindCompileUnit(SP->getCompileUnit()).getID(); + assert(FileID && "Invalid file id"); + AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// AddSourceLine - Add location information to specified debug information +/// entry. void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) { // If there is no compile unit specified, don't add a line #. DICompileUnit CU = Ty->getCompileUnit(); @@ -492,6 +544,270 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) { AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } +/* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. 
+ + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function AddBlockByrefType does this. */ + +/// Find the type the programmer originally declared the variable to be +/// and return that type. 
+/// +DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) { + + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty.getNode()); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType.getNode()); + + DIArray Elements = blockStruct.getTypeArray(); + + if (Elements.isNull()) + return Ty; + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element.getNode()); + if (strcmp(Name.c_str(), DT.getName()) == 0) + return (DT.getTypeDerivedFrom()); + } + + return Ty; +} + +/// AddComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + const DIVariable &VD = DV->getVariable(); + DIType Ty = VD.getType(); + + // Decode the original location, and use that as the start of the byref + // variable's location. 
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + } else { + Reg = Reg - dwarf::DW_OP_reg0; + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) { + uint64_t Element = VD.getAddrElement(i); + + if (Element == DIFactory::OpPlus) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i)); + } else if (Element == DIFactory::OpDeref) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else llvm_unreachable("unknown DIFactory Opcode"); + } + + // Now attach the location information to the DIE. + AddBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function GetBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. 
+ However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... <various fields> + struct __Block_byref_x_VarName *forwarding; + ... <various other fields> + SomeType VarName; + ... <maybe more fields> + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst <forward_fld_offset> + DW_OP_deref + DW_OP_plus_uconst <varName_fld_offset> + + That is what this function does. */ + +/// AddBlockByrefAddress - Start with the address based on the location +/// provided, and generate the DWARF information necessary to find the +/// actual Block variable (navigating the Block struct) based on the +/// starting location. Add the DWARF information to the die. For +/// more information, read large comment just above here. 
+/// +void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + const DIVariable &VD = DV->getVariable(); + DIType Ty = VD.getType(); + DIType TmpTy = Ty; + unsigned Tag = Ty.getTag(); + bool isPointer = false; + + const char *varName = VD.getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty.getNode()); + TmpTy = DTy.getTypeDerivedFrom(); + isPointer = true; + } + + DICompositeType blockStruct = DICompositeType(TmpTy.getNode()); + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. + DIArray Fields = blockStruct.getTypeArray(); + DIDescriptor varField = DIDescriptor(); + DIDescriptor forwardingField = DIDescriptor(); + + + for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { + DIDescriptor Element = Fields.getElement(i); + DIDerivedType DT = DIDerivedType(Element.getNode()); + const char *fieldName = DT.getName(); + if (strcmp(fieldName, "__forwarding") == 0) + forwardingField = Element; + else if (strcmp(fieldName, varName) == 0) + varField = Element; + } + + assert(!varField.isNull() && "Can't find byref variable in Block struct"); + assert(!forwardingField.isNull() + && "Can't find forwarding field in Block struct"); + + // Get the offsets for the forwarding field and the variable field. + unsigned int forwardingFieldOffset = + DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3; + unsigned int varFieldOffset = + DIDerivedType(varField.getNode()).getOffsetInBits() >> 3; + + // Decode the original location, and use that as the start of the byref + // variable's location. 
+ unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + else { + Reg = Reg - dwarf::DW_OP_reg0; + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). + if (isPointer) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. + if (forwardingFieldOffset > 0) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + AddBlock(Die, Attribute, 0, Block); +} + /// AddAddress - Add an address attribute to a die based on the location /// provided. 
void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute, @@ -526,7 +842,7 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { return; // Check for pre-existence. - DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getGV()); + DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getNode()); // If it exists then use the existing value. if (Slot) { @@ -539,20 +855,20 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { // Construct type. DIE Buffer(dwarf::DW_TAG_base_type); - if (Ty.isBasicType(Ty.getTag())) - ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getGV())); - else if (Ty.isDerivedType(Ty.getTag())) - ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getGV())); + if (Ty.isBasicType()) + ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getNode())); + else if (Ty.isCompositeType()) + ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getNode())); else { - assert(Ty.isCompositeType(Ty.getTag()) && "Unknown kind of DIType"); - ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getGV())); + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getNode())); } // Add debug information entry to entity and appropriate context. DIE *Die = NULL; DIDescriptor Context = Ty.getContext(); if (!Context.isNull()) - Die = DW_Unit->getDieMapSlotFor(Context.getGV()); + Die = DW_Unit->getDieMapSlotFor(Context.getNode()); if (Die) { DIE *Child = new DIE(Buffer); @@ -571,14 +887,13 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIBasicType BTy) { // Get core information. - std::string Name; - BTy.getName(Name); + const char *Name = BTy.getName(); Buffer.setTag(dwarf::DW_TAG_base_type); AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); // Add name if not anonymous or intermediate type. 
- if (!Name.empty()) + if (Name) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); uint64_t Size = BTy.getSizeInBits() >> 3; AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -588,8 +903,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIDerivedType DTy) { // Get core information. - std::string Name; - DTy.getName(Name); + const char *Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; unsigned Tag = DTy.getTag(); @@ -603,7 +917,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, AddType(DW_Unit, &Buffer, FromTy); // Add name if not anonymous or intermediate type. - if (!Name.empty()) + if (Name) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); // Add size if non-zero (derived types might be zero-sized.) @@ -619,8 +933,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DICompositeType CTy) { // Get core information. - std::string Name; - CTy.getName(Name); + const char *Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; unsigned Tag = CTy.getTag(); @@ -637,9 +950,11 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add enumerators to enumeration type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIE *ElemDie = NULL; - DIEnumerator Enum(Elements.getElement(i).getGV()); - ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum); - Buffer.AddChild(ElemDie); + DIEnumerator Enum(Elements.getElement(i).getNode()); + if (!Enum.isNull()) { + ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum); + Buffer.AddChild(ElemDie); + } } } break; @@ -647,7 +962,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add return type. 
DIArray Elements = CTy.getTypeArray(); DIDescriptor RTy = Elements.getElement(0); - AddType(DW_Unit, &Buffer, DIType(RTy.getGV())); + AddType(DW_Unit, &Buffer, DIType(RTy.getNode())); // Add prototype flag. AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); @@ -656,7 +971,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIDescriptor Ty = Elements.getElement(i); - AddType(DW_Unit, Arg, DIType(Ty.getGV())); + AddType(DW_Unit, Arg, DIType(Ty.getNode())); Buffer.AddChild(Arg); } } @@ -674,20 +989,19 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add elements to structure type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); + if (Element.isNull()) + continue; DIE *ElemDie = NULL; if (Element.getTag() == dwarf::DW_TAG_subprogram) ElemDie = CreateSubprogramDIE(DW_Unit, - DISubprogram(Element.getGV())); + DISubprogram(Element.getNode())); else ElemDie = CreateMemberDIE(DW_Unit, - DIDerivedType(Element.getGV())); + DIDerivedType(Element.getNode())); Buffer.AddChild(ElemDie); } - // FIXME: We'd like an API to register additional attributes for the - // frontend to use while synthesizing, and then we'd use that api in clang - // instead of this. - if (Name == "__block_literal_generic") + if (CTy.isAppleBlockExtension()) AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); unsigned RLang = CTy.getRunTimeLang(); @@ -701,7 +1015,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } // Add name if not anonymous or intermediate type. 
- if (!Name.empty()) + if (Name) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); if (Tag == dwarf::DW_TAG_enumeration_type || @@ -729,12 +1043,11 @@ void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ int64_t H = SR.getHi(); DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - if (L != H) { - AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - if (L) - AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + if (L) + AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + if (H) AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); - } Buffer.AddChild(DW_Subrange); } @@ -761,15 +1074,14 @@ void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) - ConstructSubrangeDIE(Buffer, DISubrange(Element.getGV()), IndexTy); + ConstructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IndexTy); } } /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator. 
DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - std::string Name; - ETy->getName(Name); + const char *Name = ETy->getName(); AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); int64_t Value = ETy->getEnumValue(); AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); @@ -780,27 +1092,39 @@ DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit, const DIGlobalVariable &GV) { DIE *GVDie = new DIE(dwarf::DW_TAG_variable); - std::string Name; - GV.getDisplayName(Name); - AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - std::string LinkageName; - GV.getLinkageName(LinkageName); - if (!LinkageName.empty()) + AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + GV.getDisplayName()); + + const char *LinkageName = GV.getLinkageName(); + if (LinkageName) { + // Skip special LLVM prefix that is used to inform the asm printer to not + // emit usual symbol prefix before the symbol name. This happens for + // Objective-C symbol names and symbol whose name is replaced using GCC's + // __asm__ attribute. + if (LinkageName[0] == 1) + LinkageName = &LinkageName[1]; AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); + } AddType(DW_Unit, GVDie, GV.getType()); if (!GV.isLocalToUnit()) AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); AddSourceLine(GVDie, &GV); + + // Add address. + DIEBlock *Block = new DIEBlock(); + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + AddObjectLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getMangledName(GV.getGlobal())); + AddBlock(GVDie, dwarf::DW_AT_location, 0, Block); + return GVDie; } /// CreateMemberDIE - Create new member DIE. 
DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ DIE *MemberDie = new DIE(DT.getTag()); - std::string Name; - DT.getName(Name); - if (!Name.empty()) + if (const char *Name = DT.getName()) AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom()); @@ -849,17 +1173,19 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, bool IsInlined) { DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram); - std::string Name; - SP.getName(Name); + const char * Name = SP.getName(); AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - std::string LinkageName; - SP.getLinkageName(LinkageName); - - if (!LinkageName.empty()) + const char *LinkageName = SP.getLinkageName(); + if (LinkageName) { + // Skip special LLVM prefix that is used to inform the asm printer to not emit + // usual symbol prefix before the symbol name. This happens for Objective-C + // symbol names and symbol whose name is replaced using GCC's __asm__ attribute. 
+ if (LinkageName[0] == 1) + LinkageName = &LinkageName[1]; AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); - + } AddSourceLine(SPDie, &SP); DICompositeType SPTy = SP.getType(); @@ -877,7 +1203,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) AddType(DW_Unit, SPDie, SPTy); else - AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getGV())); + AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode())); } if (!SP.isDefinition()) { @@ -888,7 +1214,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - AddType(DW_Unit, Arg, DIType(Args.getElement(i).getGV())); + AddType(DW_Unit, Arg, DIType(Args.getElement(i).getNode())); AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? SPDie->AddChild(Arg); } @@ -898,7 +1224,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); // DW_TAG_inlined_subroutine may refer to this DIE. - DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getGV()); + DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode()); Slot = SPDie; return SPDie; } @@ -907,7 +1233,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, /// CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const { DenseMap<Value *, CompileUnit *>::const_iterator I = - CompileUnitMap.find(Unit.getGV()); + CompileUnitMap.find(Unit.getNode()); assert(I != CompileUnitMap.end() && "Missing compile unit."); return *I->second; } @@ -935,15 +1261,18 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { // Define variable debug information entry. 
DIE *VariableDie = new DIE(Tag); - std::string Name; - VD.getName(Name); + const char *Name = VD.getName(); AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); // Add source line info if available. AddSourceLine(VariableDie, &VD); // Add variable type. - AddType(Unit, VariableDie, VD.getType()); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex addresses instead. + if (VD.isBlockByrefVariable()) + AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name)); + else + AddType(Unit, VariableDie, VD.getType()); // Add variable address. if (!DV->isInlinedFnVar()) { @@ -952,7 +1281,14 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { MachineLocation Location; Location.set(RI->getFrameRegister(*MF), RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); - AddAddress(VariableDie, dwarf::DW_AT_location, Location); + + + if (VD.hasComplexAddress()) + AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else if (VD.isBlockByrefVariable()) + AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else + AddAddress(VariableDie, dwarf::DW_AT_location, Location); } return VariableDie; @@ -960,26 +1296,64 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { /// getOrCreateScope - Returns the scope associated with the given descriptor. 
/// -DbgScope *DwarfDebug::getOrCreateScope(GlobalVariable *V) { - DbgScope *&Slot = DbgScopeMap[V]; +DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI) { + DbgScope *&Slot = DbgScopeMap[N]; + if (Slot) return Slot; + + DbgScope *Parent = NULL; + + DIDescriptor Scope(N); + if (Scope.isCompileUnit()) { + return NULL; + } else if (Scope.isSubprogram()) { + DISubprogram SP(N); + DIDescriptor ParentDesc = SP.getContext(); + if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit()) + Parent = getDbgScope(ParentDesc.getNode(), MI); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(N); + DIDescriptor ParentDesc = DB.getContext(); + if (!ParentDesc.isNull()) + Parent = getDbgScope(ParentDesc.getNode(), MI); + } else + assert (0 && "Unexpected scope info"); + + Slot = new DbgScope(Parent, DIDescriptor(N)); + Slot->setFirstInsn(MI); + + if (Parent) + Parent->AddScope(Slot); + else + // First function is top level function. + if (!FunctionDbgScope) + FunctionDbgScope = Slot; + + return Slot; +} + + +/// getOrCreateScope - Returns the scope associated with the given descriptor. +/// FIXME - Remove this method. +DbgScope *DwarfDebug::getOrCreateScope(MDNode *N) { + DbgScope *&Slot = DbgScopeMap[N]; if (Slot) return Slot; DbgScope *Parent = NULL; - DIBlock Block(V); + DILexicalBlock Block(N); // Don't create a new scope if we already created one for an inlined function. - DenseMap<const GlobalVariable *, DbgScope *>::iterator - II = AbstractInstanceRootMap.find(V); + DenseMap<const MDNode *, DbgScope *>::iterator + II = AbstractInstanceRootMap.find(N); if (II != AbstractInstanceRootMap.end()) return LexicalScopeStack.back(); if (!Block.isNull()) { DIDescriptor ParentDesc = Block.getContext(); Parent = - ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getGV()); + ParentDesc.isNull() ? 
NULL : getOrCreateScope(ParentDesc.getNode()); } - Slot = new DbgScope(Parent, DIDescriptor(V)); + Slot = new DbgScope(Parent, DIDescriptor(N)); if (Parent) Parent->AddScope(Slot); @@ -1088,10 +1462,14 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, return; // Get the subprogram debug information entry. - DISubprogram SPD(Desc.getGV()); + DISubprogram SPD(Desc.getNode()); // Get the subprogram die. - DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getGV()); + DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); + if (!SPDie) { + ConstructSubprogram(SPD.getNode()); + SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); + } assert(SPDie && "Missing subprogram descriptor"); if (!AbstractScope) { @@ -1105,23 +1483,33 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, } ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU); + // If there are global variables at this scope then add their dies. + for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(), + SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { + MDNode *N = dyn_cast_or_null<MDNode>(*SGI); + if (!N) continue; + DIGlobalVariable GV(N); + if (GV.getContext().getNode() == RootScope->getDesc().getNode()) { + DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV); + SPDie->AddChild(ScopedGVDie); + } + } } /// ConstructDefaultDbgScope - Construct a default scope for the subprogram. /// void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { - const char *FnName = MF->getFunction()->getNameStart(); StringMap<DIE*> &Globals = ModuleCU->getGlobals(); - StringMap<DIE*>::iterator GI = Globals.find(FnName); + StringMap<DIE*>::iterator GI = Globals.find(MF->getFunction()->getName()); if (GI != Globals.end()) { DIE *SPDie = GI->second; - + // Add the function bounds. 
AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, DWLabel("func_begin", SubprogramCount)); AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, DWLabel("func_end", SubprogramCount)); - + MachineLocation Location(RI->getFrameRegister(*MF)); AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); } @@ -1131,8 +1519,8 @@ void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName, - const std::string &FileName) { +unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName, + const char *FileName) { unsigned DId; StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName); if (DI != DirectoryIdMap.end()) { @@ -1165,30 +1553,28 @@ unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName, return SrcId; } -void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) { - DICompileUnit DIUnit(GV); - std::string Dir, FN, Prod; - unsigned ID = GetOrCreateSourceID(DIUnit.getDirectory(Dir), - DIUnit.getFilename(FN)); +void DwarfDebug::ConstructCompileUnit(MDNode *N) { + DICompileUnit DIUnit(N); + const char *FN = DIUnit.getFilename(); + const char *Dir = DIUnit.getDirectory(); + unsigned ID = GetOrCreateSourceID(Dir, FN); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, DWLabel("section_line", 0), DWLabel("section_line", 0), false); AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer(Prod)); + DIUnit.getProducer()); AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, DIUnit.getLanguage()); AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - if (!Dir.empty()) + if (Dir) AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); if (DIUnit.isOptimized()) AddUInt(Die, 
dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); - std::string Flags; - DIUnit.getFlags(Flags); - if (!Flags.empty()) + if (const char *Flags = DIUnit.getFlags()) AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); unsigned RVer = DIUnit.getRunTimeVersion(); @@ -1203,28 +1589,24 @@ void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) { ModuleCU = Unit; } - CompileUnitMap[DIUnit.getGV()] = Unit; + CompileUnitMap[DIUnit.getNode()] = Unit; CompileUnits.push_back(Unit); } -void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) { - DIGlobalVariable DI_GV(GV); +void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) { + DIGlobalVariable DI_GV(N); + + // If debug information is malformed then ignore it. + if (DI_GV.Verify() == false) + return; // Check for pre-existence. - DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getGV()); + DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getNode()); if (Slot) return; DIE *VariableDie = CreateGlobalVariableDIE(ModuleCU, DI_GV); - // Add address. - DIEBlock *Block = new DIEBlock(); - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - std::string GLN; - AddObjectLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->getGlobalLinkName(DI_GV.getGlobal(), GLN)); - AddBlock(VariableDie, dwarf::DW_AT_location, 0, Block); - // Add to map. Slot = VariableDie; @@ -1232,16 +1614,15 @@ void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) { ModuleCU->getDie()->AddChild(VariableDie); // Expose as global. FIXME - need to check external flag. - std::string Name; - ModuleCU->AddGlobal(DI_GV.getName(Name), VariableDie); + ModuleCU->AddGlobal(DI_GV.getName(), VariableDie); return; } -void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) { - DISubprogram SP(GV); +void DwarfDebug::ConstructSubprogram(MDNode *N) { + DISubprogram SP(N); // Check for pre-existence. 
- DIE *&Slot = ModuleCU->getDieMapSlotFor(GV); + DIE *&Slot = ModuleCU->getDieMapSlotFor(N); if (Slot) return; @@ -1259,28 +1640,25 @@ void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) { ModuleCU->getDie()->AddChild(SubprogramDie); // Expose as global. - std::string Name; - ModuleCU->AddGlobal(SP.getName(Name), SubprogramDie); + ModuleCU->AddGlobal(SP.getName(), SubprogramDie); return; } - /// BeginModule - Emit all Dwarf sections that should come prior to the - /// content. Create global DIEs and emit initial debug info sections. - /// This is inovked by the target AsmPrinter. +/// BeginModule - Emit all Dwarf sections that should come prior to the +/// content. Create global DIEs and emit initial debug info sections. +/// This is inovked by the target AsmPrinter. void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { this->M = M; if (TimePassesIsEnabled) DebugTimer->startTimer(); - SmallVector<GlobalVariable *, 2> CUs; - SmallVector<GlobalVariable *, 4> GVs; - SmallVector<GlobalVariable *, 4> SPs; - CollectDebugInfoAnchors(*M, CUs, GVs, SPs); + DebugInfoFinder DbgFinder; + DbgFinder.processModule(*M); // Create all the compile unit DIEs. - for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) ConstructCompileUnit(*I); if (CompileUnits.empty()) { @@ -1295,23 +1673,19 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { if (!ModuleCU) ModuleCU = CompileUnits[0]; - // If there is not any debug info available for any global variables and any - // subprograms then there is not any debug info to emit. - if (GVs.empty() && SPs.empty()) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return; - } - // Create DIEs for each of the externally visible global variables. 
- for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(), - E = GVs.end(); I != E; ++I) - ConstructGlobalVariableDIE(*I); + for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), + E = DbgFinder.global_variable_end(); I != E; ++I) { + DIGlobalVariable GV(*I); + if (GV.getContext().getNode() != GV.getCompileUnit().getNode()) + ScopedGVs.push_back(*I); + else + ConstructGlobalVariableDIE(*I); + } // Create DIEs for each of the externally visible subprograms. - for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(), - E = SPs.end(); I != E; ++I) + for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), + E = DbgFinder.subprogram_end(); I != E; ++I) ConstructSubprogram(*I); MMI = mmi; @@ -1319,11 +1693,11 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { MMI->setDebugInfoAvailability(true); // Prime section data. - SectionMap.insert(TAI->getTextSection()); + SectionMap.insert(Asm->getObjFileLowering().getTextSection()); // Print out .file directives to specify files for .loc directives. These are // printed out early so that they precede any .loc directives. - if (TAI->hasDotLocAndDotFile()) { + if (MAI->hasDotLocAndDotFile()) { for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) { // Remember source id starts at 1. std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i); @@ -1332,7 +1706,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { FullPath.appendComponent(getSourceFileName(Id.second)); assert(AppendOk && "Could not append filename to directory!"); AppendOk = false; - Asm->EmitFile(i, FullPath.toString()); + Asm->EmitFile(i, FullPath.str()); Asm->EOL(); } } @@ -1347,21 +1721,21 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { /// EndModule - Emit all Dwarf sections that should come after the content. 
/// void DwarfDebug::EndModule() { - if (!ShouldEmitDwarfDebug()) + if (!ModuleCU) return; if (TimePassesIsEnabled) DebugTimer->startTimer(); // Standard sections final addresses. - Asm->SwitchToSection(TAI->getTextSection()); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); EmitLabel("text_end", 0); - Asm->SwitchToSection(TAI->getDataSection()); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection()); EmitLabel("data_end", 0); // End text sections. for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) { - Asm->SwitchToSection(SectionMap[i]); + Asm->OutStreamer.SwitchSection(SectionMap[i]); EmitLabel("section_end", i); } @@ -1410,6 +1784,135 @@ void DwarfDebug::EndModule() { DebugTimer->stopTimer(); } +/// CollectVariableInfo - Populate DbgScope entries with variables' info. +void DwarfDebug::CollectVariableInfo() { + if (!MMI) return; + MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); + for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), + VE = VMap.end(); VI != VE; ++VI) { + MetadataBase *MB = VI->first; + MDNode *Var = dyn_cast_or_null<MDNode>(MB); + DIVariable DV (Var); + if (DV.isNull()) continue; + unsigned VSlot = VI->second; + DbgScope *Scope = getDbgScope(DV.getContext().getNode(), NULL); + Scope->AddVariable(new DbgVariable(DV, VSlot, false)); + } +} + +/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that +/// start with this machine instruction. +void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) { + InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); + if (I == DbgScopeBeginMap.end()) + return; + SmallVector<DbgScope *, 2> &SD = I->second; + for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); + SDI != SDE; ++SDI) + (*SDI)->setStartLabelID(Label); +} + +/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that +/// end with this machine instruction. 
+void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) { + InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); + if (I == DbgScopeEndMap.end()) + return; + SmallVector<DbgScope *, 2> &SD = I->second; + for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); + SDI != SDE; ++SDI) + (*SDI)->setEndLabelID(Label); +} + +/// ExtractScopeInformation - Scan machine instructions in this function +/// and collect DbgScopes. Return true, if atleast one scope was found. +bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { + // If scope information was extracted using .dbg intrinsics then there is not + // any need to extract these information by scanning each instruction. + if (!DbgScopeMap.empty()) + return false; + + // Scan each instruction and create scopes. + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MInsn = II; + DebugLoc DL = MInsn->getDebugLoc(); + if (DL.isUnknown()) + continue; + DebugLocTuple DLT = MF->getDebugLocTuple(DL); + if (!DLT.Scope) + continue; + // There is no need to create another DIE for compile unit. For all + // other scopes, create one DbgScope now. This will be translated + // into a scope DIE at the end. + DIDescriptor D(DLT.Scope); + if (!D.isCompileUnit()) { + DbgScope *Scope = getDbgScope(DLT.Scope, MInsn); + Scope->setLastInsn(MInsn); + } + } + } + + // If a scope's last instruction is not set then use its child scope's + // last instruction as this scope's last instrunction. 
+ for (DenseMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(), + DE = DbgScopeMap.end(); DI != DE; ++DI) { + assert (DI->second->getFirstInsn() && "Invalid first instruction!"); + DI->second->FixInstructionMarkers(); + assert (DI->second->getLastInsn() && "Invalid last instruction!"); + } + + // Each scope has first instruction and last instruction to mark beginning + // and end of a scope respectively. Create an inverse map that list scopes + // starts (and ends) with an instruction. One instruction may start (or end) + // multiple scopes. + for (DenseMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(), + DE = DbgScopeMap.end(); DI != DE; ++DI) { + DbgScope *S = DI->second; + assert (S && "DbgScope is missing!"); + const MachineInstr *MI = S->getFirstInsn(); + assert (MI && "DbgScope does not have first instruction!"); + + InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI); + if (IDI != DbgScopeBeginMap.end()) + IDI->second.push_back(S); + else + DbgScopeBeginMap.insert(std::make_pair(MI, + SmallVector<DbgScope *, 2>(2, S))); + + MI = S->getLastInsn(); + assert (MI && "DbgScope does not have last instruction!"); + IDI = DbgScopeEndMap.find(MI); + if (IDI != DbgScopeEndMap.end()) + IDI->second.push_back(S); + else + DbgScopeEndMap.insert(std::make_pair(MI, + SmallVector<DbgScope *, 2>(2, S))); + } + + return !DbgScopeMap.empty(); +} + +static DISubprogram getDISubprogram(MDNode *N) { + + DIDescriptor D(N); + if (D.isNull()) + return DISubprogram(); + + if (D.isCompileUnit()) + return DISubprogram(); + + if (D.isSubprogram()) + return DISubprogram(N); + + if (D.isLexicalBlock()) + return getDISubprogram(DILexicalBlock(N).getContext().getNode()); + + llvm_unreachable("Unexpected Descriptor!"); +} + /// BeginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. 
void DwarfDebug::BeginFunction(MachineFunction *MF) { @@ -1420,6 +1923,12 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (!ExtractScopeInformation(MF)) + return; + CollectVariableInfo(); +#endif + // Begin accumulating function debug information. MMI->BeginFunction(MF); @@ -1428,14 +1937,28 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { // Emit label for the implicitly defined dbg.stoppoint at the start of the // function. +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN DebugLoc FDL = MF->getDefaultDebugLoc(); if (!FDL.isUnknown()) { DebugLocTuple DLT = MF->getDebugLocTuple(FDL); - unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, - DICompileUnit(DLT.CompileUnit)); + unsigned LabelID = 0; + DISubprogram SP = getDISubprogram(DLT.Scope); + if (!SP.isNull()) + LabelID = RecordSourceLine(SP.getLineNumber(), 0, DLT.Scope); + else + LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope); Asm->printLabel(LabelID); + O << '\n'; } - +#else + DebugLoc FDL = MF->getDefaultDebugLoc(); + if (!FDL.isUnknown()) { + DebugLocTuple DLT = MF->getDebugLocTuple(FDL); + unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope); + Asm->printLabel(LabelID); + O << '\n'; + } +#endif if (TimePassesIsEnabled) DebugTimer->stopTimer(); } @@ -1448,13 +1971,17 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (DbgScopeMap.empty()) + return; +#endif // Define end label for subprogram. EmitLabel("func_end", SubprogramCount); // Get function line info. if (!Lines.empty()) { // Get section line info. 
- unsigned ID = SectionMap.insert(Asm->CurrentSection_); + unsigned ID = SectionMap.insert(Asm->getCurrentSection()); if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1]; // Append the function info to section info. @@ -1489,9 +2016,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { if (FunctionDbgScope) { delete FunctionDbgScope; DbgScopeMap.clear(); + DbgScopeBeginMap.clear(); + DbgScopeEndMap.clear(); DbgAbstractScopeMap.clear(); DbgConcreteScopeMap.clear(); - InlinedVariableScopes.clear(); FunctionDbgScope = NULL; LexicalScopeStack.clear(); AbstractInstanceRootList.clear(); @@ -1507,32 +2035,34 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { /// RecordSourceLine - Records location information and associates it with a /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. -unsigned DwarfDebug::RecordSourceLine(Value *V, unsigned Line, unsigned Col) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - CompileUnit *Unit = CompileUnitMap[V]; - assert(Unit && "Unable to find CompileUnit"); - unsigned ID = MMI->NextLabelID(); - Lines.push_back(SrcLineInfo(Line, Col, Unit->getID(), ID)); +unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, + MDNode *S) { + if (!MMI) + return 0; if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - -/// RecordSourceLine - Records location information and associates it with a -/// label. Returns a unique label ID used to generate a label and provide -/// correspondence to the source line list. 
-unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, - DICompileUnit CU) { - if (TimePassesIsEnabled) DebugTimer->startTimer(); - std::string Dir, Fn; - unsigned Src = GetOrCreateSourceID(CU.getDirectory(Dir), - CU.getFilename(Fn)); + const char *Dir = NULL; + const char *Fn = NULL; + + DIDescriptor Scope(S); + if (Scope.isCompileUnit()) { + DICompileUnit CU(S); + Dir = CU.getDirectory(); + Fn = CU.getFilename(); + } else if (Scope.isSubprogram()) { + DISubprogram SP(S); + Dir = SP.getDirectory(); + Fn = SP.getFilename(); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(S); + Dir = DB.getDirectory(); + Fn = DB.getFilename(); + } else + assert (0 && "Unexpected scope info"); + + unsigned Src = GetOrCreateSourceID(Dir, Fn); unsigned ID = MMI->NextLabelID(); Lines.push_back(SrcLineInfo(Line, Col, Src, ID)); @@ -1552,7 +2082,7 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, if (TimePassesIsEnabled) DebugTimer->startTimer(); - unsigned SrcId = GetOrCreateSourceID(DirName, FileName); + unsigned SrcId = GetOrCreateSourceID(DirName.c_str(), FileName.c_str()); if (TimePassesIsEnabled) DebugTimer->stopTimer(); @@ -1561,11 +2091,11 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, } /// RecordRegionStart - Indicate the start of a region. -unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) { +unsigned DwarfDebug::RecordRegionStart(MDNode *N) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - DbgScope *Scope = getOrCreateScope(V); + DbgScope *Scope = getOrCreateScope(N); unsigned ID = MMI->NextLabelID(); if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID); LexicalScopeStack.push_back(Scope); @@ -1577,11 +2107,11 @@ unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) { } /// RecordRegionEnd - Indicate the end of a region. 
-unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) { +unsigned DwarfDebug::RecordRegionEnd(MDNode *N) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - DbgScope *Scope = getOrCreateScope(V); + DbgScope *Scope = getOrCreateScope(N); unsigned ID = MMI->NextLabelID(); Scope->setEndLabelID(ID); // FIXME : region.end() may not be in the last basic block. @@ -1598,62 +2128,36 @@ unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) { } /// RecordVariable - Indicate the declaration of a local variable. -void DwarfDebug::RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI) { +void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - DIDescriptor Desc(GV); + DIDescriptor Desc(N); DbgScope *Scope = NULL; bool InlinedFnVar = false; - if (Desc.getTag() == dwarf::DW_TAG_variable) { - // GV is a global variable. - DIGlobalVariable DG(GV); - Scope = getOrCreateScope(DG.getContext().getGV()); - } else { - DenseMap<const MachineInstr *, DbgScope *>::iterator - SI = InlinedVariableScopes.find(MI); - - if (SI != InlinedVariableScopes.end()) { - // or GV is an inlined local variable. - Scope = SI->second; - } else { - DIVariable DV(GV); - GlobalVariable *V = DV.getContext().getGV(); - - // FIXME: The code that checks for the inlined local variable is a hack! - DenseMap<const GlobalVariable *, DbgScope *>::iterator - AI = AbstractInstanceRootMap.find(V); - - if (AI != AbstractInstanceRootMap.end()) { - // This method is called each time a DECLARE node is encountered. For an - // inlined function, this could be many, many times. We don't want to - // re-add variables to that DIE for each time. We just want to add them - // once. Check to make sure that we haven't added them already. 
- DenseMap<const GlobalVariable *, - SmallSet<const GlobalVariable *, 32> >::iterator - IP = InlinedParamMap.find(V); - - if (IP != InlinedParamMap.end() && IP->second.count(GV) > 0) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - return; - } - - // or GV is an inlined local variable. - Scope = AI->second; - InlinedParamMap[V].insert(GV); - InlinedFnVar = true; - } else { - // or GV is a local variable. - Scope = getOrCreateScope(V); + if (Desc.getTag() == dwarf::DW_TAG_variable) + Scope = getOrCreateScope(DIGlobalVariable(N).getContext().getNode()); + else { + bool InlinedVar = false; + MDNode *Context = DIVariable(N).getContext().getNode(); + DISubprogram SP(Context); + if (!SP.isNull()) { + // SP is inserted into DbgAbstractScopeMap when inlined function + // start was recorded by RecordInlineFnStart. + DenseMap<MDNode *, DbgScope *>::iterator + I = DbgAbstractScopeMap.find(SP.getNode()); + if (I != DbgAbstractScopeMap.end()) { + InlinedVar = true; + Scope = I->second; } } + if (!InlinedVar) + Scope = getOrCreateScope(Context); } assert(Scope && "Unable to find the variable's scope"); - DbgVariable *DV = new DbgVariable(DIVariable(GV), FrameIndex, InlinedFnVar); + DbgVariable *DV = new DbgVariable(DIVariable(N), FrameIndex, InlinedFnVar); Scope->AddVariable(DV); if (TimePassesIsEnabled) @@ -1665,23 +2169,23 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, unsigned Line, unsigned Col) { unsigned LabelID = MMI->NextLabelID(); - if (!TAI->doesDwarfUsesInlineInfoSection()) + if (!MAI->doesDwarfUsesInlineInfoSection()) return LabelID; if (TimePassesIsEnabled) DebugTimer->startTimer(); - GlobalVariable *GV = SP.getGV(); - DenseMap<const GlobalVariable *, DbgScope *>::iterator - II = AbstractInstanceRootMap.find(GV); + MDNode *Node = SP.getNode(); + DenseMap<const MDNode *, DbgScope *>::iterator + II = AbstractInstanceRootMap.find(Node); if (II == AbstractInstanceRootMap.end()) { // Create an abstract instance entry for this 
inlined function if it doesn't // already exist. - DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV)); + DbgScope *Scope = new DbgScope(NULL, DIDescriptor(Node)); // Get the compile unit context. - DIE *SPDie = ModuleCU->getDieMapSlotFor(GV); + DIE *SPDie = ModuleCU->getDieMapSlotFor(Node); if (!SPDie) SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true); @@ -1693,18 +2197,18 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined); // Keep track of the abstract scope for this function. - DbgAbstractScopeMap[GV] = Scope; + DbgAbstractScopeMap[Node] = Scope; - AbstractInstanceRootMap[GV] = Scope; + AbstractInstanceRootMap[Node] = Scope; AbstractInstanceRootList.push_back(Scope); } // Create a concrete inlined instance for this inlined function. - DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV)); + DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(Node)); DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine); ScopeDie->setAbstractCompileUnit(ModuleCU); - DIE *Origin = ModuleCU->getDieMapSlotFor(GV); + DIE *Origin = ModuleCU->getDieMapSlotFor(Node); AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, Origin); AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); @@ -1718,20 +2222,20 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, LexicalScopeStack.back()->AddConcreteInst(ConcreteScope); // Keep track of the concrete scope that's inlined into this function. 
- DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator - SI = DbgConcreteScopeMap.find(GV); + DenseMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator + SI = DbgConcreteScopeMap.find(Node); if (SI == DbgConcreteScopeMap.end()) - DbgConcreteScopeMap[GV].push_back(ConcreteScope); + DbgConcreteScopeMap[Node].push_back(ConcreteScope); else SI->second.push_back(ConcreteScope); // Track the start label for this inlined function. - DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator - I = InlineInfo.find(GV); + DenseMap<MDNode *, SmallVector<unsigned, 4> >::iterator + I = InlineInfo.find(Node); if (I == InlineInfo.end()) - InlineInfo[GV].push_back(LabelID); + InlineInfo[Node].push_back(LabelID); else I->second.push_back(LabelID); @@ -1743,15 +2247,15 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) { - if (!TAI->doesDwarfUsesInlineInfoSection()) + if (!MAI->doesDwarfUsesInlineInfoSection()) return 0; if (TimePassesIsEnabled) DebugTimer->startTimer(); - GlobalVariable *GV = SP.getGV(); - DenseMap<GlobalVariable *, SmallVector<DbgScope *, 8> >::iterator - I = DbgConcreteScopeMap.find(GV); + MDNode *Node = SP.getNode(); + DenseMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator + I = DbgConcreteScopeMap.find(Node); if (I == DbgConcreteScopeMap.end()) { // FIXME: Can this situation actually happen? And if so, should it? @@ -1781,33 +2285,6 @@ unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) { return ID; } -/// RecordVariableScope - Record scope for the variable declared by -/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes -/// for only inlined subroutine variables. Other variables's scopes are -/// determined during RecordVariable(). 
-void DwarfDebug::RecordVariableScope(DIVariable &DV, - const MachineInstr *DeclareMI) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - DISubprogram SP(DV.getContext().getGV()); - - if (SP.isNull()) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return; - } - - DenseMap<GlobalVariable *, DbgScope *>::iterator - I = DbgAbstractScopeMap.find(SP.getGV()); - if (I != DbgAbstractScopeMap.end()) - InlinedVariableScopes[DeclareMI] = I->second; - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); -} - //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// @@ -1832,7 +2309,7 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) { Die->setOffset(Offset); // Start the size with the size of abbreviation code. - Offset += TargetAsmInfo::getULEB128Size(AbbrevNumber); + Offset += MCAsmInfo::getULEB128Size(AbbrevNumber); const SmallVector<DIEValue*, 32> &Values = Die->getValues(); const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData(); @@ -1879,38 +2356,40 @@ void DwarfDebug::EmitInitial() { if (didInitial) return; didInitial = true; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Dwarf sections base addresses. 
- if (TAI->doesDwarfRequireFrameSection()) { - Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + if (MAI->doesDwarfRequireFrameSection()) { + Asm->OutStreamer.SwitchSection(TLOF.getDwarfFrameSection()); EmitLabel("section_debug_frame", 0); } - Asm->SwitchToDataSection(TAI->getDwarfInfoSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfInfoSection()); EmitLabel("section_info", 0); - Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfAbbrevSection()); EmitLabel("section_abbrev", 0); - Asm->SwitchToDataSection(TAI->getDwarfARangesSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfARangesSection()); EmitLabel("section_aranges", 0); - if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) { - Asm->SwitchToDataSection(LineInfoDirective); + if (const MCSection *LineInfoDirective = TLOF.getDwarfMacroInfoSection()) { + Asm->OutStreamer.SwitchSection(LineInfoDirective); EmitLabel("section_macinfo", 0); } - Asm->SwitchToDataSection(TAI->getDwarfLineSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfLineSection()); EmitLabel("section_line", 0); - Asm->SwitchToDataSection(TAI->getDwarfLocSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfLocSection()); EmitLabel("section_loc", 0); - Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubNamesSection()); EmitLabel("section_pubnames", 0); - Asm->SwitchToDataSection(TAI->getDwarfStrSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfStrSection()); EmitLabel("section_str", 0); - Asm->SwitchToDataSection(TAI->getDwarfRangesSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfRangesSection()); EmitLabel("section_ranges", 0); - Asm->SwitchToSection(TAI->getTextSection()); + Asm->OutStreamer.SwitchSection(TLOF.getTextSection()); EmitLabel("text_begin", 0); - Asm->SwitchToSection(TAI->getDataSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDataSection()); 
EmitLabel("data_begin", 0); } @@ -2012,7 +2491,8 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) { void DwarfDebug::EmitDebugInfo() { // Start debug info section. - Asm->SwitchToDataSection(TAI->getDwarfInfoSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfInfoSection()); EmitDebugInfoPerCU(ModuleCU); } @@ -2023,7 +2503,8 @@ void DwarfDebug::EmitAbbreviations() const { // Check to see if it is worth the effort. if (!Abbreviations.empty()) { // Start the debug abbrev section. - Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAbbrevSection()); EmitLabel("abbrev_begin", 0); @@ -2071,7 +2552,7 @@ void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) { void DwarfDebug::EmitDebugLines() { // If the target is using .loc/.file, the assembler will be emitting the // .debug_line table automatically. - if (TAI->hasDotLocAndDotFile()) + if (MAI->hasDotLocAndDotFile()) return; // Minimum line delta, thus ranging from -10..(255-10). @@ -2080,7 +2561,8 @@ void DwarfDebug::EmitDebugLines() { const int MaxLineDelta = 255 + MinLineDelta; // Start the dwarf line section. - Asm->SwitchToDataSection(TAI->getDwarfLineSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLineSection()); // Construct the section header. EmitDifference("line_end", 0, "line_begin", 0, true); @@ -2147,13 +2629,12 @@ void DwarfDebug::EmitDebugLines() { // Isolate current sections line info. const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j]; - if (Asm->isVerbose()) { - const Section* S = SectionMap[j + 1]; - O << '\t' << TAI->getCommentString() << " Section" + /*if (Asm->isVerbose()) { + const MCSection *S = SectionMap[j + 1]; + O << '\t' << MAI->getCommentString() << " Section" << S->getName() << '\n'; - } else { - Asm->EOL(); - } + }*/ + Asm->EOL(); // Dwarf assumes we start with first line of first source file. 
unsigned Source = 1; @@ -2165,12 +2646,14 @@ void DwarfDebug::EmitDebugLines() { unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID()); if (!LabelID) continue; + if (LineInfo.getLine() == 0) continue; + if (!Asm->isVerbose()) Asm->EOL(); else { std::pair<unsigned, unsigned> SourceID = getSourceDirectoryAndFileIds(LineInfo.getSourceID()); - O << '\t' << TAI->getCommentString() << ' ' + O << '\t' << MAI->getCommentString() << ' ' << getSourceDirectoryName(SourceID.first) << ' ' << getSourceFileName(SourceID.second) <<" :" << utostr_32(LineInfo.getLine()) << '\n'; @@ -2231,7 +2714,7 @@ void DwarfDebug::EmitDebugLines() { /// EmitCommonDebugFrame - Emit common frame info into a debug frame section. /// void DwarfDebug::EmitCommonDebugFrame() { - if (!TAI->doesDwarfRequireFrameSection()) + if (!MAI->doesDwarfRequireFrameSection()) return; int stackGrowth = @@ -2240,7 +2723,8 @@ void DwarfDebug::EmitCommonDebugFrame() { TD->getPointerSize() : -TD->getPointerSize(); // Start the dwarf frame section. - Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfFrameSection()); EmitLabel("debug_frame_common", 0); EmitDifference("debug_frame_common_end", 0, @@ -2276,11 +2760,12 @@ void DwarfDebug::EmitCommonDebugFrame() { /// section. void DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ - if (!TAI->doesDwarfRequireFrameSection()) + if (!MAI->doesDwarfRequireFrameSection()) return; // Start the dwarf frame section. - Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfFrameSection()); EmitDifference("debug_frame_end", DebugFrameInfo.Number, "debug_frame_begin", DebugFrameInfo.Number, true); @@ -2344,7 +2829,8 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) { /// void DwarfDebug::EmitDebugPubNames() { // Start the dwarf pubnames section. 
- Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); EmitDebugPubNamesPerCU(ModuleCU); } @@ -2355,7 +2841,8 @@ void DwarfDebug::EmitDebugStr() { // Check to see if it is worth the effort. if (!StringPool.empty()) { // Start the dwarf str section. - Asm->SwitchToDataSection(TAI->getDwarfStrSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfStrSection()); // For each of strings in the string pool. for (unsigned StringID = 1, N = StringPool.size(); @@ -2376,7 +2863,8 @@ void DwarfDebug::EmitDebugStr() { /// void DwarfDebug::EmitDebugLoc() { // Start the dwarf loc section. - Asm->SwitchToDataSection(TAI->getDwarfLocSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); Asm->EOL(); } @@ -2384,7 +2872,8 @@ void DwarfDebug::EmitDebugLoc() { /// void DwarfDebug::EmitDebugARanges() { // Start the dwarf aranges section. - Asm->SwitchToDataSection(TAI->getDwarfARangesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); // FIXME - Mock up #if 0 @@ -2420,16 +2909,18 @@ void DwarfDebug::EmitDebugARanges() { /// void DwarfDebug::EmitDebugRanges() { // Start the dwarf ranges section. - Asm->SwitchToDataSection(TAI->getDwarfRangesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfRangesSection()); Asm->EOL(); } /// EmitDebugMacInfo - Emit visible names into a debug macinfo section. /// void DwarfDebug::EmitDebugMacInfo() { - if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) { + if (const MCSection *LineInfo = + Asm->getObjFileLowering().getDwarfMacroInfoSection()) { // Start the dwarf macinfo section. 
- Asm->SwitchToDataSection(LineInfoDirective); + Asm->OutStreamer.SwitchSection(LineInfo); Asm->EOL(); } } @@ -2453,13 +2944,14 @@ void DwarfDebug::EmitDebugMacInfo() { /// __debug_info section, and the low_pc is the starting address for the /// inlining instance. void DwarfDebug::EmitDebugInlineInfo() { - if (!TAI->doesDwarfUsesInlineInfoSection()) + if (!MAI->doesDwarfUsesInlineInfoSection()) return; if (!ModuleCU) return; - Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfDebugInlineSection()); Asm->EOL(); EmitDifference("debug_inlined_end", 1, "debug_inlined_begin", 1, true); @@ -2470,18 +2962,25 @@ void DwarfDebug::EmitDebugInlineInfo() { Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version"); Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)"); - for (DenseMap<GlobalVariable *, SmallVector<unsigned, 4> >::iterator + for (DenseMap<MDNode *, SmallVector<unsigned, 4> >::iterator I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { - GlobalVariable *GV = I->first; + MDNode *Node = I->first; SmallVector<unsigned, 4> &Labels = I->second; - DISubprogram SP(GV); - std::string Name; - std::string LName; - - SP.getLinkageName(LName); - SP.getName(Name); + DISubprogram SP(Node); + const char *LName = SP.getLinkageName(); + const char *Name = SP.getName(); - Asm->EmitString(LName.empty() ? Name : LName); + if (!LName) + Asm->EmitString(Name); + else { + // Skip special LLVM prefix that is used to inform the asm printer to not + // emit usual symbol prefix before the symbol name. This happens for + // Objective-C symbol names and symbol whose name is replaced using GCC's + // __asm__ attribute. 
+ if (LName[0] == 1) + LName = &LName[1]; + Asm->EmitString(LName); + } Asm->EOL("MIPS linkage name"); Asm->EmitString(Name); Asm->EOL("Function name"); @@ -2490,13 +2989,13 @@ void DwarfDebug::EmitDebugInlineInfo() { for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(), LE = Labels.end(); LI != LE; ++LI) { - DIE *SP = ModuleCU->getDieMapSlotFor(GV); + DIE *SP = ModuleCU->getDieMapSlotFor(Node); Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset"); if (TD->getPointerSize() == sizeof(int32_t)) - O << TAI->getData32bitsDirective(); + O << MAI->getData32bitsDirective(); else - O << TAI->getData64bitsDirective(); + O << MAI->getData64bitsDirective(); PrintLabelName("label", *LI); Asm->EOL("low_pc"); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 101dc70..bd377c5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -35,7 +35,7 @@ class DbgScope; class DbgConcreteScope; class MachineFrameInfo; class MachineModuleInfo; -class TargetAsmInfo; +class MCAsmInfo; class Timer; //===----------------------------------------------------------------------===// @@ -120,7 +120,7 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// SectionMap - Provides a unique id per text section. /// - UniqueVector<const Section*> SectionMap; + UniqueVector<const MCSection*> SectionMap; /// SectionSourceLines - Tracks line numbers per text section. /// @@ -139,34 +139,38 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { DbgScope *FunctionDbgScope; /// DbgScopeMap - Tracks the scopes in the current function. - DenseMap<GlobalVariable *, DbgScope *> DbgScopeMap; + DenseMap<MDNode *, DbgScope *> DbgScopeMap; + + /// ScopedGVs - Tracks global variables that are not at file scope. 
+ /// For example void f() { static int b = 42; } + SmallVector<WeakVH, 4> ScopedGVs; + + typedef DenseMap<const MachineInstr *, SmallVector<DbgScope *, 2> > + InsnToDbgScopeMapTy; + + /// DbgScopeBeginMap - Maps instruction with a list DbgScopes it starts. + InsnToDbgScopeMapTy DbgScopeBeginMap; + + /// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends. + InsnToDbgScopeMapTy DbgScopeEndMap; /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current /// function. - DenseMap<GlobalVariable *, DbgScope *> DbgAbstractScopeMap; + DenseMap<MDNode *, DbgScope *> DbgAbstractScopeMap; /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current /// function. - DenseMap<GlobalVariable *, + DenseMap<MDNode *, SmallVector<DbgScope *, 8> > DbgConcreteScopeMap; /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. - DenseMap<GlobalVariable *, SmallVector<unsigned, 4> > InlineInfo; - - /// InlinedVariableScopes - Scopes information for the inlined subroutine - /// variables. - DenseMap<const MachineInstr *, DbgScope *> InlinedVariableScopes; + DenseMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo; /// AbstractInstanceRootMap - Map of abstract instance roots of inlined /// functions. These are subroutine entries that contain a DW_AT_inline /// attribute. - DenseMap<const GlobalVariable *, DbgScope *> AbstractInstanceRootMap; - - /// InlinedParamMap - A map keeping track of which parameters are assigned to - /// which abstract instance. - DenseMap<const GlobalVariable *, - SmallSet<const GlobalVariable *, 32> > InlinedParamMap; + DenseMap<const MDNode *, DbgScope *> AbstractInstanceRootMap; /// AbstractInstanceRootList - List of abstract instance roots of inlined /// functions. 
These are subroutine entries that contain a DW_AT_inline @@ -284,11 +288,8 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// AddSourceLine - Add location information to specified debug information /// entry. void AddSourceLine(DIE *Die, const DIVariable *V); - - /// AddSourceLine - Add location information to specified debug information - /// entry. void AddSourceLine(DIE *Die, const DIGlobal *G); - + void AddSourceLine(DIE *Die, const DISubprogram *SP); void AddSourceLine(DIE *Die, const DIType *Ty); /// AddAddress - Add an address attribute to a die based on the location @@ -296,6 +297,24 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { void AddAddress(DIE *Die, unsigned Attribute, const MachineLocation &Location); + /// AddComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + /// + void AddComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of AddComplexAddress. + /// AddBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use AddComplexAddress instead. + /// + void AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + /// AddType - Add a new type attribute to the specified entity. 
void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty); @@ -342,9 +361,10 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); - /// getOrCreateScope - Returns the scope associated with the given descriptor. + /// getDbgScope - Returns the scope associated with the given descriptor. /// - DbgScope *getOrCreateScope(GlobalVariable *V); + DbgScope *getOrCreateScope(MDNode *N); + DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI); /// ConstructDbgScope - Construct the components of a scope. /// @@ -454,20 +474,26 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps /// as well. - unsigned GetOrCreateSourceID(const std::string &DirName, - const std::string &FileName); + unsigned GetOrCreateSourceID(const char *DirName, + const char *FileName); - void ConstructCompileUnit(GlobalVariable *GV); + void ConstructCompileUnit(MDNode *N); - void ConstructGlobalVariableDIE(GlobalVariable *GV); + void ConstructGlobalVariableDIE(MDNode *N); - void ConstructSubprogram(GlobalVariable *GV); + void ConstructSubprogram(MDNode *N); + + // FIXME: This should go away in favor of complex addresses. + /// Find the type the programmer originally declared the variable to be + /// and return that type. Obsolete, use GetComplexAddrType instead. + /// + DIType GetBlockByrefType(DIType Ty, std::string Name); public: //===--------------------------------------------------------------------===// // Main entry points. 
// - DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T); + DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T); virtual ~DwarfDebug(); /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should @@ -493,12 +519,7 @@ public: /// RecordSourceLine - Records location information and associates it with a /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. - unsigned RecordSourceLine(Value *V, unsigned Line, unsigned Col); - - /// RecordSourceLine - Records location information and associates it with a - /// label. Returns a unique label ID used to generate a label and provide - /// correspondence to the source line list. - unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU); + unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope); /// getRecordSourceLineCount - Return the number of source lines in the debug /// info. @@ -515,14 +536,13 @@ public: const std::string &FileName); /// RecordRegionStart - Indicate the start of a region. - unsigned RecordRegionStart(GlobalVariable *V); + unsigned RecordRegionStart(MDNode *N); /// RecordRegionEnd - Indicate the end of a region. - unsigned RecordRegionEnd(GlobalVariable *V); + unsigned RecordRegionEnd(MDNode *N); /// RecordVariable - Indicate the declaration of a local variable. - void RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI); + void RecordVariable(MDNode *N, unsigned FrameIndex); //// RecordInlinedFnStart - Indicate the start of inlined subroutine. unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, @@ -531,11 +551,20 @@ public: /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. unsigned RecordInlinedFnEnd(DISubprogram &SP); - /// RecordVariableScope - Record scope for the variable declared by - /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. 
Record scopes - /// for only inlined subroutine variables. Other variables's scopes are - /// determined during RecordVariable(). - void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI); + /// ExtractScopeInformation - Scan machine instructions in this function + /// and collect DbgScopes. Return true, if atleast one scope was found. + bool ExtractScopeInformation(MachineFunction *MF); + + /// CollectVariableInfo - Populate DbgScope entries with variables' info. + void CollectVariableInfo(); + + /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that + /// start with this machine instruction. + void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label); + + /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that + /// end with this machine instruction. + void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 37466ab..626523b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing dwarf exception info into asm files. +// This file contains support for writing DWARF exception info into asm files. 
// //===----------------------------------------------------------------------===// @@ -15,30 +15,38 @@ #include "llvm/Module.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; static TimerGroup &getDwarfTimerGroup() { - static TimerGroup DwarfTimerGroup("Dwarf Exception"); + static TimerGroup DwarfTimerGroup("DWARF Exception"); return DwarfTimerGroup; } DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A, - const TargetAsmInfo *T) + const MCAsmInfo *T) : Dwarf(OS, A, T, "eh"), shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false), shouldEmitMovesModule(false), ExceptionTimer(0) { - if (TimePassesIsEnabled) - ExceptionTimer = new Timer("Dwarf Exception Writer", + if (TimePassesIsEnabled) + ExceptionTimer = new Timer("DWARF Exception Writer", getDwarfTimerGroup()); } @@ -46,21 +54,45 @@ DwarfException::~DwarfException() { delete ExceptionTimer; } -void DwarfException::EmitCommonEHFrame(const Function *Personality, - unsigned Index) { +/// SizeOfEncodedValue - Return the size of the encoding in bytes. 
+unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + case dwarf::DW_EH_PE_absptr: + return TD->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; + } + + assert(0 && "Invalid encoded value."); + return 0; +} + +/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that +/// is shared among many Frame Description Entries. There is at least one CIE +/// in every non-empty .debug_frame section. +void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // Size and sign of stack growth. int stackGrowth = Asm->TM.getFrameInfo()->getStackGrowthDirection() == TargetFrameInfo::StackGrowsUp ? TD->getPointerSize() : -TD->getPointerSize(); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Begin eh frame section. - Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); - - if (!TAI->doesRequireNonLocalEHFrameLabel()) - O << TAI->getEHGlobalPrefix(); + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + if (MAI->is_EHSymbolPrivate()) + O << MAI->getPrivateGlobalPrefix(); O << "EH_frame" << Index << ":\n"; + EmitLabel("section_eh_frame", Index); // Define base labels. @@ -79,8 +111,53 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality, Asm->EOL("CIE Version"); // The personality presence indicates that language specific information will - // show up in the eh frame. - Asm->EmitString(Personality ? "zPLR" : "zR"); + // show up in the eh frame. Find out how we are supposed to lower the + // personality function reference: + const MCExpr *PersonalityRef = 0; + bool IsPersonalityIndirect = false, IsPersonalityPCRel = false; + if (PersonalityFn) { + // FIXME: HANDLE STATIC CODEGEN MODEL HERE. + + // In non-static mode, ask the object file how to represent this reference. 
+ PersonalityRef = + TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang, + Asm->MMI, + IsPersonalityIndirect, + IsPersonalityPCRel); + } + + unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + if (IsPersonalityIndirect) + PerEncoding |= dwarf::DW_EH_PE_indirect; + unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + char Augmentation[5] = { 0 }; + unsigned AugmentationSize = 0; + char *APtr = Augmentation + 1; + + if (PersonalityRef) { + // There is a personality function. + *APtr++ = 'P'; + AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding); + } + + if (UsesLSDA[Index]) { + // An LSDA pointer is in the FDE augmentation. + *APtr++ = 'L'; + ++AugmentationSize; + } + + if (FDEEncoding != dwarf::DW_EH_PE_absptr) { + // A non-default pointer encoding for the FDE. + *APtr++ = 'R'; + ++AugmentationSize; + } + + if (APtr != Augmentation + 1) + Augmentation[0] = 'z'; + + Asm->EmitString(Augmentation); Asm->EOL("CIE Augmentation"); // Round out reader. @@ -91,39 +168,41 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality, Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); Asm->EOL("CIE Return Address Column"); - // If there is a personality, we need to indicate the functions location. - if (Personality) { - Asm->EmitULEB128Bytes(7); - Asm->EOL("Augmentation Size"); - - if (TAI->getNeedsIndirectEncoding()) { - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 | - dwarf::DW_EH_PE_indirect); - Asm->EOL("Personality (pcrel sdata4 indirect)"); - } else { - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("Personality (pcrel sdata4)"); + Asm->EmitULEB128Bytes(AugmentationSize); + Asm->EOL("Augmentation Size"); + + Asm->EmitInt8(PerEncoding); + Asm->EOL("Personality", PerEncoding); + + // If there is a personality, we need to indicate the function's location. 
+ if (PersonalityRef) { + // If the reference to the personality function symbol is not already + // pc-relative, then we need to subtract our current address from it. Do + // this by emitting a label and subtracting it from the expression we + // already have. This is equivalent to emitting "foo - .", but we have to + // emit the label for "." directly. + if (!IsPersonalityPCRel) { + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << "personalityref_addr" << Asm->getFunctionNumber() << "_" << Index; + MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str()); + Asm->OutStreamer.EmitLabel(DotSym); + + PersonalityRef = + MCBinaryExpr::CreateSub(PersonalityRef, + MCSymbolRefExpr::Create(DotSym,Asm->OutContext), + Asm->OutContext); } - - PrintRelDirective(true); - O << TAI->getPersonalityPrefix(); - Asm->EmitExternalGlobal((const GlobalVariable *)(Personality)); - O << TAI->getPersonalitySuffix(); - if (strcmp(TAI->getPersonalitySuffix(), "+4@GOTPCREL")) - O << "-" << TAI->getPCSymbol(); + + O << MAI->getData32bitsDirective(); + PersonalityRef->print(O, MAI); Asm->EOL("Personality"); - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("LSDA Encoding (pcrel sdata4)"); + Asm->EmitInt8(LSDAEncoding); + Asm->EOL("LSDA Encoding", LSDAEncoding); - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("FDE Encoding (pcrel sdata4)"); - } else { - Asm->EmitULEB128Bytes(1); - Asm->EOL("Augmentation Size"); - - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("FDE Encoding (pcrel sdata4)"); + Asm->EmitInt8(FDEEncoding); + Asm->EOL("FDE Encoding", FDEEncoding); } // Indicate locations of general callee saved registers in frame. @@ -134,55 +213,44 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality, // On Darwin the linker honors the alignment of eh_frame, which means it must // be 8-byte on 64-bit targets to match what gcc does. 
Otherwise you get // holes which confuse readers of eh_frame. - Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3, - 0, 0, false); + Asm->EmitAlignment(TD->getPointerSize() == 4 ? 2 : 3, 0, 0, false); EmitLabel("eh_frame_common_end", Index); Asm->EOL(); } -/// EmitEHFrame - Emit function exception frame information. -/// -void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { - assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && +/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. +void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { + assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && "Should not emit 'available externally' functions at all"); - Function::LinkageTypes linkage = EHFrameInfo.function->getLinkage(); - Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); + const Function *TheFunc = EHFrameInfo.function; + + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection()); // Externally visible entry into the functions eh frame info. If the // corresponding function is static, this should not be externally visible. - if (linkage != Function::InternalLinkage && - linkage != Function::PrivateLinkage) { - if (const char *GlobalEHDirective = TAI->getGlobalEHDirective()) + if (!TheFunc->hasLocalLinkage()) + if (const char *GlobalEHDirective = MAI->getGlobalEHDirective()) O << GlobalEHDirective << EHFrameInfo.FnName << "\n"; - } // If corresponding function is weak definition, this should be too. - if ((linkage == Function::WeakAnyLinkage || - linkage == Function::WeakODRLinkage || - linkage == Function::LinkOnceAnyLinkage || - linkage == Function::LinkOnceODRLinkage) && - TAI->getWeakDefDirective()) - O << TAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n"; + if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective()) + O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n"; // If there are no calls then you can't unwind. 
This may mean we can omit the // EH Frame, but some environments do not handle weak absolute symbols. If // UnwindTablesMandatory is set we cannot do this optimization; the unwind // info is to be available for non-EH uses. - if (!EHFrameInfo.hasCalls && - !UnwindTablesMandatory && - ((linkage != Function::WeakAnyLinkage && - linkage != Function::WeakODRLinkage && - linkage != Function::LinkOnceAnyLinkage && - linkage != Function::LinkOnceODRLinkage) || - !TAI->getWeakDefDirective() || - TAI->getSupportsWeakOmittedEHFrame())) { + if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory && + (!TheFunc->isWeakForLinker() || + !MAI->getWeakDefDirective() || + MAI->getSupportsWeakOmittedEHFrame())) { O << EHFrameInfo.FnName << " = 0\n"; // This name has no connection to the function, so it might get // dead-stripped when the function is not, erroneously. Prohibit // dead-stripping unconditionally. - if (const char *UsedDirective = TAI->getUsedDirective()) + if (const char *UsedDirective = MAI->getUsedDirective()) O << UsedDirective << EHFrameInfo.FnName << "\n\n"; } else { O << EHFrameInfo.FnName << ":\n"; @@ -194,17 +262,9 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { EmitLabel("eh_frame_begin", EHFrameInfo.Number); - if (TAI->doesRequireNonLocalEHFrameLabel()) { - PrintRelDirective(true, true); - PrintLabelName("eh_frame_begin", EHFrameInfo.Number); - - if (!TAI->isAbsoluteEHSectionOffsets()) - O << "-EH_frame" << EHFrameInfo.PersonalityIndex; - } else { - EmitSectionOffset("eh_frame_begin", "eh_frame_common", - EHFrameInfo.Number, EHFrameInfo.PersonalityIndex, - true, true, false); - } + EmitSectionOffset("eh_frame_begin", "eh_frame_common", + EHFrameInfo.Number, EHFrameInfo.PersonalityIndex, + true, true, false); Asm->EOL("FDE CIE offset"); @@ -216,14 +276,20 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { // If there is a personality and landing pads then point to the language // specific data area in the 
exception table. - if (EHFrameInfo.PersonalityIndex) { - Asm->EmitULEB128Bytes(4); + if (MMI->getPersonalities()[0] != NULL) { + bool is4Byte = TD->getPointerSize() == sizeof(int32_t); + + Asm->EmitULEB128Bytes(is4Byte ? 4 : 8); Asm->EOL("Augmentation size"); if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, true); - else - Asm->EmitInt32((int)0); + EmitReference("exception", EHFrameInfo.Number, true, false); + else { + if (is4Byte) + Asm->EmitInt32((int)0); + else + Asm->EmitInt64((int)0); + } Asm->EOL("Language Specific Data Area"); } else { Asm->EmitULEB128Bytes(0); @@ -231,7 +297,7 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { } // Indicate locations of function specific callee saved registers in frame. - EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves, + EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves, true); // On Darwin the linker honors the alignment of eh_frame, which means it @@ -246,32 +312,13 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { // retains the function in this case, and there is code around that depends // on unused functions (calling undefined externals) being dead-stripped to // link correctly. Yes, there really is. - if (MMI->getUsedFunctions().count(EHFrameInfo.function)) - if (const char *UsedDirective = TAI->getUsedDirective()) + if (MMI->isUsedFunction(EHFrameInfo.function)) + if (const char *UsedDirective = MAI->getUsedDirective()) O << UsedDirective << EHFrameInfo.FnName << "\n\n"; } -} -/// EmitExceptionTable - Emit landing pads and actions. -/// -/// The general organization of the table is complex, but the basic concepts are -/// easy. First there is a header which describes the location and organization -/// of the three components that follow. -/// -/// 1. The landing pad site information describes the range of code covered by -/// the try. 
In our case it's an accumulation of the ranges covered by the -/// invokes in the try. There is also a reference to the landing pad that -/// handles the exception once processed. Finally an index into the actions -/// table. -/// 2. The action table, in our case, is composed of pairs of type ids and next -/// action offset. Starting with the action index from the landing pad -/// site, each type Id is checked for a match to the current exception. If -/// it matches then the exception and type id are passed on to the landing -/// pad. Otherwise the next action is looked up. This chain is terminated -/// with a next action of zero. If no type id is found the the frame is -/// unwound and handling continues. -/// 3. Type id table contains references to all the C++ typeinfo for all -/// catches in the function. This tables is reversed indexed base 1. + Asm->EOL(); +} /// SharedTypeIds - How many leading type ids two landing pads have in common. unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, @@ -301,51 +348,58 @@ bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) { return LSize < RSize; } -void DwarfException::EmitExceptionTable() { - const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - if (PadInfos.empty()) return; - - // Sort the landing pads in order of their type ids. This is used to fold - // duplicate actions. - SmallVector<const LandingPadInfo *, 64> LandingPads; - LandingPads.reserve(PadInfos.size()); - for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) - LandingPads.push_back(&PadInfos[i]); - std::sort(LandingPads.begin(), LandingPads.end(), PadLT); - - // Negative type ids index into FilterIds, positive type ids index into - // TypeInfos. The value written for a positive type id is just the type id - // itself. 
For a negative type id, however, the value written is the +/// ComputeActionsTable - Compute the actions table and gather the first action +/// index for each landing pad site. +unsigned DwarfException:: +ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions) { + + // The action table follows the call-site table in the LSDA. The individual + // records are of two types: + // + // * Catch clause + // * Exception specification + // + // The two record kinds have the same format, with only small differences. + // They are distinguished by the "switch value" field: Catch clauses + // (TypeInfos) have strictly positive switch values, and exception + // specifications (FilterIds) have strictly negative switch values. Value 0 + // indicates a catch-all clause. + // + // Negative type IDs index into FilterIds. Positive type IDs index into + // TypeInfos. The value written for a positive type ID is just the type ID + // itself. For a negative type ID, however, the value written is the // (negative) byte offset of the corresponding FilterIds entry. The byte - // offset is usually equal to the type id, because the FilterIds entries are - // written using a variable width encoding which outputs one byte per entry as - // long as the value written is not too large, but can differ. This kind of - // complication does not occur for positive type ids because type infos are + // offset is usually equal to the type ID (because the FilterIds entries are + // written using a variable width encoding, which outputs one byte per entry + // as long as the value written is not too large) but can differ. This kind + // of complication does not occur for positive type IDs because type infos are // output using a fixed width encoding. FilterOffsets[i] holds the byte // offset corresponding to FilterIds[i]. 
+ + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); SmallVector<int, 16> FilterOffsets; FilterOffsets.reserve(FilterIds.size()); int Offset = -1; - for(std::vector<unsigned>::const_iterator I = FilterIds.begin(), - E = FilterIds.end(); I != E; ++I) { + + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) { FilterOffsets.push_back(Offset); - Offset -= TargetAsmInfo::getULEB128Size(*I); + Offset -= MCAsmInfo::getULEB128Size(*I); } - // Compute the actions table and gather the first action index for each - // landing pad site. - SmallVector<ActionEntry, 32> Actions; - SmallVector<unsigned, 64> FirstActions; FirstActions.reserve(LandingPads.size()); int FirstAction = 0; unsigned SizeActions = 0; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LP = LandingPads[i]; - const std::vector<int> &TypeIds = LP->TypeIds; - const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0; + const LandingPadInfo *PrevLPI = 0; + + for (SmallVectorImpl<const LandingPadInfo *>::const_iterator + I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) { + const LandingPadInfo *LPI = *I; + const std::vector<int> &TypeIds = LPI->TypeIds; + const unsigned NumShared = PrevLPI ? 
SharedTypeIds(LPI, PrevLPI) : 0; unsigned SizeSiteActions = 0; if (NumShared < TypeIds.size()) { @@ -353,34 +407,33 @@ void DwarfException::EmitExceptionTable() { ActionEntry *PrevAction = 0; if (NumShared) { - const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size(); + const unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); PrevAction = &Actions.back(); - SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) + - TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + + MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { SizeAction -= - TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); SizeAction += -PrevAction->NextAction; PrevAction = PrevAction->Previous; } } // Compute the actions. - for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) { - int TypeID = TypeIds[I]; - assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); + for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { + int TypeID = TypeIds[J]; + assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; - unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID); + unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? 
-(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction); + SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction); SizeSiteActions += SizeAction; - ActionEntry Action = {ValueForTypeID, NextAction, PrevAction}; + ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; Actions.push_back(Action); - PrevAction = &Actions.back(); } @@ -388,35 +441,34 @@ void DwarfException::EmitExceptionTable() { FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; } // else identical - re-use previous FirstAction + // Information used when created the call-site table. The action record + // field of the call site record is the offset of the first associated + // action record, relative to the start of the actions table. This value is + // biased by 1 (1 in dicating the start of the actions table), and 0 + // indicates that there are no actions. FirstActions.push_back(FirstAction); // Compute this sites contribution to size. SizeActions += SizeSiteActions; - } - - // Compute the call-site table. The entry for an invoke has a try-range - // containing the call, a non-zero landing pad and an appropriate action. The - // entry for an ordinary call has a try-range containing the call and zero for - // the landing pad and the action. Calls marked 'nounwind' have no entry and - // must not be contained in the try-range of any entry - they form gaps in the - // table. Entries must be ordered by try-range address. - SmallVector<CallSiteEntry, 64> CallSites; - - RangeMapType PadMap; - // Invokes and nounwind calls have entries in PadMap (due to being bracketed - // by try-range labels when lowered). Ordinary calls do not, so appropriate - // try-ranges for them need be deduced. 
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LandingPad = LandingPads[i]; - for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { - unsigned BeginLabel = LandingPad->BeginLabels[j]; - assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); - PadRange P = { i, j }; - PadMap[BeginLabel] = P; - } + PrevLPI = LPI; } + return SizeActions; +} + +/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke +/// has a try-range containing the call, a non-zero landing pad, and an +/// appropriate action. The entry for an ordinary call has a try-range +/// containing the call and zero for the landing pad and the action. Calls +/// marked 'nounwind' have no entry and must not be contained in the try-range +/// of any entry - they form gaps in the table. Entries must be ordered by +/// try-range address. +void DwarfException:: +ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const RangeMapType &PadMap, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { // The end label of the previous invoke or nounwind try-range. unsigned LastLabel = 0; @@ -424,7 +476,7 @@ void DwarfException::EmitExceptionTable() { // an ordinary call) between the end of the previous try-range and now. bool SawPotentiallyThrowing = false; - // Whether the last callsite entry was for an invoke. + // Whether the last CallSite entry was for an invoke. bool PreviousIsInvoke = false; // Visit all instructions in order of address. @@ -450,17 +502,18 @@ void DwarfException::EmitExceptionTable() { // Nope, it was just some random label. 
continue; - PadRange P = L->second; + const PadRange &P = L->second; const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && "Inconsistent landing pad map!"); - // If some instruction between the previous try-range and this one may - // throw, create a call-site entry with no landing pad for the region - // between the try-ranges. - if (SawPotentiallyThrowing) { - CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; + // For Dwarf exception handling (SjLj handling doesn't use this). If some + // instruction between the previous try-range and this one may throw, + // create a call-site entry with no landing pad for the region between the + // try-ranges. + if (SawPotentiallyThrowing && + MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; } @@ -470,12 +523,16 @@ void DwarfException::EmitExceptionTable() { if (LandingPad->LandingPadLabel) { // This try-range is for an invoke. - CallSiteEntry Site = {BeginLabel, LastLabel, - LandingPad->LandingPadLabel, - FirstActions[P.PadIndex]}; - - // Try to merge with the previous call-site. - if (PreviousIsInvoke) { + CallSiteEntry Site = { + BeginLabel, + LastLabel, + LandingPad->LandingPadLabel, + FirstActions[P.PadIndex] + }; + + // Try to merge with the previous call-site. SJLJ doesn't do this + if (PreviousIsInvoke && + MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { CallSiteEntry &Prev = CallSites.back(); if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { // Extend the range of the previous entry. @@ -497,128 +554,363 @@ void DwarfException::EmitExceptionTable() { // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. 
- if (SawPotentiallyThrowing) { - CallSiteEntry Site = {LastLabel, 0, 0, 0}; + if (SawPotentiallyThrowing && + MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + CallSiteEntry Site = { LastLabel, 0, 0, 0 }; CallSites.push_back(Site); } +} + +/// EmitExceptionTable - Emit landing pads and actions. +/// +/// The general organization of the table is complex, but the basic concepts are +/// easy. First there is a header which describes the location and organization +/// of the three components that follow. +/// +/// 1. The landing pad site information describes the range of code covered by +/// the try. In our case it's an accumulation of the ranges covered by the +/// invokes in the try. There is also a reference to the landing pad that +/// handles the exception once processed. Finally an index into the actions +/// table. +/// 2. The action table, in our case, is composed of pairs of type IDs and next +/// action offset. Starting with the action index from the landing pad +/// site, each type ID is checked for a match to the current exception. If +/// it matches then the exception and type id are passed on to the landing +/// pad. Otherwise the next action is looked up. This chain is terminated +/// with a next action of zero. If no type id is found then the frame is +/// unwound and handling continues. +/// 3. Type ID table contains references to all the C++ typeinfo for all +/// catches in the function. This tables is reverse indexed base 1. +void DwarfException::EmitExceptionTable() { + const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + if (PadInfos.empty()) return; + + // Sort the landing pads in order of their type ids. This is used to fold + // duplicate actions. 
+ SmallVector<const LandingPadInfo *, 64> LandingPads; + LandingPads.reserve(PadInfos.size()); + + for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) + LandingPads.push_back(&PadInfos[i]); + + std::sort(LandingPads.begin(), LandingPads.end(), PadLT); + + // Compute the actions table and gather the first action index for each + // landing pad site. + SmallVector<ActionEntry, 32> Actions; + SmallVector<unsigned, 64> FirstActions; + unsigned SizeActions = ComputeActionsTable(LandingPads, Actions, + FirstActions); + + // Invokes and nounwind calls have entries in PadMap (due to being bracketed + // by try-range labels when lowered). Ordinary calls do not, so appropriate + // try-ranges for them need be deduced when using DWARF exception handling. + RangeMapType PadMap; + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + unsigned BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); + PadRange P = { i, j }; + PadMap[BeginLabel] = P; + } + } + + // Compute the call-site table. + SmallVector<CallSiteEntry, 64> CallSites; + ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions); // Final tallies. // Call sites. 
- const unsigned SiteStartSize = sizeof(int32_t); // DW_EH_PE_udata4 - const unsigned SiteLengthSize = sizeof(int32_t); // DW_EH_PE_udata4 - const unsigned LandingPadSize = sizeof(int32_t); // DW_EH_PE_udata4 - unsigned SizeSites = CallSites.size() * (SiteStartSize + - SiteLengthSize + - LandingPadSize); - for (unsigned i = 0, e = CallSites.size(); i < e; ++i) - SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action); + const unsigned SiteStartSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); + const unsigned SiteLengthSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); + const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); + bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; + unsigned SizeSites; + + if (IsSJLJ) + SizeSites = 0; + else + SizeSites = CallSites.size() * + (SiteStartSize + SiteLengthSize + LandingPadSize); + + for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { + SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); + if (IsSJLJ) + SizeSites += MCAsmInfo::getULEB128Size(i); + } // Type infos. - const unsigned TypeInfoSize = TD->getPointerSize(); // DW_EH_PE_absptr - unsigned SizeTypes = TypeInfos.size() * TypeInfoSize; - - unsigned TypeOffset = sizeof(int8_t) + // Call site format - TargetAsmInfo::getULEB128Size(SizeSites) + // Call-site table length - SizeSites + SizeActions + SizeTypes; - - unsigned TotalSize = sizeof(int8_t) + // LPStart format - sizeof(int8_t) + // TType format - TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset - TypeOffset; + const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); + unsigned TTypeFormat; + unsigned TypeFormatSize; + + if (!HaveTTData) { + // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say + // that we're omitting that bit. 
+ TTypeFormat = dwarf::DW_EH_PE_omit; + TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr); + } else { + // Okay, we have actual filters or typeinfos to emit. As such, we need to + // pick a type encoding for them. We're about to emit a list of pointers to + // typeinfo objects at the end of the LSDA. However, unless we're in static + // mode, this reference will require a relocation by the dynamic linker. + // + // Because of this, we have a couple of options: + // + // 1) If we are in -static mode, we can always use an absolute reference + // from the LSDA, because the static linker will resolve it. + // + // 2) Otherwise, if the LSDA section is writable, we can output the direct + // reference to the typeinfo and allow the dynamic linker to relocate + // it. Since it is in a writable section, the dynamic linker won't + // have a problem. + // + // 3) Finally, if we're in PIC mode and the LDSA section isn't writable, + // we need to use some form of indirection. For example, on Darwin, + // we can output a statically-relocatable reference to a dyld stub. The + // offset to the stub is constant, but the contents are in a section + // that is updated by the dynamic linker. This is easy enough, but we + // need to tell the personality function of the unwinder to indirect + // through the dyld stub. + // + // FIXME: When (3) is actually implemented, we'll have to emit the stubs + // somewhere. This predicate should be moved to a shared location that is + // in target-independent code. + // + if (LSDASection->getKind().isWriteable() || + Asm->TM.getRelocationModel() == Reloc::Static) + TTypeFormat = dwarf::DW_EH_PE_absptr; + else + TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; - unsigned SizeAlign = (4 - TotalSize) & 3; + TypeFormatSize = SizeOfEncodedValue(TTypeFormat); + } // Begin the exception table. 
- Asm->SwitchToDataSection(TAI->getDwarfExceptionSection()); + Asm->OutStreamer.SwitchSection(LSDASection); Asm->EmitAlignment(2, 0, 0, false); + O << "GCC_except_table" << SubprogramCount << ":\n"; + // The type infos need to be aligned. GCC does this by inserting padding just + // before the type infos. However, this changes the size of the exception + // table, so you need to take this into account when you output the exception + // table size. However, the size is output using a variable length encoding. + // So by increasing the size by inserting padding, you may increase the number + // of bytes used for writing the size. If it increases, say by one byte, then + // you now need to output one less byte of padding to get the type infos + // aligned. However this decreases the size of the exception table. This + // changes the value you have to output for the exception table size. Due to + // the variable length encoding, the number of bytes used for writing the + // length may decrease. If so, you then have to increase the amount of + // padding. And so on. If you look carefully at the GCC code you will see that + // it indeed does this in a loop, going on and on until the values stabilize. + // We chose another solution: don't output padding inside the table like GCC + // does, instead output it before the table. + unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; + unsigned TyOffset = sizeof(int8_t) + // Call site format + MCAsmInfo::getULEB128Size(SizeSites) + // Call-site table length + SizeSites + SizeActions + SizeTypes; + unsigned TotalSize = sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + (HaveTTData ? 
+ MCAsmInfo::getULEB128Size(TyOffset) : 0) + // TType base offset + TyOffset; + unsigned SizeAlign = (4 - TotalSize) & 3; + for (unsigned i = 0; i != SizeAlign; ++i) { Asm->EmitInt8(0); Asm->EOL("Padding"); - } + } EmitLabel("exception", SubprogramCount); + if (IsSJLJ) { + SmallString<16> LSDAName; + raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << + "_LSDA_" << Asm->getFunctionNumber(); + O << LSDAName.str() << ":\n"; + } + // Emit the header. Asm->EmitInt8(dwarf::DW_EH_PE_omit); - Asm->EOL("LPStart format (DW_EH_PE_omit)"); - Asm->EmitInt8(dwarf::DW_EH_PE_absptr); - Asm->EOL("TType format (DW_EH_PE_absptr)"); - Asm->EmitULEB128Bytes(TypeOffset); - Asm->EOL("TType base offset"); - Asm->EmitInt8(dwarf::DW_EH_PE_udata4); - Asm->EOL("Call site format (DW_EH_PE_udata4)"); - Asm->EmitULEB128Bytes(SizeSites); - Asm->EOL("Call-site table length"); - - // Emit the landing pad site information. - for (unsigned i = 0; i < CallSites.size(); ++i) { - CallSiteEntry &S = CallSites[i]; - const char *BeginTag; - unsigned BeginNumber; - - if (!S.BeginLabel) { - BeginTag = "eh_func_begin"; - BeginNumber = SubprogramCount; - } else { - BeginTag = "label"; - BeginNumber = S.BeginLabel; - } + Asm->EOL("@LPStart format", dwarf::DW_EH_PE_omit); - EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount, - true, true); - Asm->EOL("Region start"); + Asm->EmitInt8(TTypeFormat); + Asm->EOL("@TType format", TTypeFormat); - if (!S.EndLabel) - EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber, - true); - else - EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true); + if (HaveTTData) { + Asm->EmitULEB128Bytes(TyOffset); + Asm->EOL("@TType base offset"); + } - Asm->EOL("Region length"); + // SjLj Exception handling + if (IsSJLJ) { + Asm->EmitInt8(dwarf::DW_EH_PE_udata4); + Asm->EOL("Call site format", dwarf::DW_EH_PE_udata4); + Asm->EmitULEB128Bytes(SizeSites); + Asm->EOL("Call site table length"); + + // Emit the landing pad site 
information. + unsigned idx = 0; + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { + const CallSiteEntry &S = *I; + + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + Asm->EmitULEB128Bytes(idx); + Asm->EOL("Landing pad"); + + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + Asm->EmitULEB128Bytes(S.Action); + Asm->EOL("Action"); + } + } else { + // DWARF Exception handling + assert(MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf); + + // The call-site table is a list of all call sites that may throw an + // exception (including C++ 'throw' statements) in the procedure + // fragment. It immediately follows the LSDA header. Each entry indicates, + // for a given call, the first corresponding action record and corresponding + // landing pad. + // + // The table begins with the number of bytes, stored as an LEB128 + // compressed, unsigned integer. The records immediately follow the record + // count. They are sorted in increasing call-site address. Each record + // indicates: + // + // * The position of the call-site. + // * The position of the landing pad. + // * The first action record for that call site. + // + // A missing entry in the call-site table indicates that a call is not + // supposed to throw. + + // Emit the landing pad call site table. 
+ Asm->EmitInt8(dwarf::DW_EH_PE_udata4); + Asm->EOL("Call site format", dwarf::DW_EH_PE_udata4); + Asm->EmitULEB128Bytes(SizeSites); + Asm->EOL("Call site table size"); + + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { + const CallSiteEntry &S = *I; + const char *BeginTag; + unsigned BeginNumber; + + if (!S.BeginLabel) { + BeginTag = "eh_func_begin"; + BeginNumber = SubprogramCount; + } else { + BeginTag = "label"; + BeginNumber = S.BeginLabel; + } - if (!S.PadLabel) - Asm->EmitInt32(0); - else - EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount, + // Offset of the call site relative to the previous call site, counted in + // number of 16-byte bundles. The first call site is counted relative to + // the start of the procedure fragment. + EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount, true, true); + Asm->EOL("Region start"); + + if (!S.EndLabel) + EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber, + true); + else + EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true); + + Asm->EOL("Region length"); - Asm->EOL("Landing pad"); + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + if (!S.PadLabel) + Asm->EmitInt32(0); + else + EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount, + true, true); + + Asm->EOL("Landing pad"); - Asm->EmitULEB128Bytes(S.Action); - Asm->EOL("Action"); + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + Asm->EmitULEB128Bytes(S.Action); + Asm->EOL("Action"); + } } - // Emit the actions. - for (unsigned I = 0, N = Actions.size(); I != N; ++I) { - ActionEntry &Action = Actions[I]; + // Emit the Action Table. 
+ for (SmallVectorImpl<ActionEntry>::const_iterator + I = Actions.begin(), E = Actions.end(); I != E; ++I) { + const ActionEntry &Action = *I; + + // Type Filter + // + // Used by the runtime to match the type of the thrown exception to the + // type of the catch clauses or the types in the exception specification. Asm->EmitSLEB128Bytes(Action.ValueForTypeID); Asm->EOL("TypeInfo index"); + + // Action Record + // + // Self-relative signed displacement in bytes of the next action record, + // or 0 if there is no next action record. + Asm->EmitSLEB128Bytes(Action.NextAction); Asm->EOL("Next action"); } - // Emit the type ids. - for (unsigned M = TypeInfos.size(); M; --M) { - GlobalVariable *GV = TypeInfos[M - 1]; + // Emit the Catch Clauses. The code for the catch clauses following the same + // try is similar to a switch statement. The catch clause action record + // informs the runtime about the type of a catch clause and about the + // associated switch value. + // + // Action Record Fields: + // + // * Filter Value + // Positive value, starting at 1. Index in the types table of the + // __typeinfo for the catch-clause type. 1 is the first word preceding + // TTBase, 2 is the second word, and so on. Used by the runtime to check + // if the thrown exception type matches the catch-clause type. Back-end + // generated switch statements check against this value. + // + // * Next + // Signed offset, in bytes from the start of this field, to the next + // chained action record, or zero if none. + // + // The order of the action records determined by the next field is the order + // of the catch clauses as they appear in the source code, and must be kept in + // the same order. As a result, changing the order of the catch clause would + // change the semantics of the program. 
+ for (std::vector<GlobalVariable *>::const_reverse_iterator + I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { + const GlobalVariable *GV = *I; PrintRelDirective(); if (GV) { - std::string GLN; - O << Asm->getGlobalLinkName(GV, GLN); + O << Asm->Mang->getMangledName(GV); } else { - O << "0"; + O << "0x0"; } Asm->EOL("TypeInfo"); } - // Emit the filter typeids. - for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { - unsigned TypeID = FilterIds[j]; + // Emit the Type Table. + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; Asm->EmitULEB128Bytes(TypeID); Asm->EOL("Filter TypeInfo index"); } @@ -629,48 +921,53 @@ void DwarfException::EmitExceptionTable() { /// EndModule - Emit all exception information that should come after the /// content. void DwarfException::EndModule() { + if (MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf) + return; + + if (!shouldEmitMovesModule && !shouldEmitTableModule) + return; + if (TimePassesIsEnabled) ExceptionTimer->startTimer(); - if (shouldEmitMovesModule || shouldEmitTableModule) { - const std::vector<Function *> Personalities = MMI->getPersonalities(); - for (unsigned i = 0; i < Personalities.size(); ++i) - EmitCommonEHFrame(Personalities[i], i); + const std::vector<Function *> Personalities = MMI->getPersonalities(); - for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(), - E = EHFrames.end(); I != E; ++I) - EmitEHFrame(*I); - } + for (unsigned I = 0, E = Personalities.size(); I < E; ++I) + EmitCIE(Personalities[I], I); + + for (std::vector<FunctionEHFrameInfo>::iterator + I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) + EmitFDE(*I); if (TimePassesIsEnabled) ExceptionTimer->stopTimer(); } -/// BeginFunction - Gather pre-function exception information. Assumes being -/// emitted immediately after the function entry point. +/// BeginFunction - Gather pre-function exception information. 
Assumes it's +/// being emitted immediately after the function entry point. void DwarfException::BeginFunction(MachineFunction *MF) { + if (!MMI || !MAI->doesSupportExceptionHandling()) return; + if (TimePassesIsEnabled) ExceptionTimer->startTimer(); this->MF = MF; shouldEmitTable = shouldEmitMoves = false; - if (MMI && TAI->doesSupportExceptionHandling()) { - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); - // If any landing pads survive, we need an EH table. - if (MMI->getLandingPads().size()) - shouldEmitTable = true; + // If any landing pads survive, we need an EH table. + if (!MMI->getLandingPads().empty()) + shouldEmitTable = true; - // See if we need frame move info. - if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) - shouldEmitMoves = true; + // See if we need frame move info. + if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) + shouldEmitMoves = true; - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - EmitLabel("eh_func_begin", ++SubprogramCount); - } + if (shouldEmitMoves || shouldEmitTable) + // Assumes in correct section after the entry point. + EmitLabel("eh_func_begin", ++SubprogramCount); shouldEmitTableModule |= shouldEmitTable; shouldEmitMovesModule |= shouldEmitMoves; @@ -682,25 +979,29 @@ void DwarfException::BeginFunction(MachineFunction *MF) { /// EndFunction - Gather and emit post-function exception information. 
/// void DwarfException::EndFunction() { - if (TimePassesIsEnabled) + if (!shouldEmitMoves && !shouldEmitTable) return; + + if (TimePassesIsEnabled) ExceptionTimer->startTimer(); - if (shouldEmitMoves || shouldEmitTable) { - EmitLabel("eh_func_end", SubprogramCount); - EmitExceptionTable(); - - // Save EH frame information - std::string Name; - EHFrames.push_back( - FunctionEHFrameInfo(getAsm()->getCurrentFunctionEHName(MF, Name), - SubprogramCount, - MMI->getPersonalityIndex(), - MF->getFrameInfo()->hasCalls(), - !MMI->getLandingPads().empty(), - MMI->getFrameMoves(), - MF->getFunction())); - } + EmitLabel("eh_func_end", SubprogramCount); + EmitExceptionTable(); - if (TimePassesIsEnabled) + std::string FunctionEHName = + Asm->Mang->getMangledName(MF->getFunction(), ".eh", + Asm->MAI->is_EHSymbolPrivate()); + + // Save EH frame information + EHFrames.push_back(FunctionEHFrameInfo(FunctionEHName, SubprogramCount, + MMI->getPersonalityIndex(), + MF->getFrameInfo()->hasCalls(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves(), + MF->getFunction())); + + // Record if this personality index uses a landing pad. 
+ UsesLSDA[MMI->getPersonalityIndex()] |= !MMI->getLandingPads().empty(); + + if (TimePassesIsEnabled) ExceptionTimer->stopTimer(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index f1c3e56..f6f5025 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ -#define CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ +#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #include "DIE.h" #include "DwarfPrinter.h" @@ -24,7 +24,7 @@ namespace llvm { struct LandingPadInfo; class MachineModuleInfo; -class TargetAsmInfo; +class MCAsmInfo; class Timer; class raw_ostream; @@ -51,6 +51,11 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { std::vector<FunctionEHFrameInfo> EHFrames; + /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index + /// uses an LSDA. If so, then we need to encode that information in the CIE's + /// augmentation. + DenseMap<unsigned, bool> UsesLSDA; + /// shouldEmitTable - Per-function flag to indicate if EH tables should /// be emitted. bool shouldEmitTable; @@ -70,13 +75,16 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { /// ExceptionTimer - Timer for the Dwarf exception writer. Timer *ExceptionTimer; - /// EmitCommonEHFrame - Emit the common eh unwind frame. - /// - void EmitCommonEHFrame(const Function *Personality, unsigned Index); + /// SizeOfEncodedValue - Return the size of the encoding in bytes. + unsigned SizeOfEncodedValue(unsigned Encoding); - /// EmitEHFrame - Emit function exception frame information. - /// - void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo); + /// EmitCIE - Emit a Common Information Entry (CIE). This holds information + /// that is shared among many Frame Description Entries. 
There is at least + /// one CIE in every non-empty .debug_frame section. + void EmitCIE(const Function *Personality, unsigned Index); + + /// EmitFDE - Emit the Frame Description Entry (FDE) for the function. + void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo); /// EmitExceptionTable - Emit landing pads and actions. /// @@ -113,13 +121,6 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { static bool isPod() { return true; } }; - /// ActionEntry - Structure describing an entry in the actions table. - struct ActionEntry { - int ValueForTypeID; // The value to write - may not be equal to the type id. - int NextAction; - struct ActionEntry *Previous; - }; - /// PadRange - Structure holding a try-range and the associated landing pad. struct PadRange { // The index of the landing pad. @@ -130,23 +131,48 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType; + /// ActionEntry - Structure describing an entry in the actions table. + struct ActionEntry { + int ValueForTypeID; // The value to write - may not be equal to the type id. + int NextAction; + struct ActionEntry *Previous; + }; + /// CallSiteEntry - Structure describing an entry in the call-site table. struct CallSiteEntry { // The 'try-range' is BeginLabel .. EndLabel. unsigned BeginLabel; // zero indicates the start of the function. unsigned EndLabel; // zero indicates the end of the function. + // The landing pad starts at PadLabel. unsigned PadLabel; // zero indicates that there is no landing pad. unsigned Action; }; + /// ComputeActionsTable - Compute the actions table and gather the first + /// action index for each landing pad site. + unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions); + + /// ComputeCallSiteTable - Compute the call-site table. 
The entry for an + /// invoke has a try-range containing the call, a non-zero landing pad and an + /// appropriate action. The entry for an ordinary call has a try-range + /// containing the call and zero for the landing pad and the action. Calls + /// marked 'nounwind' have no entry and must not be contained in the try-range + /// of any entry - they form gaps in the table. Entries must be ordered by + /// try-range address. + void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const RangeMapType &PadMap, + const SmallVectorImpl<const LandingPadInfo *> &LPs, + const SmallVectorImpl<unsigned> &FirstActions); void EmitExceptionTable(); public: //===--------------------------------------------------------------------===// // Main entry points. // - DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T); + DwarfException(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T); virtual ~DwarfException(); /// BeginModule - Emit all exception information that should come prior to the diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp index 8021b7c..6e9293a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp @@ -13,7 +13,7 @@ #include "DwarfLabel.h" #include "llvm/ADT/FoldingSet.h" -#include <ostream> +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -25,10 +25,7 @@ void DWLabel::Profile(FoldingSetNodeID &ID) const { } #ifndef NDEBUG -void DWLabel::print(std::ostream *O) const { - if (O) print(*O); -} -void DWLabel::print(std::ostream &O) const { +void DWLabel::print(raw_ostream &O) const { O << "." 
<< Tag; if (Number) O << Number; } diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h index b493903..0c0cc4b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfLabel.h +++ b/lib/CodeGen/AsmPrinter/DwarfLabel.h @@ -14,19 +14,16 @@ #ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__ #define CODEGEN_ASMPRINTER_DWARFLABEL_H__ -#include "llvm/Support/Compiler.h" -#include <iosfwd> -#include <vector> - namespace llvm { class FoldingSetNodeID; + class raw_ostream; //===--------------------------------------------------------------------===// /// DWLabel - Labels are used to track locations in the assembler file. /// Labels appear in the form @verbatim <prefix><Tag><Number> @endverbatim, /// where the tag is a category of label (Ex. location) and number is a value /// unique in that category. - class VISIBILITY_HIDDEN DWLabel { + class DWLabel { /// Tag - Label category tag. Should always be a statically declared C /// string. /// @@ -47,8 +44,7 @@ namespace llvm { void Profile(FoldingSetNodeID &ID) const; #ifndef NDEBUG - void print(std::ostream *O) const; - void print(std::ostream &O) const; + void print(raw_ostream &O) const; #endif }; } // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index a1b97df..20b959b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -15,39 +15,41 @@ #include "llvm/Module.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; 
-Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T, +Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, const char *flavor) -: O(OS), Asm(A), TAI(T), TD(Asm->TM.getTargetData()), +: O(OS), Asm(A), MAI(T), TD(Asm->TM.getTargetData()), RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL), SubprogramCount(0), Flavor(flavor), SetCounter(1) {} void Dwarf::PrintRelDirective(bool Force32Bit, bool isInSection) const { - if (isInSection && TAI->getDwarfSectionOffsetDirective()) - O << TAI->getDwarfSectionOffsetDirective(); + if (isInSection && MAI->getDwarfSectionOffsetDirective()) + O << MAI->getDwarfSectionOffsetDirective(); else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t)) - O << TAI->getData32bitsDirective(); + O << MAI->getData32bitsDirective(); else - O << TAI->getData64bitsDirective(); + O << MAI->getData64bitsDirective(); } /// PrintLabelName - Print label name in form used by Dwarf writer. /// void Dwarf::PrintLabelName(const char *Tag, unsigned Number) const { - O << TAI->getPrivateGlobalPrefix() << Tag; + O << MAI->getPrivateGlobalPrefix() << Tag; if (Number) O << Number; } void Dwarf::PrintLabelName(const char *Tag, unsigned Number, const char *Suffix) const { - O << TAI->getPrivateGlobalPrefix() << Tag; + O << MAI->getPrivateGlobalPrefix() << Tag; if (Number) O << Number; O << Suffix; } @@ -65,13 +67,13 @@ void Dwarf::EmitReference(const char *Tag, unsigned Number, bool IsPCRelative, bool Force32Bit) const { PrintRelDirective(Force32Bit); PrintLabelName(Tag, Number); - if (IsPCRelative) O << "-" << TAI->getPCSymbol(); + if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative, bool Force32Bit) const { PrintRelDirective(Force32Bit); O << Name; - if (IsPCRelative) O << "-" << TAI->getPCSymbol(); + if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } /// EmitDifference - Emit the difference between two labels. 
Some assemblers do @@ -80,7 +82,7 @@ void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative, void Dwarf::EmitDifference(const char *TagHi, unsigned NumberHi, const char *TagLo, unsigned NumberLo, bool IsSmall) { - if (TAI->needsSet()) { + if (MAI->needsSet()) { O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; @@ -106,11 +108,11 @@ void Dwarf::EmitSectionOffset(const char* Label, const char* Section, bool useSet) { bool printAbsolute = false; if (isEH) - printAbsolute = TAI->isAbsoluteEHSectionOffsets(); + printAbsolute = MAI->isAbsoluteEHSectionOffsets(); else - printAbsolute = TAI->isAbsoluteDebugSectionOffsets(); + printAbsolute = MAI->isAbsoluteDebugSectionOffsets(); - if (TAI->needsSet() && useSet) { + if (MAI->needsSet() && useSet) { O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; @@ -190,7 +192,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID, Asm->EmitULEB128Bytes(Offset); Asm->EOL("Offset"); } else { - assert(0 && "Machine move not supported yet."); + llvm_unreachable("Machine move not supported yet."); } } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { @@ -200,7 +202,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID, Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH)); Asm->EOL("Register"); } else { - assert(0 && "Machine move not supported yet."); + llvm_unreachable("Machine move not supported yet."); } } else { unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH); diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h index 6e75992..33ebb3b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -25,7 +25,7 @@ namespace llvm { class MachineFunction; class MachineModuleInfo; class Module; - class TargetAsmInfo; + class MCAsmInfo; class TargetData; class TargetRegisterInfo; @@ -43,9 +43,9 @@ namespace llvm { /// AsmPrinter *Asm; - /// TAI 
- Target asm information. + /// MAI - Target asm information. /// - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// TD - Target data. /// @@ -80,7 +80,7 @@ namespace llvm { /// unsigned SetCounter; - Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T, + Dwarf(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, const char *flavor); public: //===------------------------------------------------------------------===// @@ -88,7 +88,7 @@ namespace llvm { // const AsmPrinter *getAsm() const { return Asm; } MachineModuleInfo *getMMI() const { return MMI; } - const TargetAsmInfo *getTargetAsmInfo() const { return TAI; } + const MCAsmInfo *getMCAsmInfo() const { return MAI; } const TargetData *getTargetData() const { return TD; } void PrintRelDirective(bool Force32Bit = false, diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp index 89084989..0638d35 100644 --- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -39,7 +39,7 @@ DwarfWriter::~DwarfWriter() { void DwarfWriter::BeginModule(Module *M, MachineModuleInfo *MMI, raw_ostream &OS, AsmPrinter *A, - const TargetAsmInfo *T) { + const MCAsmInfo *T) { DE = new DwarfException(OS, A, T); DD = new DwarfDebug(OS, A, T); DE->BeginModule(M, MMI); @@ -51,6 +51,8 @@ void DwarfWriter::BeginModule(Module *M, void DwarfWriter::EndModule() { DE->EndModule(); DD->EndModule(); + delete DD; DD = 0; + delete DE; DE = 0; } /// BeginFunction - Gather pre-function debug information. Assumes being @@ -75,18 +77,18 @@ void DwarfWriter::EndFunction(MachineFunction *MF) { /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, - DICompileUnit CU) { - return DD->RecordSourceLine(Line, Col, CU); + MDNode *Scope) { + return DD->RecordSourceLine(Line, Col, Scope); } /// RecordRegionStart - Indicate the start of a region. 
-unsigned DwarfWriter::RecordRegionStart(GlobalVariable *V) { - return DD->RecordRegionStart(V); +unsigned DwarfWriter::RecordRegionStart(MDNode *N) { + return DD->RecordRegionStart(N); } /// RecordRegionEnd - Indicate the end of a region. -unsigned DwarfWriter::RecordRegionEnd(GlobalVariable *V) { - return DD->RecordRegionEnd(V); +unsigned DwarfWriter::RecordRegionEnd(MDNode *N) { + return DD->RecordRegionEnd(N); } /// getRecordSourceLineCount - Count source lines. @@ -96,9 +98,8 @@ unsigned DwarfWriter::getRecordSourceLineCount() { /// RecordVariable - Indicate the declaration of a local variable. /// -void DwarfWriter::RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI) { - DD->RecordVariable(GV, FrameIndex, MI); +void DwarfWriter::RecordVariable(MDNode *N, unsigned FrameIndex) { + DD->RecordVariable(N, FrameIndex); } /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should @@ -107,8 +108,7 @@ bool DwarfWriter::ShouldEmitDwarfDebug() const { return DD && DD->ShouldEmitDwarfDebug(); } -//// RecordInlinedFnStart - Global variable GV is inlined at the location marked -//// by LabelID label. +//// RecordInlinedFnStart unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU, unsigned Line, unsigned Col) { return DD->RecordInlinedFnStart(SP, CU, Line, Col); @@ -119,9 +119,9 @@ unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) { return DD->RecordInlinedFnEnd(SP); } -/// RecordVariableScope - Record scope for the variable declared by -/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. 
-void DwarfWriter::RecordVariableScope(DIVariable &DV, - const MachineInstr *DeclareMI) { - DD->RecordVariableScope(DV, DeclareMI); +void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) { + DD->SetDbgScopeEndLabels(MI, L); +} +void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) { + DD->SetDbgScopeBeginLabels(MI, L); } diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 8ba903a..06b92b7 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -15,12 +15,14 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/Module.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" - +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -28,10 +30,10 @@ namespace { class VISIBILITY_HIDDEN OcamlGCMetadataPrinter : public GCMetadataPrinter { public: void beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI); + const MCAsmInfo &MAI); void finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI); + const MCAsmInfo &MAI); }; } @@ -42,11 +44,11 @@ Y("ocaml", "ocaml 3.10-compatible collector"); void llvm::linkOcamlGCPrinter() { } static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI, const char *Id) { + const MCAsmInfo &MAI, const char *Id) { const std::string &MId = M.getModuleIdentifier(); std::string Mangled; - Mangled += TAI.getGlobalPrefix(); + Mangled += MAI.getGlobalPrefix(); Mangled += "caml"; size_t Letter = Mangled.size(); Mangled.append(MId.begin(), 
std::find(MId.begin(), MId.end(), '.')); @@ -56,18 +58,18 @@ static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP, // Capitalize the first letter of the module name. Mangled[Letter] = toupper(Mangled[Letter]); - if (const char *GlobalDirective = TAI.getGlobalDirective()) + if (const char *GlobalDirective = MAI.getGlobalDirective()) OS << GlobalDirective << Mangled << "\n"; OS << Mangled << ":\n"; } void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { - AP.SwitchToSection(TAI.getTextSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "code_begin"); + const MCAsmInfo &MAI) { + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "code_begin"); - AP.SwitchToSection(TAI.getDataSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "data_begin"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "data_begin"); } /// emitAssembly - Print the frametable. The ocaml frametable format is thus: @@ -87,55 +89,59 @@ void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP, /// either condition is detected in a function which uses the GC. 
/// void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { + const MCAsmInfo &MAI) { const char *AddressDirective; int AddressAlignLog; if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) { - AddressDirective = TAI.getData32bitsDirective(); + AddressDirective = MAI.getData32bitsDirective(); AddressAlignLog = 2; } else { - AddressDirective = TAI.getData64bitsDirective(); + AddressDirective = MAI.getData64bitsDirective(); AddressAlignLog = 3; } - AP.SwitchToSection(TAI.getTextSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "code_end"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "code_end"); - AP.SwitchToSection(TAI.getDataSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "data_end"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "data_end"); OS << AddressDirective << 0; // FIXME: Why does ocaml emit this?? AP.EOL(); - AP.SwitchToSection(TAI.getDataSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "frametable"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "frametable"); for (iterator I = begin(), IE = end(); I != IE; ++I) { GCFunctionInfo &FI = **I; uint64_t FrameSize = FI.getFrameSize(); if (FrameSize >= 1<<16) { - cerr << "Function '" << FI.getFunction().getNameStart() + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Function '" << FI.getFunction().getName() << "' is too large for the ocaml GC! " << "Frame size " << FrameSize << " >= 65536.\n"; - cerr << "(" << uintptr_t(&FI) << ")\n"; - abort(); // Very rude! + Msg << "(" << uintptr_t(&FI) << ")"; + llvm_report_error(Msg.str()); // Very rude! 
} - OS << "\t" << TAI.getCommentString() << " live roots for " - << FI.getFunction().getNameStart() << "\n"; + OS << "\t" << MAI.getCommentString() << " live roots for " + << FI.getFunction().getName() << "\n"; for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { size_t LiveCount = FI.live_size(J); if (LiveCount >= 1<<16) { - cerr << "Function '" << FI.getFunction().getNameStart() + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Function '" << FI.getFunction().getName() << "' is too large for the ocaml GC! " - << "Live root count " << LiveCount << " >= 65536.\n"; - abort(); // Very rude! + << "Live root count " << LiveCount << " >= 65536."; + llvm_report_error(Msg.str()); // Very rude! } OS << AddressDirective - << TAI.getPrivateGlobalPrefix() << "label" << J->Num; + << MAI.getPrivateGlobalPrefix() << "label" << J->Num; AP.EOL("call return address"); AP.EmitInt16(FrameSize); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 2635303..f9abeac 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "branchfolding" +#include "BranchFolding.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -27,6 +28,8 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -44,70 +47,35 @@ TailMergeThreshold("tail-merge-threshold", cl::desc("Max number of predecessors to consider tail merging"), cl::init(150), cl::Hidden); -namespace { - struct VISIBILITY_HIDDEN BranchFolder : public MachineFunctionPass { - static char ID; - explicit BranchFolder(bool 
defaultEnableTailMerge) : - MachineFunctionPass(&ID) { - switch (FlagEnableTailMerge) { - case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; - case cl::BOU_TRUE: EnableTailMerge = true; break; - case cl::BOU_FALSE: EnableTailMerge = false; break; - } - } - virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } - const TargetInstrInfo *TII; - MachineModuleInfo *MMI; - bool MadeChange; - private: - // Tail Merging. - bool EnableTailMerge; - bool TailMergeBlocks(MachineFunction &MF); - bool TryMergeBlocks(MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); - void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, - MachineBasicBlock *NewDest); - MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1); - unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength); - void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); - unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength); - - typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt; - typedef std::vector<MergePotentialsElt>::iterator MPIterator; - std::vector<MergePotentialsElt> MergePotentials; - - typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt; - std::vector<SameTailElt> SameTails; - - const TargetRegisterInfo *RegInfo; - RegScavenger *RS; - // Branch optzn. 
- bool OptimizeBranches(MachineFunction &MF); - void OptimizeBlock(MachineBasicBlock *MBB); - void RemoveDeadBlock(MachineBasicBlock *MBB); - bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); - - bool CanFallThrough(MachineBasicBlock *CurBB); - bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable, - MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond); - }; - char BranchFolder::ID = 0; -} +char BranchFolderPass::ID = 0; FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { - return new BranchFolder(DefaultEnableTailMerge); } + return new BranchFolderPass(DefaultEnableTailMerge); +} + +bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { + return OptimizeFunction(MF, + MF.getTarget().getInstrInfo(), + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); +} + + + +BranchFolder::BranchFolder(bool defaultEnableTailMerge) { + switch (FlagEnableTailMerge) { + case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; + case cl::BOU_TRUE: EnableTailMerge = true; break; + case cl::BOU_FALSE: EnableTailMerge = false; break; + } +} /// RemoveDeadBlock - Remove the specified dead machine basic block from the /// function, updating the CFG. void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); - DOUT << "\nRemoving MBB: " << *MBB; + DEBUG(errs() << "\nRemoving MBB: " << *MBB); MachineFunction *MF = MBB->getParent(); // drop all successors. 
@@ -146,7 +114,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); - for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg); + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) ImpDefRegs.insert(SubReg); ++I; @@ -180,32 +148,37 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { return true; } -bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - if (!TII) return false; +/// OptimizeFunction - Perhaps branch folding, tail merging and other +/// CFG optimizations on the given function. +bool BranchFolder::OptimizeFunction(MachineFunction &MF, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + MachineModuleInfo *mmi) { + if (!tii) return false; + + TII = tii; + TRI = tri; + MMI = mmi; - RegInfo = MF.getTarget().getRegisterInfo(); + RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; // Fix CFG. The later algorithms expect it to be right. - bool EverMadeChange = false; + bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true)) - EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); - EverMadeChange |= OptimizeImpDefsBlock(MBB); + MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); + MadeChange |= OptimizeImpDefsBlock(MBB); } - RS = RegInfo->requiresRegisterScavenging(MF) ? 
new RegScavenger() : NULL; - - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { MadeChangeThisIteration = false; MadeChangeThisIteration |= TailMergeBlocks(MF); MadeChangeThisIteration |= OptimizeBranches(MF); - EverMadeChange |= MadeChangeThisIteration; + MadeChange |= MadeChangeThisIteration; } // See if any jump tables have become mergable or dead as the code generator @@ -222,8 +195,12 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { // Scan the jump tables, seeing if there are any duplicates. Note that this // is N^2, which should be fixed someday. - for (unsigned i = 1, e = JTs.size(); i != e; ++i) - JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + for (unsigned i = 1, e = JTs.size(); i != e; ++i) { + if (JTs[i].MBBs.empty()) + JTMapping.push_back(i); + else + JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + } // If a jump table was merge with another one, walk the function rewriting // references to jump tables to reference the new JT ID's. 
Keep track of @@ -250,12 +227,12 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) if (!JTIsLive.test(i)) { JTI->RemoveJumpTable(i); - EverMadeChange = true; + MadeChange = true; } } - + delete RS; - return EverMadeChange; + return MadeChange; } //===----------------------------------------------------------------------===// @@ -395,9 +372,9 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, RS->enterBasicBlock(&CurMBB); if (!CurMBB.empty()) RS->forward(prior(CurMBB.end())); - BitVector RegsLiveAtExit(RegInfo->getNumRegs()); + BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++) + for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++) if (RegsLiveAtExit[i]) NewMBB->addLiveIn(i); } @@ -461,7 +438,7 @@ static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p, // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing // an object with itself. 
#ifndef _GLIBCXX_DEBUG - assert(0 && "Predecessor appears twice"); + llvm_unreachable("Predecessor appears twice"); #endif return false; } @@ -567,8 +544,8 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second; MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; - DOUT << "\nSplitting " << MBB->getNumber() << ", size " << - maxCommonTailLength; + DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size " + << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); SameTails[commonTailIndex].first->second = newMBB; @@ -590,13 +567,14 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, MachineBasicBlock* PredBB) { + bool MadeChange = false; + // It doesn't make sense to save a single instruction since tail merging // will add a jump. // FIXME: Ask the target to provide the threshold? unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1; - MadeChange = false; - DOUT << "\nTryMergeBlocks " << MergePotentials.size() << '\n'; + DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n'); // Sort by hash value so that blocks with identical end sequences sort // together. @@ -643,17 +621,17 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. - DOUT << "\nUsing common tail " << MBB->getNumber() << " for "; + DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for "); for (unsigned int i=0; i<SameTails.size(); ++i) { if (commonTailIndex==i) continue; - DOUT << SameTails[i].first->second->getNumber() << ","; + DEBUG(errs() << SameTails[i].first->second->getNumber() << ","); // Hack the end off BB i, making it jump to BB commonTailIndex instead. 
ReplaceTailWithBranchTo(SameTails[i].second, MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. MergePotentials.erase(SameTails[i].first); } - DOUT << "\n"; + DEBUG(errs() << "\n"); // We leave commonTailIndex in the worklist in case there are other blocks // that match it with a smaller number of instructions. MadeChange = true; @@ -665,7 +643,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (!EnableTailMerge) return false; - MadeChange = false; + bool MadeChange = false; // First find blocks with no successors. MergePotentials.clear(); @@ -699,6 +677,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { + SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; MachineBasicBlock *IBB = I; MachineBasicBlock *PredBB = prior(I); MergePotentials.clear(); @@ -709,6 +688,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Skip blocks that loop to themselves, can't tail merge these. if (PBB==IBB) continue; + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB)) + continue; MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { @@ -772,14 +754,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { //===----------------------------------------------------------------------===// bool BranchFolder::OptimizeBranches(MachineFunction &MF) { - MadeChange = false; + bool MadeChange = false; // Make sure blocks are numbered in order MF.RenumberBlocks(); for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; - OptimizeBlock(MBB); + MadeChange |= OptimizeBlock(MBB); // If it is dead, remove it. 
if (MBB->pred_empty()) { @@ -873,7 +855,9 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, /// OptimizeBlock - Analyze and optimize control flow related to the specified /// block. This is never called on the entry block. -void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { +bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { + bool MadeChange = false; + MachineFunction::iterator FallThrough = MBB; ++FallThrough; @@ -882,7 +866,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // points to this block. if (MBB->empty() && !MBB->isLandingPad()) { // Dead block? Leave for cleanup later. - if (MBB->pred_empty()) return; + if (MBB->pred_empty()) return MadeChange; if (FallThrough == MBB->getParent()->end()) { // TODO: Simplify preds to not branch here if possible! @@ -893,14 +877,13 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MachineBasicBlock *Pred = *(MBB->pred_end()-1); Pred->ReplaceUsesOfBlockWith(MBB, FallThrough); } - // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. MBB->getParent()->getJumpTableInfo()-> ReplaceMBBInJumpTables(MBB, FallThrough); MadeChange = true; } - return; + return MadeChange; } // Check to see if we can simplify the terminator of the block before this @@ -1004,8 +987,8 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // Reverse the branch so we will fall through on the previous true cond. 
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { - DOUT << "\nMoving MBB: " << *MBB; - DOUT << "To make fallthrough to: " << *PriorTBB << "\n"; + DEBUG(errs() << "\nMoving MBB: " << *MBB + << "To make fallthrough to: " << *PriorTBB << "\n"); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); @@ -1014,7 +997,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MBB->moveAfter(--MBB->getParent()->end()); MadeChange = true; ++NumBranchOpts; - return; + return MadeChange; } } } @@ -1116,7 +1099,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (DidChange) { ++NumBranchOpts; MadeChange = true; - if (!HasBranchToSelf) return; + if (!HasBranchToSelf) return MadeChange; } } } @@ -1197,8 +1180,10 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { PrevBB.isSuccessor(FallThrough)) { MBB->moveAfter(--MBB->getParent()->end()); MadeChange = true; - return; + return MadeChange; } } } + + return MadeChange; } diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h new file mode 100644 index 0000000..9763e33 --- /dev/null +++ b/lib/CodeGen/BranchFolding.h @@ -0,0 +1,84 @@ +//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP +#define LLVM_CODEGEN_BRANCHFOLDING_HPP + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include <vector> + +namespace llvm { + class MachineFunction; + class MachineModuleInfo; + class RegScavenger; + class TargetInstrInfo; + class TargetRegisterInfo; + + class BranchFolder { + public: + explicit BranchFolder(bool defaultEnableTailMerge); + + bool OptimizeFunction(MachineFunction &MF, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + MachineModuleInfo *mmi); + private: + typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt; + typedef std::vector<MergePotentialsElt>::iterator MPIterator; + std::vector<MergePotentialsElt> MergePotentials; + + typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt; + std::vector<SameTailElt> SameTails; + + bool EnableTailMerge; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineModuleInfo *MMI; + RegScavenger *RS; + + bool TailMergeBlocks(MachineFunction &MF); + bool TryMergeBlocks(MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest); + MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1); + unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength); + void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength); + + bool OptimizeBranches(MachineFunction &MF); + bool OptimizeBlock(MachineBasicBlock *MBB); + void RemoveDeadBlock(MachineBasicBlock *MBB); + bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); + + bool CanFallThrough(MachineBasicBlock *CurBB); + bool CanFallThrough(MachineBasicBlock *CurBB, bool 
BranchUnAnalyzable, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond); + }; + + + /// BranchFolderPass - Wrap branch folder in a machine function pass. + class BranchFolderPass : public MachineFunctionPass, + public BranchFolder { + public: + static char ID; + explicit BranchFolderPass(bool defaultEnableTailMerge) + : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Control Flow Optimizer"; } + }; +} + +#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 48f17d0..713c30c 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMCodeGen DwarfEHPrepare.cpp ELFCodeEmitter.cpp ELFWriter.cpp + ExactHazardRecognizer.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp @@ -12,7 +13,6 @@ add_llvm_library(LLVMCodeGen IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp - LazyLiveness.cpp LiveInterval.cpp LiveIntervalAnalysis.cpp LiveStackAnalysis.cpp @@ -23,27 +23,28 @@ add_llvm_library(LLVMCodeGen MachineBasicBlock.cpp MachineDominators.cpp MachineFunction.cpp + MachineFunctionAnalysis.cpp + MachineFunctionPass.cpp MachineInstr.cpp MachineLICM.cpp MachineLoopInfo.cpp MachineModuleInfo.cpp + MachineModuleInfoImpls.cpp MachinePassRegistry.cpp MachineRegisterInfo.cpp MachineSink.cpp MachineVerifier.cpp + ObjectCodeEmitter.cpp OcamlGC.cpp - PBQP.cpp PHIElimination.cpp Passes.cpp PostRASchedulerList.cpp PreAllocSplitting.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp - RegAllocBigBlock.cpp RegAllocLinearScan.cpp RegAllocLocal.cpp RegAllocPBQP.cpp - RegAllocSimple.cpp RegisterCoalescer.cpp RegisterScavenging.cpp ScheduleDAG.cpp @@ -53,6 +54,7 @@ add_llvm_library(LLVMCodeGen ShadowStackGC.cpp ShrinkWrapping.cpp SimpleRegisterCoalescing.cpp + SjLjEHPrepare.cpp 
Spiller.cpp StackProtector.cpp StackSlotColoring.cpp diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 383098e..932fae4 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -95,11 +95,11 @@ FunctionPass *llvm::createCodePlacementOptPass() { /// ... /// jmp B /// -/// C: --> new loop header +/// C: /// ... /// <fallthough to B> /// -/// B: +/// B: --> loop header /// ... /// jcc <cond> C, [exit] /// diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 4832a5e..078ed3d 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -36,7 +37,7 @@ namespace { DeadMachineInstructionElim() : MachineFunctionPass(&ID) {} private: - bool isDead(MachineInstr *MI) const; + bool isDead(const MachineInstr *MI) const; }; } char DeadMachineInstructionElim::ID = 0; @@ -49,10 +50,10 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() { return new DeadMachineInstructionElim(); } -bool DeadMachineInstructionElim::isDead(MachineInstr *MI) const { +bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, SawStore)) + if (!MI->isSafeToMove(TII, SawStore, 0)) return false; // Examine each operand. @@ -110,7 +111,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // If the instruction is dead, delete it! 
if (isDead(MI)) { - DOUT << "DeadMachineInstructionElim: DELETING: " << *MI; + DEBUG(errs() << "DeadMachineInstructionElim: DELETING: " << *MI); AnyChanges = true; MI->eraseFromParent(); MIE = MBB->rend(); diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 720e3d1..72b3f92 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -107,7 +107,9 @@ FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) { /// NormalizeLandingPads - Normalize and discover landing pads, noting them /// in the LandingPads set. A landing pad is normal if the only CFG edges -/// that end at it are unwind edges from invoke instructions. +/// that end at it are unwind edges from invoke instructions. If we inlined +/// through an invoke we could have a normal branch from the previous +/// unwind block through to the landing pad for the original invoke. /// Abnormal landing pads are fixed up by redirecting all unwind edges to /// a new basic block which falls through to the original. bool DwarfEHPrepare::NormalizeLandingPads() { @@ -132,6 +134,7 @@ bool DwarfEHPrepare::NormalizeLandingPads() { break; } } + if (OnlyUnwoundTo) { // Only unwind edges lead to the landing pad. Remember the landing pad. LandingPads.insert(LPad); @@ -142,7 +145,8 @@ bool DwarfEHPrepare::NormalizeLandingPads() { // edges to a new basic block which falls through into this one. // Create the new basic block. - BasicBlock *NewBB = BasicBlock::Create(LPad->getName() + "_unwind_edge"); + BasicBlock *NewBB = BasicBlock::Create(F->getContext(), + LPad->getName() + "_unwind_edge"); // Insert it into the function right before the original landing pad. LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); @@ -218,28 +222,43 @@ bool DwarfEHPrepare::NormalizeLandingPads() { /// at runtime if there is no such exception: using unwind to throw a new /// exception is currently not supported. 
bool DwarfEHPrepare::LowerUnwinds() { - bool Changed = false; + SmallVector<TerminatorInst*, 16> UnwindInsts; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); - if (!isa<UnwindInst>(TI)) - continue; + if (isa<UnwindInst>(TI)) + UnwindInsts.push_back(TI); + } + + if (UnwindInsts.empty()) return false; + + // Find the rewind function if we didn't already. + if (!RewindFunction) { + LLVMContext &Ctx = UnwindInsts[0]->getContext(); + std::vector<const Type*> + Params(1, Type::getInt8PtrTy(Ctx)); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), + Params, false); + const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); + RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + } + + bool Changed = false; + + for (SmallVectorImpl<TerminatorInst*>::iterator + I = UnwindInsts.begin(), E = UnwindInsts.end(); I != E; ++I) { + TerminatorInst *TI = *I; // Replace the unwind instruction with a call to _Unwind_Resume (or the // appropriate target equivalent) followed by an UnreachableInst. - // Find the rewind function if we didn't already. - if (!RewindFunction) { - std::vector<const Type*> Params(1, PointerType::getUnqual(Type::Int8Ty)); - FunctionType *FTy = FunctionType::get(Type::VoidTy, Params, false); - const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); - } - // Create the call... - CallInst::Create(RewindFunction, CreateReadOfExceptionValue(I), "", TI); + CallInst *CI = CallInst::Create(RewindFunction, + CreateReadOfExceptionValue(TI->getParent()), + "", TI); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // ...followed by an UnreachableInst. - new UnreachableInst(TI); + new UnreachableInst(TI->getContext(), TI); // Nuke the unwind instruction. 
TI->eraseFromParent(); @@ -314,7 +333,7 @@ bool DwarfEHPrepare::PromoteStackTemporaries() { if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) { // Turn the exception temporary into registers and phi nodes if possible. std::vector<AllocaInst*> Allocas(1, ExceptionValueVar); - PromoteMemToReg(Allocas, *DT, *DF); + PromoteMemToReg(Allocas, *DT, *DF, ExceptionValueVar->getContext()); return true; } return false; @@ -354,8 +373,8 @@ Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) { // Create the temporary if we didn't already. if (!ExceptionValueVar) { - ExceptionValueVar = new AllocaInst(PointerType::getUnqual(Type::Int8Ty), - "eh.value", F->begin()->begin()); + ExceptionValueVar = new AllocaInst(PointerType::getUnqual( + Type::getInt8Ty(BB->getContext())), "eh.value", F->begin()->begin()); ++NumStackTempsIntroduced; } diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index 7e983a4..b466e89 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -52,6 +52,159 @@ namespace llvm { EV_CURRENT = 1 }; + /// ELFSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct ELFSym { + + // ELF symbols are related to llvm ones by being one of the two llvm + // types, for the other ones (section, file, func) a null pointer is + // assumed by default. + union { + const GlobalValue *GV; // If this is a pointer to a GV + const char *Ext; // If this is a pointer to a named symbol + } Source; + + // Describes from which source type this ELF symbol comes from, + // they can be GlobalValue, ExternalSymbol or neither. + enum { + isGV, // The Source.GV field is valid. + isExtSym, // The Source.ExtSym field is valid. 
+ isOther // Not a GlobalValue or External Symbol + }; + unsigned SourceType; + + bool isGlobalValue() const { return SourceType == isGV; } + bool isExternalSym() const { return SourceType == isExtSym; } + + // getGlobalValue - If this is a global value which originated the + // elf symbol, return a reference to it. + const GlobalValue *getGlobalValue() const { + assert(SourceType == isGV && "This is not a global value"); + return Source.GV; + }; + + // getExternalSym - If this is an external symbol which originated the + // elf symbol, return a reference to it. + const char *getExternalSymbol() const { + assert(SourceType == isExtSym && "This is not an external symbol"); + return Source.Ext; + }; + + // getGV - From a global value return a elf symbol to represent it + static ELFSym *getGV(const GlobalValue *GV, unsigned Bind, + unsigned Type, unsigned Visibility) { + ELFSym *Sym = new ELFSym(); + Sym->Source.GV = GV; + Sym->setBind(Bind); + Sym->setType(Type); + Sym->setVisibility(Visibility); + Sym->SourceType = isGV; + return Sym; + } + + // getExtSym - Create and return an elf symbol to represent an + // external symbol + static ELFSym *getExtSym(const char *Ext) { + ELFSym *Sym = new ELFSym(); + Sym->Source.Ext = Ext; + Sym->setBind(STB_GLOBAL); + Sym->setType(STT_NOTYPE); + Sym->setVisibility(STV_DEFAULT); + Sym->SourceType = isExtSym; + return Sym; + } + + // getSectionSym - Returns a elf symbol to represent an elf section + static ELFSym *getSectionSym() { + ELFSym *Sym = new ELFSym(); + Sym->setBind(STB_LOCAL); + Sym->setType(STT_SECTION); + Sym->setVisibility(STV_DEFAULT); + Sym->SourceType = isOther; + return Sym; + } + + // getFileSym - Returns a elf symbol to represent the module identifier + static ELFSym *getFileSym() { + ELFSym *Sym = new ELFSym(); + Sym->setBind(STB_LOCAL); + Sym->setType(STT_FILE); + Sym->setVisibility(STV_DEFAULT); + Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS; + Sym->SourceType = isOther; + return Sym; + } + + // getUndefGV 
- Returns a STT_NOTYPE symbol + static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) { + ELFSym *Sym = new ELFSym(); + Sym->Source.GV = GV; + Sym->setBind(Bind); + Sym->setType(STT_NOTYPE); + Sym->setVisibility(STV_DEFAULT); + Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF; + Sym->SourceType = isGV; + return Sym; + } + + // ELF specific fields + unsigned NameIdx; // Index in .strtab of name, once emitted. + uint64_t Value; + unsigned Size; + uint8_t Info; + uint8_t Other; + unsigned short SectionIdx; + + // Symbol index into the Symbol table + unsigned SymTabIdx; + + enum { + STB_LOCAL = 0, // Local sym, not visible outside obj file containing def + STB_GLOBAL = 1, // Global sym, visible to all object files being combined + STB_WEAK = 2 // Weak symbol, like global but lower-precedence + }; + + enum { + STT_NOTYPE = 0, // Symbol's type is not specified + STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.) + STT_FUNC = 2, // Symbol is executable code (function, etc.) 
+ STT_SECTION = 3, // Symbol refers to a section + STT_FILE = 4 // Local, absolute symbol that refers to a file + }; + + enum { + STV_DEFAULT = 0, // Visibility is specified by binding type + STV_INTERNAL = 1, // Defined by processor supplements + STV_HIDDEN = 2, // Not visible to other components + STV_PROTECTED = 3 // Visible in other components but not preemptable + }; + + ELFSym() : SourceType(isOther), NameIdx(0), Value(0), + Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(0), + SymTabIdx(0) {} + + unsigned getBind() const { return (Info >> 4) & 0xf; } + unsigned getType() const { return Info & 0xf; } + bool isLocalBind() const { return getBind() == STB_LOCAL; } + bool isFileType() const { return getType() == STT_FILE; } + + void setBind(unsigned X) { + assert(X == (X & 0xF) && "Bind value out of range!"); + Info = (Info & 0x0F) | (X << 4); + } + + void setType(unsigned X) { + assert(X == (X & 0xF) && "Type value out of range!"); + Info = (Info & 0xF0) | X; + } + + void setVisibility(unsigned V) { + assert(V == (V & 0x3) && "Visibility value out of range!"); + Other = V; + } + }; + /// ELFSection - This struct contains information about each section that is /// emitted to the file. This is eventually turned into the section header /// table at the end of the file. @@ -117,78 +270,19 @@ namespace llvm { /// SectionIdx - The number of the section in the Section Table. unsigned short SectionIdx; - ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) - : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), - Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0) {} - }; - - /// ELFSym - This struct contains information about each symbol that is - /// added to logical symbol table for the module. This is eventually - /// turned into a real symbol table in the file. - struct ELFSym { - // The global value this corresponds to. 
Global symbols can be on of the - // 3 types : if this symbol has a zero initializer, it is common or should - // be placed in bss section otherwise it's a constant. - const GlobalValue *GV; - bool IsCommon; - bool IsBss; - bool IsConstant; - - // ELF specific fields - unsigned NameIdx; // Index in .strtab of name, once emitted. - uint64_t Value; - unsigned Size; - uint8_t Info; - uint8_t Other; - unsigned short SectionIdx; + /// Sym - The symbol to represent this section if it has one. + ELFSym *Sym; - // Symbol index into the Symbol table - unsigned SymTabIdx; - - enum { - STB_LOCAL = 0, - STB_GLOBAL = 1, - STB_WEAK = 2 - }; - - enum { - STT_NOTYPE = 0, - STT_OBJECT = 1, - STT_FUNC = 2, - STT_SECTION = 3, - STT_FILE = 4 - }; - - enum { - STV_DEFAULT = 0, // Visibility is specified by binding type - STV_INTERNAL = 1, // Defined by processor supplements - STV_HIDDEN = 2, // Not visible to other components - STV_PROTECTED = 3 // Visible in other components but not preemptable - }; - - ELFSym(const GlobalValue *gv) : GV(gv), IsCommon(false), IsBss(false), - IsConstant(false), NameIdx(0), Value(0), - Size(0), Info(0), Other(STV_DEFAULT), - SectionIdx(ELFSection::SHN_UNDEF), - SymTabIdx(0) {} - - unsigned getBind() { return (Info >> 4) & 0xf; } - unsigned getType() { return Info & 0xf; } - - void setBind(unsigned X) { - assert(X == (X & 0xF) && "Bind value out of range!"); - Info = (Info & 0x0F) | (X << 4); + /// getSymIndex - Returns the symbol table index of the symbol + /// representing this section. 
+ unsigned getSymbolTableIndex() const { + assert(Sym && "section not present in the symbol table"); + return Sym->SymTabIdx; } - void setType(unsigned X) { - assert(X == (X & 0xF) && "Type value out of range!"); - Info = (Info & 0xF0) | X; - } - - void setVisibility(unsigned V) { - assert(V == (V & 0x3) && "Type value out of range!"); - Other = V; - } + ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) + : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), + Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {} }; /// ELFRelocation - This class contains all the information necessary to diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 691f194..a6429f7 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -17,12 +17,16 @@ #include "llvm/Function.h" #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRelocation.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetELFWriterInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" //===----------------------------------------------------------------------===// // ELFCodeEmitter Implementation @@ -33,84 +37,75 @@ namespace llvm { /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. void ELFCodeEmitter::startFunction(MachineFunction &MF) { - // Get the ELF Section that this function belongs in. 
- ES = &EW.getTextSection(); + DEBUG(errs() << "processing function: " + << MF.getFunction()->getName() << "\n"); - DOUT << "processing function: " << MF.getFunction()->getName() << "\n"; + // Get the ELF Section that this function belongs in. + ES = &EW.getTextSection(MF.getFunction()); - // FIXME: better memory management, this will be replaced by BinaryObjects - BinaryData &BD = ES->getData(); - BD.reserve(4096); - BufferBegin = &BD[0]; - BufferEnd = BufferBegin + BD.capacity(); + // Set the desired binary object to be used by the code emitters + setBinaryObject(ES); // Get the function alignment in bytes unsigned Align = (1 << MF.getAlignment()); - // Align the section size with the function alignment, so the function can - // start in a aligned offset, also update the section alignment if needed. - if (ES->Align < Align) ES->Align = Align; - ES->Size = (ES->Size + (Align-1)) & (-Align); - - // Snaity check on allocated space for text section - assert( ES->Size < 4096 && "no more space in TextSection" ); - - // FIXME: Using ES->Size directly here instead of calculating it from the - // output buffer size (impossible because the code emitter deals only in raw - // bytes) forces us to manually synchronize size and write padding zero bytes - // to the output buffer for all non-text sections. For text sections, we do - // not synchonize the output buffer, and we just blow up if anyone tries to - // write non-code to it. An assert should probably be added to - // AddSymbolToSection to prevent calling it on the text section. - CurBufferPtr = BufferBegin + ES->Size; - - // Record function start address relative to BufferBegin - FnStartPtr = CurBufferPtr; + // The function must start on its required alignment + ES->emitAlignment(Align); + + // Update the section alignment if needed. 
+ ES->Align = std::max(ES->Align, Align); + + // Record the function start offset + FnStartOff = ES->getCurrentPCOffset(); + + // Emit constant pool and jump tables to their appropriate sections. + // They need to be emitted before the function because in some targets + // the later may reference JT or CP entry address. + emitConstantPool(MF.getConstantPool()); + emitJumpTables(MF.getJumpTableInfo()); } /// finishFunction - This callback is invoked after the function is completely /// finished. bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { - // Update Section Size - ES->Size = CurBufferPtr - BufferBegin; - // Add a symbol to represent the function. const Function *F = MF.getFunction(); - ELFSym FnSym(F); - FnSym.setType(ELFSym::STT_FUNC); - FnSym.setBind(EW.getGlobalELFLinkage(F)); - FnSym.setVisibility(EW.getGlobalELFVisibility(F)); - FnSym.SectionIdx = ES->SectionIdx; - FnSym.Size = CurBufferPtr-FnStartPtr; + ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELFSym::STT_FUNC, + EW.getGlobalELFVisibility(F)); + FnSym->SectionIdx = ES->SectionIdx; + FnSym->Size = ES->getCurrentPCOffset()-FnStartOff; + EW.AddPendingGlobalSymbol(F, true); // Offset from start of Section - FnSym.Value = FnStartPtr-BufferBegin; - - // Locals should go on the symbol list front - if (!F->hasPrivateLinkage()) { - if (FnSym.getBind() == ELFSym::STB_LOCAL) - EW.SymbolList.push_front(FnSym); - else - EW.SymbolList.push_back(FnSym); + FnSym->Value = FnStartOff; + + if (!F->hasPrivateLinkage()) + EW.SymbolList.push_back(FnSym); + + // Patch up Jump Table Section relocations to use the real MBBs offsets + // now that the MBB label offsets inside the function are known. 
+ if (!MF.getJumpTableInfo()->isEmpty()) { + ELFSection &JTSection = EW.getJumpTableSection(); + for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(), + MRE = JTRelocations.end(); MRI != MRE; ++MRI) { + MachineRelocation &MR = *MRI; + unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); + MR.setResultPointer((void*)MBBOffset); + MR.setConstantVal(ES->SectionIdx); + JTSection.addRelocation(MR); + } } - // Emit constant pool to appropriate section(s) - emitConstantPool(MF.getConstantPool()); - - // Emit jump tables to appropriate section - emitJumpTables(MF.getJumpTableInfo()); - - // Relocations - // ----------- // If we have emitted any relocations to function-specific objects such as // basic blocks, constant pools entries, or jump tables, record their - // addresses now so that we can rewrite them with the correct addresses - // later. + // addresses now so that we can rewrite them with the correct addresses later for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { MachineRelocation &MR = Relocations[i]; intptr_t Addr; if (MR.isGlobalValue()) { - EW.PendingGlobals.insert(MR.getGlobalValue()); + EW.AddPendingGlobalSymbol(MR.getGlobalValue()); + } else if (MR.isExternalSymbol()) { + EW.AddPendingExternalSymbol(MR.getExternalSymbol()); } else if (MR.isBasicBlock()) { Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); MR.setConstantVal(ES->SectionIdx); @@ -120,16 +115,18 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); MR.setResultPointer((void*)Addr); } else if (MR.isJumpTableIndex()) { + ELFSection &JTSection = EW.getJumpTableSection(); Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); + MR.setConstantVal(JTSection.SectionIdx); MR.setResultPointer((void*)Addr); - MR.setConstantVal(JumpTableSectionIdx); } else { - assert(0 && "Unhandled relocation type"); + llvm_unreachable("Unhandled relocation type"); } ES->addRelocation(MR); } // Clear 
per-function data structures. + JTRelocations.clear(); Relocations.clear(); CPLocations.clear(); CPSections.clear(); @@ -148,25 +145,19 @@ void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { assert(TM.getRelocationModel() != Reloc::PIC_ && "PIC codegen not yet handled for elf constant pools!"); - const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); for (unsigned i = 0, e = CP.size(); i != e; ++i) { MachineConstantPoolEntry CPE = CP[i]; - // Get the right ELF Section for this constant pool entry - std::string CstPoolName = - TAI->SelectSectionForMachineConst(CPE.getType())->getName(); - ELFSection &CstPoolSection = - EW.getConstantPoolSection(CstPoolName, CPE.getAlignment()); - // Record the constant pool location and the section index - CPLocations.push_back(CstPoolSection.size()); - CPSections.push_back(CstPoolSection.SectionIdx); + ELFSection &CstPool = EW.getConstantPoolSection(CPE); + CPLocations.push_back(CstPool.size()); + CPSections.push_back(CstPool.SectionIdx); if (CPE.isMachineConstantPoolEntry()) assert("CPE.isMachineConstantPoolEntry not supported yet"); // Emit the constant to constant pool section - EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPoolSection); + EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool); } } @@ -180,44 +171,32 @@ void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { assert(TM.getRelocationModel() != Reloc::PIC_ && "PIC codegen not yet handled for elf jump tables!"); - const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); + const TargetELFWriterInfo *TEW = TM.getELFWriterInfo(); + unsigned EntrySize = MJTI->getEntrySize(); // Get the ELF Section to emit the jump table - unsigned Align = TM.getTargetData()->getPointerABIAlignment(); - std::string JTName(TAI->getJumpTableDataSection()); - ELFSection &JTSection = EW.getJumpTableSection(JTName, Align); - JumpTableSectionIdx = JTSection.SectionIdx; - - // Entries in the JT Section are relocated against the text section - ELFSection &TextSection = EW.getTextSection(); + 
ELFSection &JTSection = EW.getJumpTableSection(); // For each JT, record its offset from the start of the section for (unsigned i = 0, e = JT.size(); i != e; ++i) { const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; - DOUT << "JTSection.size(): " << JTSection.size() << "\n"; - DOUT << "JTLocations.size: " << JTLocations.size() << "\n"; - // Record JT 'i' offset in the JT section JTLocations.push_back(JTSection.size()); // Each MBB entry in the Jump table section has a relocation entry // against the current text section. for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { + unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy(); MachineRelocation MR = - MachineRelocation::getBB(JTSection.size(), - MachineRelocation::VANILLA, - MBBs[mi]); - - // Offset of JT 'i' in JT section - MR.setResultPointer((void*)getMachineBasicBlockAddress(MBBs[mi])); - MR.setConstantVal(TextSection.SectionIdx); + MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]); // Add the relocation to the Jump Table section - JTSection.addRelocation(MR); + JTRelocations.push_back(MR); // Output placeholder for MBB in the JT section - JTSection.emitWord(0); + for (unsigned s=0; s < EntrySize; ++s) + JTSection.emitByte(0); } } } diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h index 982aebf..b5e9c84 100644 --- a/lib/CodeGen/ELFCodeEmitter.h +++ b/lib/CodeGen/ELFCodeEmitter.h @@ -10,7 +10,7 @@ #ifndef ELFCODEEMITTER_H #define ELFCODEEMITTER_H -#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" #include <vector> namespace llvm { @@ -19,7 +19,7 @@ namespace llvm { /// ELFCodeEmitter - This class is used by the ELFWriter to /// emit the code for functions to the ELF file. 
- class ELFCodeEmitter : public MachineCodeEmitter { + class ELFCodeEmitter : public ObjectCodeEmitter { ELFWriter &EW; /// Target machine description @@ -28,102 +28,48 @@ namespace llvm { /// Section containing code for functions ELFSection *ES; - /// Relocations - These are the relocations that the function needs, as - /// emitted. + /// Relocations - Record relocations needed by the current function std::vector<MachineRelocation> Relocations; - /// CPLocations - This is a map of constant pool indices to offsets from the - /// start of the section for that constant pool index. - std::vector<uintptr_t> CPLocations; + /// JTRelocations - Record relocations needed by the relocation + /// section. + std::vector<MachineRelocation> JTRelocations; - /// CPSections - This is a map of constant pool indices to the MachOSection - /// containing the constant pool entry for that index. - std::vector<unsigned> CPSections; - - /// JTLocations - This is a map of jump table indices to offsets from the - /// start of the section for that jump table index. - std::vector<uintptr_t> JTLocations; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback. 
- std::vector<uintptr_t> MBBLocations; - - /// FnStartPtr - Pointer to the start location of the current function - /// in the buffer - uint8_t *FnStartPtr; - - /// JumpTableSectionIdx - Holds the index of the Jump Table Section - unsigned JumpTableSectionIdx; + /// FnStartPtr - Function offset from the beginning of ELFSection 'ES' + uintptr_t FnStartOff; public: - explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), - JumpTableSectionIdx(0) {} - - void startFunction(MachineFunction &F); - bool finishFunction(MachineFunction &F); + explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {} + /// addRelocation - Register new relocations for this function void addRelocation(const MachineRelocation &MR) { Relocations.push_back(MR); } - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); - } + /// emitConstantPool - For each constant pool entry, figure out which + /// section the constant should live in and emit data to it + void emitConstantPool(MachineConstantPool *MCP); - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; - } + /// emitJumpTables - Emit all the jump tables for a given jump table + /// info and record them to the appropriate section. 
+ void emitJumpTables(MachineJumpTableInfo *MJTI); - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; - } + void startFunction(MachineFunction &F); + bool finishFunction(MachineFunction &F); - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; + /// emitLabel - Emits a label + virtual void emitLabel(uint64_t LabelID) { + assert("emitLabel not implemented"); } + /// getLabelAddress - Return the address of the specified LabelID, + /// only usable after the LabelID has been emitted. virtual uintptr_t getLabelAddress(uint64_t Label) const { - assert(0 && "Label address not implementated yet!"); - abort(); + assert("getLabelAddress not implemented"); return 0; } - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "emit Label not implementated yet!"); - abort(); - } - - /// emitConstantPool - For each constant pool entry, figure out which section - /// the constant should live in and emit the constant. - void emitConstantPool(MachineConstantPool *MCP); - - /// emitJumpTables - Emit all the jump tables for a given jump table info - /// record to the appropriate section. - void emitJumpTables(MachineJumpTableInfo *MJTI); - virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {} - /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! 
- void startGVStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment = 1) { - assert(0 && "JIT specific function called!"); - abort(); - } - void startGVStub(const GlobalValue* F, void *Buffer, unsigned StubSize) { - assert(0 && "JIT specific function called!"); - abort(); - } - void *finishGVStub(const GlobalValue *F) { - assert(0 && "JIT specific function called!"); - abort(); - return 0; - } }; // end class ELFCodeEmitter } // end namespace llvm diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 9e91524..3e1ee11 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -29,7 +29,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "elfwriter" - #include "ELF.h" #include "ELFWriter.h" #include "ELFCodeEmitter.h" @@ -40,26 +39,33 @@ #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/FileWriters.h" #include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetELFWriterInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" + using namespace llvm; char ELFWriter::ID = 0; -/// AddELFWriter - Concrete function to add the ELF writer to the function pass -/// manager. 
-MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, - raw_ostream &O, - TargetMachine &TM) { + +/// AddELFWriter - Add the ELF writer to the function pass manager +ObjectCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, + raw_ostream &O, + TargetMachine &TM) { ELFWriter *EW = new ELFWriter(O, TM); PM.add(EW); - return &EW->getMachineCodeEmitter(); + return EW->getObjectCodeEmitter(); } //===----------------------------------------------------------------------===// @@ -68,27 +74,51 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) : MachineFunctionPass(&ID), O(o), TM(tm), + OutContext(*new MCContext()), + TLOF(TM.getTargetLowering()->getObjFileLowering()), is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), isLittleEndian(TM.getTargetData()->isLittleEndian()), ElfHdr(isLittleEndian, is64Bit) { - TAI = TM.getTargetAsmInfo(); + MAI = TM.getMCAsmInfo(); TEW = TM.getELFWriterInfo(); - // Create the machine code emitter object for this target. - MCE = new ELFCodeEmitter(*this); + // Create the object code emitter object for this target. + ElfCE = new ELFCodeEmitter(*this); // Inital number of sections NumSections = 0; } ELFWriter::~ELFWriter() { - delete MCE; + delete ElfCE; + delete &OutContext; + + while(!SymbolList.empty()) { + delete SymbolList.back(); + SymbolList.pop_back(); + } + + while(!PrivateSyms.empty()) { + delete PrivateSyms.back(); + PrivateSyms.pop_back(); + } + + while(!SectionList.empty()) { + delete SectionList.back(); + SectionList.pop_back(); + } + + // Release the name mangler object. + delete Mang; Mang = 0; } // doInitialization - Emit the file header and all of the global variables for // the module to the ELF file. bool ELFWriter::doInitialization(Module &M) { + // Initialize TargetLoweringObjectFile. 
+ const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM); + Mang = new Mangler(M); // ELF Header @@ -138,13 +168,115 @@ bool ELFWriter::doInitialization(Module &M) { // Add the null section, which is required to be first in the file. getNullSection(); + // The first entry in the symtab is the null symbol and the second + // is a local symbol containing the module/file name + SymbolList.push_back(new ELFSym()); + SymbolList.push_back(ELFSym::getFileSym()); + return false; } +// AddPendingGlobalSymbol - Add a global to be processed and to +// the global symbol lookup, use a zero index because the table +// index will be determined later. +void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV, + bool AddToLookup /* = false */) { + PendingGlobals.insert(GV); + if (AddToLookup) + GblSymLookup[GV] = 0; +} + +// AddPendingExternalSymbol - Add the external to be processed +// and to the external symbol lookup, use a zero index because +// the symbol table index will be determined later. 
+void ELFWriter::AddPendingExternalSymbol(const char *External) { + PendingExternals.insert(External); + ExtSymLookup[External] = 0; +} + +ELFSection &ELFWriter::getDataSection() { + const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection(); + return getSection(Data->getSectionName(), Data->getType(), + Data->getFlags(), 4); +} + +ELFSection &ELFWriter::getBSSSection() { + const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection(); + return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4); +} + +// getCtorSection - Get the static constructor section +ELFSection &ELFWriter::getCtorSection() { + const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection(); + return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags()); +} + +// getDtorSection - Get the static destructor section +ELFSection &ELFWriter::getDtorSection() { + const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection(); + return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags()); +} + +// getTextSection - Get the text section for the specified function +ELFSection &ELFWriter::getTextSection(Function *F) { + const MCSectionELF *Text = + (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); + return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); +} + +// getJumpTableSection - Get a read only section for constants when +// emitting jump tables. TODO: add PIC support +ELFSection &ELFWriter::getJumpTableSection() { + const MCSectionELF *JT = + (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly()); + return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(), + TM.getTargetData()->getPointerABIAlignment()); +} + +// getConstantPoolSection - Get a constant pool section based on the machine +// constant pool entry type and relocation info. 
+ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { + SectionKind Kind; + switch (CPE.getRelocationInfo()) { + default: llvm_unreachable("Unknown section kind"); + case 2: Kind = SectionKind::getReadOnlyWithRel(); break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + case 4: Kind = SectionKind::getMergeableConst4(); break; + case 8: Kind = SectionKind::getMergeableConst8(); break; + case 16: Kind = SectionKind::getMergeableConst16(); break; + default: Kind = SectionKind::getMergeableConst(); break; + } + } + + const MCSectionELF *CPSect = + (const MCSectionELF *)TLOF.getSectionForConstant(Kind); + return getSection(CPSect->getSectionName(), CPSect->getType(), + CPSect->getFlags(), CPE.getAlignment()); +} + +// getRelocSection - Return the relocation section of section 'S'. 'RelA' +// is true if the relocation section contains entries with addends. +ELFSection &ELFWriter::getRelocSection(ELFSection &S) { + unsigned SectionType = TEW->hasRelocationAddend() ? 
+ ELFSection::SHT_RELA : ELFSection::SHT_REL; + + std::string SectionName(".rel"); + if (TEW->hasRelocationAddend()) + SectionName.append("a"); + SectionName.append(S.getName()); + + return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment()); +} + +// getGlobalELFVisibility - Returns the ELF specific visibility type unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { switch (GV->getVisibility()) { default: - assert(0 && "unknown visibility type"); + llvm_unreachable("unknown visibility type"); case GlobalValue::DefaultVisibility: return ELFSym::STV_DEFAULT; case GlobalValue::HiddenVisibility: @@ -152,134 +284,132 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { case GlobalValue::ProtectedVisibility: return ELFSym::STV_PROTECTED; } - return 0; } -unsigned ELFWriter::getGlobalELFLinkage(const GlobalValue *GV) { +// getGlobalELFBinding - Returns the ELF specific binding type +unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) { if (GV->hasInternalLinkage()) return ELFSym::STB_LOCAL; - if (GV->hasWeakLinkage()) + if (GV->isWeakForLinker() && !GV->hasCommonLinkage()) return ELFSym::STB_WEAK; return ELFSym::STB_GLOBAL; } -// getElfSectionFlags - Get the ELF Section Header based on the -// flags defined in ELFTargetAsmInfo. 
-unsigned ELFWriter::getElfSectionFlags(unsigned Flags) { - unsigned ElfSectionFlags = ELFSection::SHF_ALLOC; - - if (Flags & SectionFlags::Code) - ElfSectionFlags |= ELFSection::SHF_EXECINSTR; - if (Flags & SectionFlags::Writeable) - ElfSectionFlags |= ELFSection::SHF_WRITE; - if (Flags & SectionFlags::Mergeable) - ElfSectionFlags |= ELFSection::SHF_MERGE; - if (Flags & SectionFlags::TLS) - ElfSectionFlags |= ELFSection::SHF_TLS; - if (Flags & SectionFlags::Strings) - ElfSectionFlags |= ELFSection::SHF_STRINGS; - - return ElfSectionFlags; -} - -// For global symbols without a section, return the Null section as a -// placeholder -ELFSection &ELFWriter::getGlobalSymELFSection(const GlobalVariable *GV, - ELFSym &Sym) { - // If this is a declaration, the symbol does not have a section. - if (!GV->hasInitializer()) { - Sym.SectionIdx = ELFSection::SHN_UNDEF; - return getNullSection(); - } +// getGlobalELFType - Returns the ELF specific type for a global +unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) { + if (GV->isDeclaration()) + return ELFSym::STT_NOTYPE; - // Get the name and flags of the section for the global - const Section *S = TAI->SectionForGlobal(GV); - unsigned SectionType = ELFSection::SHT_PROGBITS; - unsigned SectionFlags = getElfSectionFlags(S->getFlags()); - DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n"; + if (isa<Function>(GV)) + return ELFSym::STT_FUNC; - const TargetData *TD = TM.getTargetData(); - unsigned Align = TD->getPreferredAlignment(GV); - Constant *CV = GV->getInitializer(); - - // If this global has a zero initializer, go to .bss or common section. - // Variables are part of the common block if they are zero initialized - // and allowed to be merged with other symbols. 
- if (CV->isNullValue() || isa<UndefValue>(CV)) { - SectionType = ELFSection::SHT_NOBITS; - ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags); - if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || - GV->hasCommonLinkage()) { - Sym.SectionIdx = ELFSection::SHN_COMMON; - Sym.IsCommon = true; - ElfS.Align = 1; - return ElfS; - } - Sym.IsBss = true; - Sym.SectionIdx = ElfS.SectionIdx; - if (Align) ElfS.Size = (ElfS.Size + Align-1) & ~(Align-1); - ElfS.Align = std::max(ElfS.Align, Align); - return ElfS; - } - - Sym.IsConstant = true; - ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags); - Sym.SectionIdx = ElfS.SectionIdx; - ElfS.Align = std::max(ElfS.Align, Align); - return ElfS; + return ELFSym::STT_OBJECT; } -void ELFWriter::EmitFunctionDeclaration(const Function *F) { - ELFSym GblSym(F); - GblSym.setBind(ELFSym::STB_GLOBAL); - GblSym.setType(ELFSym::STT_NOTYPE); - GblSym.setVisibility(ELFSym::STV_DEFAULT); - GblSym.SectionIdx = ELFSection::SHN_UNDEF; - SymbolList.push_back(GblSym); +// IsELFUndefSym - True if the global value must be marked as a symbol +// which points to a SHN_UNDEF section. This means that the symbol has +// no definition on the module. +static bool IsELFUndefSym(const GlobalValue *GV) { + return GV->isDeclaration() || (isa<Function>(GV)); } -void ELFWriter::EmitGlobalVar(const GlobalVariable *GV) { - unsigned SymBind = getGlobalELFLinkage(GV); - unsigned Align=0, Size=0; - ELFSym GblSym(GV); - GblSym.setBind(SymBind); - GblSym.setVisibility(getGlobalELFVisibility(GV)); - - if (GV->hasInitializer()) { - GblSym.setType(ELFSym::STT_OBJECT); - const TargetData *TD = TM.getTargetData(); - Align = TD->getPreferredAlignment(GV); - Size = TD->getTypeAllocSize(GV->getInitializer()->getType()); - GblSym.Size = Size; +// AddToSymbolList - Update the symbol lookup and If the symbol is +// private add it to PrivateSyms list, otherwise to SymbolList. 
+void ELFWriter::AddToSymbolList(ELFSym *GblSym) { + assert(GblSym->isGlobalValue() && "Symbol must be a global value"); + + const GlobalValue *GV = GblSym->getGlobalValue(); + if (GV->hasPrivateLinkage()) { + // For a private symbols, keep track of the index inside + // the private list since it will never go to the symbol + // table and won't be patched up later. + PrivateSyms.push_back(GblSym); + GblSymLookup[GV] = PrivateSyms.size()-1; } else { - GblSym.setType(ELFSym::STT_NOTYPE); + // Non private symbol are left with zero indices until + // they are patched up during the symbol table emition + // (where the indicies are created). + SymbolList.push_back(GblSym); + GblSymLookup[GV] = 0; } +} - ELFSection &GblSection = getGlobalSymELFSection(GV, GblSym); - - if (GblSym.IsCommon) { - GblSym.Value = Align; - } else if (GblSym.IsBss) { - GblSym.Value = GblSection.Size; - GblSection.Size += Size; - } else if (GblSym.IsConstant){ - // GblSym.Value should contain the symbol index inside the section, - // and all symbols should start on their required alignment boundary - GblSym.Value = (GblSection.size() + (Align-1)) & (-Align); - GblSection.emitAlignment(Align); - EmitGlobalConstant(GV->getInitializer(), GblSection); - } +// EmitGlobal - Choose the right section for global and emit it +void ELFWriter::EmitGlobal(const GlobalValue *GV) { - // Local symbols should come first on the symbol table. - if (!GV->hasPrivateLinkage()) { - if (SymBind == ELFSym::STB_LOCAL) - SymbolList.push_front(GblSym); - else - SymbolList.push_back(GblSym); + // Check if the referenced symbol is already emitted + if (GblSymLookup.find(GV) != GblSymLookup.end()) + return; + + // Handle ELF Bind, Visibility and Type for the current symbol + unsigned SymBind = getGlobalELFBinding(GV); + unsigned SymType = getGlobalELFType(GV); + bool IsUndefSym = IsELFUndefSym(GV); + + ELFSym *GblSym = IsUndefSym ? 
ELFSym::getUndefGV(GV, SymBind) + : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV)); + + if (!IsUndefSym) { + assert(isa<GlobalVariable>(GV) && "GV not a global variable!"); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + + // Handle special llvm globals + if (EmitSpecialLLVMGlobal(GVar)) + return; + + // Get the ELF section where this global belongs from TLOF + const MCSectionELF *S = + (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM); + ELFSection &ES = + getSection(S->getSectionName(), S->getType(), S->getFlags()); + SectionKind Kind = S->getKind(); + + // The symbol align should update the section alignment if needed + const TargetData *TD = TM.getTargetData(); + unsigned Align = TD->getPreferredAlignment(GVar); + unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType()); + GblSym->Size = Size; + + if (S->HasCommonSymbols()) { // Symbol must go to a common section + GblSym->SectionIdx = ELFSection::SHN_COMMON; + + // A new linkonce section is created for each global in the + // common section, the default alignment is 1 and the symbol + // value contains its alignment. + ES.Align = 1; + GblSym->Value = Align; + + } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS. + GblSym->SectionIdx = ES.SectionIdx; + + // Update the size with alignment and the next object can + // start in the right offset in the section + if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1); + ES.Align = std::max(ES.Align, Align); + + // GblSym->Value should contain the virtual offset inside the section. 
+ // Virtual because the BSS space is not allocated on ELF objects + GblSym->Value = ES.Size; + ES.Size += Size; + + } else { // The symbol must go to some kind of data section + GblSym->SectionIdx = ES.SectionIdx; + + // GblSym->Value should contain the symbol offset inside the section, + // and all symbols should start on their required alignment boundary + ES.Align = std::max(ES.Align, Align); + ES.emitAlignment(Align); + GblSym->Value = ES.size(); + + // Emit the global to the data section 'ES' + EmitGlobalConstant(GVar->getInitializer(), ES); + } } + + AddToSymbolList(GblSym); } void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, @@ -305,8 +435,7 @@ void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. - for (unsigned p=0; p < padSize; p++) - GblS.emitByte(0); + GblS.emitZeros(padSize); } assert(sizeSoFar == cvsLayout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); @@ -317,65 +446,242 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { unsigned Size = TD->getTypeAllocSize(CV->getType()); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { - if (CVA->isString()) { - std::string GblStr = CVA->getAsString(); - GblStr.resize(GblStr.size()-1); - GblS.emitString(GblStr); - } else { // Not a string. 
Print the values in successive locations - for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) - EmitGlobalConstant(CVA->getOperand(i), GblS); - } + for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) + EmitGlobalConstant(CVA->getOperand(i), GblS); + return; + } else if (isa<ConstantAggregateZero>(CV)) { + GblS.emitZeros(Size); return; } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) { EmitGlobalConstantStruct(CVS, GblS); return; } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - if (CFP->getType() == Type::DoubleTy) - GblS.emitWord64(Val); - else if (CFP->getType() == Type::FloatTy) - GblS.emitWord32(Val); - else if (CFP->getType() == Type::X86_FP80Ty) { - assert(0 && "X86_FP80Ty global emission not implemented"); - } else if (CFP->getType() == Type::PPC_FP128Ty) - assert(0 && "PPC_FP128Ty global emission not implemented"); + APInt Val = CFP->getValueAPF().bitcastToAPInt(); + if (CFP->getType()->isDoubleTy()) + GblS.emitWord64(Val.getZExtValue()); + else if (CFP->getType()->isFloatTy()) + GblS.emitWord32(Val.getZExtValue()); + else if (CFP->getType()->isX86_FP80Ty()) { + unsigned PadSize = TD->getTypeAllocSize(CFP->getType())- + TD->getTypeStoreSize(CFP->getType()); + GblS.emitWordFP80(Val.getRawData(), PadSize); + } else if (CFP->getType()->isPPC_FP128Ty()) + llvm_unreachable("PPC_FP128Ty global emission not implemented"); return; } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (Size == 4) + if (Size == 1) + GblS.emitByte(CI->getZExtValue()); + else if (Size == 2) + GblS.emitWord16(CI->getZExtValue()); + else if (Size == 4) GblS.emitWord32(CI->getZExtValue()); - else if (Size == 8) - GblS.emitWord64(CI->getZExtValue()); - else - assert(0 && "LargeInt global emission not implemented"); + else + EmitGlobalConstantLargeInt(CI, GblS); return; } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { const VectorType 
*PTy = CP->getType(); for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) EmitGlobalConstant(CP->getOperand(I), GblS); return; + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Resolve a constant expression which returns a (Constant, Offset) + // pair. If 'Res.first' is a GlobalValue, emit a relocation with + // the offset 'Res.second', otherwise emit a global constant like + // it is always done for non-constant-expression types. + CstExprResTy Res = ResolveConstantExpr(CE); + const Constant *Op = Res.first; + + if (isa<GlobalValue>(Op)) + EmitGlobalDataRelocation(cast<const GlobalValue>(Op), + TD->getTypeAllocSize(Op->getType()), + GblS, Res.second); + else + EmitGlobalConstant(Op, GblS); + + return; + } else if (CV->getType()->getTypeID() == Type::PointerTyID) { + // Fill the data entry with zeros or emit a relocation entry + if (isa<ConstantPointerNull>(CV)) + GblS.emitZeros(Size); + else + EmitGlobalDataRelocation(cast<const GlobalValue>(CV), + Size, GblS); + return; + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + // This is a constant address for a global variable or function and + // therefore must be referenced using a relocation entry. + EmitGlobalDataRelocation(GV, Size, GblS); + return; + } + + std::string msg; + raw_string_ostream ErrorMsg(msg); + ErrorMsg << "Constant unimp for type: " << *CV->getType(); + llvm_report_error(ErrorMsg.str()); +} + +// ResolveConstantExpr - Resolve the constant expression until it stops +// yielding other constant expressions. 
+CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { + const TargetData *TD = TM.getTargetData(); + + // There isn't a constant expression nested inside others anymore + if (!isa<ConstantExpr>(CV)) + return std::make_pair(CV, 0); + + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); + switch (CE->getOpcode()) { + case Instruction::BitCast: + return ResolveConstantExpr(CE->getOperand(0)); + + case Instruction::GetElementPtr: { + const Constant *ptrVal = CE->getOperand(0); + SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end()); + int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], + idxVec.size()); + return std::make_pair(ptrVal, Offset); + } + case Instruction::IntToPtr: { + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), + false/*ZExt*/); + return ResolveConstantExpr(Op); + } + case Instruction::PtrToInt: { + Constant *Op = CE->getOperand(0); + const Type *Ty = CE->getType(); + + // We can emit the pointer value into this slot if the slot is an + // integer slot greater or equal to the size of the pointer. + if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) + return ResolveConstantExpr(Op); + + llvm_unreachable("Integer size less then pointer size"); + } + case Instruction::Add: + case Instruction::Sub: { + // Only handle cases where there's a constant expression with GlobalValue + // as first operand and ConstantInt as second, which are the cases we can + // solve directly using a relocation entry. 
GlobalValue=Op0, CstInt=Op1 + // 1) Instruction::Add => (global) + CstInt + // 2) Instruction::Sub => (global) + -CstInt + const Constant *Op0 = CE->getOperand(0); + const Constant *Op1 = CE->getOperand(1); + assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt"); + + CstExprResTy Res = ResolveConstantExpr(Op0); + assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue"); + + const APInt &RHS = cast<ConstantInt>(Op1)->getValue(); + switch (CE->getOpcode()) { + case Instruction::Add: + return std::make_pair(Res.first, RHS.getSExtValue()); + case Instruction::Sub: + return std::make_pair(Res.first, (-RHS).getSExtValue()); + } + } + } + + std::string msg(CE->getOpcodeName()); + raw_string_ostream ErrorMsg(msg); + ErrorMsg << ": Unsupported ConstantExpr type"; + llvm_report_error(ErrorMsg.str()); + + return std::make_pair(CV, 0); // silence warning +} + +void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, + ELFSection &GblS, int64_t Offset) { + // Create the relocation entry for the global value + MachineRelocation MR = + MachineRelocation::getGV(GblS.getCurrentPCOffset(), + TEW->getAbsoluteLabelMachineRelTy(), + const_cast<GlobalValue*>(GV), + Offset); + + // Fill the data entry with zeros + GblS.emitZeros(Size); + + // Add the relocation entry for the current data section + GblS.addRelocation(MR); +} + +void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI, + ELFSection &S) { + const TargetData *TD = TM.getTargetData(); + unsigned BitWidth = CI->getBitWidth(); + assert(isPowerOf2_32(BitWidth) && + "Non-power-of-2-sized integers not handled!"); + + const uint64_t *RawData = CI->getValue().getRawData(); + uint64_t Val = 0; + for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { + Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i]; + S.emitWord64(Val); } - assert(0 && "unknown global constant"); } +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. 
If so, emit it and return true, otherwise +/// do nothing and return false. +bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") + llvm_unreachable("not implemented yet"); + + // Ignore debug and non-emitted data. This handles llvm.compiler.used. + if (GV->getSection() == "llvm.metadata" || + GV->hasAvailableExternallyLinkage()) + return true; + + if (!GV->hasAppendingLinkage()) return false; + + assert(GV->hasInitializer() && "Not a special LLVM global!"); + + const TargetData *TD = TM.getTargetData(); + unsigned Align = TD->getPointerPrefAlignment(); + if (GV->getName() == "llvm.global_ctors") { + ELFSection &Ctor = getCtorSection(); + Ctor.emitAlignment(Align); + EmitXXStructorList(GV->getInitializer(), Ctor); + return true; + } + + if (GV->getName() == "llvm.global_dtors") { + ELFSection &Dtor = getDtorSection(); + Dtor.emitAlignment(Align); + EmitXXStructorList(GV->getInitializer(), Dtor); + return true; + } + + return false; +} + +/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the +/// function pointers, ignoring the init priority. +void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { + // Should be an array of '{ int, void ()* }' structs. The first value is the + // init priority, which we ignore. + if (!isa<ConstantArray>(List)) return; + ConstantArray *InitList = cast<ConstantArray>(List); + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + + if (CS->getOperand(1)->isNullValue()) + return; // Found a null terminator, exit printing. + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1), Xtor); + } +} bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { - // Nothing to do here, this is all done through the MCE object above. 
+ // Nothing to do here, this is all done through the ElfCE object above. return false; } /// doFinalization - Now that the module has been completely processed, emit /// the ELF file to 'O'. bool ELFWriter::doFinalization(Module &M) { - /// FIXME: This should be removed when moving to ObjectCodeEmiter. Since the - /// current ELFCodeEmiter uses CurrBuff, ... it doesn't update S.Data - /// vector size for .text sections, so this is a quick dirty fix - ELFSection &TS = getTextSection(); - if (TS.Size) { - BinaryData &BD = TS.getData(); - for (unsigned e=0; e<TS.Size; ++e) - BD.push_back(BD[e]); - } - // Emit .data section placeholder getDataSection(); @@ -384,57 +690,34 @@ bool ELFWriter::doFinalization(Module &M) { // Build and emit data, bss and "common" sections. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - EmitGlobalVar(I); - GblSymLookup[I] = 0; - } + I != E; ++I) + EmitGlobal(I); // Emit all pending globals - // TODO: this should be done only for referenced symbols - for (SetVector<GlobalValue*>::const_iterator I = PendingGlobals.begin(), - E = PendingGlobals.end(); I != E; ++I) { + for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end(); + I != E; ++I) + EmitGlobal(*I); - // No need to emit the symbol again - if (GblSymLookup.find(*I) != GblSymLookup.end()) - continue; - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) { - EmitGlobalVar(GV); - } else if (Function *F = dyn_cast<Function>(*I)) { - // If function is not in GblSymLookup, it doesn't have a body, - // so emit the symbol as a function declaration (no section associated) - EmitFunctionDeclaration(F); - } else { - assert("unknown howto handle pending global"); - } - GblSymLookup[*I] = 0; - } + // Emit all pending externals + for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end(); + I != E; ++I) + SymbolList.push_back(ELFSym::getExtSym(*I)); // Emit non-executable stack note - if 
(TAI->getNonexecutableStackDirective()) + if (MAI->getNonexecutableStackDirective()) getNonExecStackSection(); - // Emit a symbol for each section created until now - for (std::map<std::string, ELFSection*>::iterator I = SectionLookup.begin(), - E = SectionLookup.end(); I != E; ++I) { - ELFSection *ES = I->second; - - // Skip null section - if (ES->SectionIdx == 0) continue; - - ELFSym SectionSym(0); - SectionSym.SectionIdx = ES->SectionIdx; - SectionSym.Size = 0; - SectionSym.setBind(ELFSym::STB_LOCAL); - SectionSym.setType(ELFSym::STT_SECTION); - SectionSym.setVisibility(ELFSym::STV_DEFAULT); - - // Local symbols go in the list front - SymbolList.push_front(SectionSym); + // Emit a symbol for each section created until now, skip null section + for (unsigned i = 1, e = SectionList.size(); i < e; ++i) { + ELFSection &ES = *SectionList[i]; + ELFSym *SectionSym = ELFSym::getSectionSym(); + SectionSym->SectionIdx = ES.SectionIdx; + SymbolList.push_back(SectionSym); + ES.Sym = SymbolList.back(); } // Emit string table - EmitStringTable(); + EmitStringTable(M.getModuleIdentifier()); // Emit the symbol table now, if non-empty. EmitSymbolTable(); @@ -448,77 +731,106 @@ bool ELFWriter::doFinalization(Module &M) { // Dump the sections and section table to the .o file. OutputSectionsAndSectionTable(); - // We are done with the abstract symbols. - SectionList.clear(); - NumSections = 0; - - // Release the name mangler object. 
- delete Mang; Mang = 0; return false; } +// RelocateField - Patch relocatable field with 'Offset' in 'BO' +// using a 'Value' of known 'Size' +void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset, + int64_t Value, unsigned Size) { + if (Size == 32) + BO.fixWord32(Value, Offset); + else if (Size == 64) + BO.fixWord64(Value, Offset); + else + llvm_unreachable("don't know howto patch relocatable field"); +} + /// EmitRelocations - Emit relocations void ELFWriter::EmitRelocations() { + // True if the target uses the relocation entry to hold the addend, + // otherwise the addend is written directly to the relocatable field. + bool HasRelA = TEW->hasRelocationAddend(); + // Create Relocation sections for each section which needs it. - for (std::list<ELFSection>::iterator I = SectionList.begin(), - E = SectionList.end(); I != E; ++I) { + for (unsigned i=0, e=SectionList.size(); i != e; ++i) { + ELFSection &S = *SectionList[i]; // This section does not have relocations - if (!I->hasRelocations()) continue; - - // Get the relocation section for section 'I' - bool HasRelA = TEW->hasRelocationAddend(); - ELFSection &RelSec = getRelocSection(I->getName(), HasRelA, - TEW->getPrefELFAlignment()); + if (!S.hasRelocations()) continue; + ELFSection &RelSec = getRelocSection(S); // 'Link' - Section hdr idx of the associated symbol table // 'Info' - Section hdr idx of the section to which the relocation applies ELFSection &SymTab = getSymbolTableSection(); RelSec.Link = SymTab.SectionIdx; - RelSec.Info = I->SectionIdx; + RelSec.Info = S.SectionIdx; RelSec.EntSize = TEW->getRelocationEntrySize(); // Get the relocations from Section - std::vector<MachineRelocation> Relos = I->getRelocations(); + std::vector<MachineRelocation> Relos = S.getRelocations(); for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(), MRE = Relos.end(); MRI != MRE; ++MRI) { MachineRelocation &MR = *MRI; - // Offset from the start of the section containing the symbol - unsigned Offset = 
MR.getMachineCodeOffset(); + // Relocatable field offset from the section start + unsigned RelOffset = MR.getMachineCodeOffset(); // Symbol index in the symbol table unsigned SymIdx = 0; - // Target specific ELF relocation type + // Target specific relocation field type and size unsigned RelType = TEW->getRelocationType(MR.getRelocationType()); - - // Constant addend used to compute the value to be stored - // into the relocatable field + unsigned RelTySize = TEW->getRelocationTySize(RelType); int64_t Addend = 0; // There are several machine relocations types, and each one of // them needs a different approach to retrieve the symbol table index. if (MR.isGlobalValue()) { const GlobalValue *G = MR.getGlobalValue(); + int64_t GlobalOffset = MR.getConstantVal(); SymIdx = GblSymLookup[G]; - Addend = TEW->getAddendForRelTy(RelType); + if (G->hasPrivateLinkage()) { + // If the target uses a section offset in the relocation: + // SymIdx + Addend = section sym for global + section offset + unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx; + Addend = PrivateSyms[SymIdx]->Value + GlobalOffset; + SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); + } else { + Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset); + } + } else if (MR.isExternalSymbol()) { + const char *ExtSym = MR.getExternalSymbol(); + SymIdx = ExtSymLookup[ExtSym]; + Addend = TEW->getDefaultAddendForRelTy(RelType); } else { + // Get the symbol index for the section symbol unsigned SectionIdx = MR.getConstantVal(); - // TODO: use a map for this. 
- for (std::list<ELFSym>::iterator I = SymbolList.begin(), - E = SymbolList.end(); I != E; ++I) - if ((SectionIdx == I->SectionIdx) && - (I->getType() == ELFSym::STT_SECTION)) { - SymIdx = I->SymTabIdx; - break; - } - Addend = (uint64_t)MR.getResultPointer(); + SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); + + // The symbol offset inside the section + int64_t SymOffset = (int64_t)MR.getResultPointer(); + + // For pc relative relocations where symbols are defined in the same + // section they are referenced, ignore the relocation entry and patch + // the relocatable field with the symbol offset directly. + if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) { + int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType); + RelocateField(S, RelOffset, Value, RelTySize); + continue; + } + + Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset); } + // The target without addend on the relocation symbol must be + // patched in the relocation place itself to contain the addend + // otherwise write zeros to make sure there is no garbage there + RelocateField(S, RelOffset, HasRelA ? 
0 : Addend, RelTySize); + // Get the relocation entry and emit to the relocation section - ELFRelocation Rel(Offset, SymIdx, RelType, HasRelA, Addend); + ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend); EmitRelocation(RelSec, Rel, HasRelA); } } @@ -554,7 +866,7 @@ void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) { /// EmitSectionHeader - Write section 'Section' header in 'SHdrTab' /// Section Header Table -void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, +void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr) { SHdrTab.emitWord32(SHdr.NameIdx); SHdrTab.emitWord32(SHdr.Type); @@ -581,27 +893,30 @@ void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, /// EmitStringTable - If the current symbol table is non-empty, emit the string /// table for it -void ELFWriter::EmitStringTable() { +void ELFWriter::EmitStringTable(const std::string &ModuleName) { if (!SymbolList.size()) return; // Empty symbol table. ELFSection &StrTab = getStringTableSection(); // Set the zero'th symbol to a null byte, as required. StrTab.emitByte(0); - // Walk on the symbol list and write symbol names into the - // string table. + // Walk on the symbol list and write symbol names into the string table. unsigned Index = 1; - for (std::list<ELFSym>::iterator I = SymbolList.begin(), - E = SymbolList.end(); I != E; ++I) { + for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { + ELFSym &Sym = *(*I); - // Use the name mangler to uniquify the LLVM symbol. 
std::string Name; - if (I->GV) Name.append(Mang->getValueName(I->GV)); + if (Sym.isGlobalValue()) + Name.append(Mang->getMangledName(Sym.getGlobalValue())); + else if (Sym.isExternalSym()) + Name.append(Sym.getExternalSymbol()); + else if (Sym.isFileType()) + Name.append(ModuleName); if (Name.empty()) { - I->NameIdx = 0; + Sym.NameIdx = 0; } else { - I->NameIdx = Index; + Sym.NameIdx = Index; StrTab.emitString(Name); // Keep track of the number of bytes emitted to this section. @@ -612,11 +927,38 @@ void ELFWriter::EmitStringTable() { StrTab.Size = Index; } +// SortSymbols - On the symbol table local symbols must come before +// all other symbols with non-local bindings. The return value is +// the position of the first non local symbol. +unsigned ELFWriter::SortSymbols() { + unsigned FirstNonLocalSymbol; + std::vector<ELFSym*> LocalSyms, OtherSyms; + + for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { + if ((*I)->isLocalBind()) + LocalSyms.push_back(*I); + else + OtherSyms.push_back(*I); + } + SymbolList.clear(); + FirstNonLocalSymbol = LocalSyms.size(); + + for (unsigned i = 0; i < FirstNonLocalSymbol; ++i) + SymbolList.push_back(LocalSyms[i]); + + for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I) + SymbolList.push_back(*I); + + LocalSyms.clear(); + OtherSyms.clear(); + + return FirstNonLocalSymbol; +} + /// EmitSymbolTable - Emit the symbol table itself. void ELFWriter::EmitSymbolTable() { if (!SymbolList.size()) return; // Empty symbol table. - unsigned FirstNonLocalSymbol = 1; // Now that we have emitted the string table and know the offset into the // string table of each symbol, emit the symbol table itself. ELFSection &SymTab = getSymbolTableSection(); @@ -628,30 +970,27 @@ void ELFWriter::EmitSymbolTable() { // Size of each symtab entry. 
SymTab.EntSize = TEW->getSymTabEntrySize(); - // The first entry in the symtab is the null symbol - ELFSym NullSym = ELFSym(0); - EmitSymbol(SymTab, NullSym); + // Reorder the symbol table with local symbols first! + unsigned FirstNonLocalSymbol = SortSymbols(); - // Emit all the symbols to the symbol table. Skip the null - // symbol, cause it's emitted already - unsigned Index = 1; - for (std::list<ELFSym>::iterator I = SymbolList.begin(), - E = SymbolList.end(); I != E; ++I, ++Index) { - // Keep track of the first non-local symbol - if (I->getBind() == ELFSym::STB_LOCAL) - FirstNonLocalSymbol++; + // Emit all the symbols to the symbol table. + for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) { + ELFSym &Sym = *SymbolList[i]; // Emit symbol to the symbol table - EmitSymbol(SymTab, *I); + EmitSymbol(SymTab, Sym); - // Record the symbol table index for each global value - if (I->GV) - GblSymLookup[I->GV] = Index; + // Record the symbol table index for each symbol + if (Sym.isGlobalValue()) + GblSymLookup[Sym.getGlobalValue()] = i; + else if (Sym.isExternalSym()) + ExtSymLookup[Sym.getExternalSymbol()] = i; // Keep track on the symbol index into the symbol table - I->SymTabIdx = Index; + Sym.SymTabIdx = i; } + // One greater than the symbol table index of the last local symbol SymTab.Info = FirstNonLocalSymbol; SymTab.Size = SymTab.size(); } @@ -671,15 +1010,15 @@ void ELFWriter::EmitSectionTableStringTable() { // the string table. unsigned Index = 0; - for (std::list<ELFSection>::iterator I = SectionList.begin(), - E = SectionList.end(); I != E; ++I) { + for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { + ELFSection &S = *(*I); // Set the index into the table. Note if we have lots of entries with // common suffixes, we could memoize them here if we cared. - I->NameIdx = Index; - SHStrTab.emitString(I->getName()); + S.NameIdx = Index; + SHStrTab.emitString(S.getName()); // Keep track of the number of bytes emitted to this section. 
- Index += I->getName().size()+1; + Index += S.getName().size()+1; } // Set the size of .shstrtab now that we know what it is. @@ -694,29 +1033,24 @@ void ELFWriter::OutputSectionsAndSectionTable() { // Pass #1: Compute the file offset for each section. size_t FileOff = ElfHdr.size(); // File header first. - // Adjust alignment of all section if needed. - for (std::list<ELFSection>::iterator I = SectionList.begin(), - E = SectionList.end(); I != E; ++I) { - - // Section idx 0 has 0 offset - if (!I->SectionIdx) - continue; - - if (!I->size()) { - I->Offset = FileOff; + // Adjust alignment of all section if needed, skip the null section. + for (unsigned i=1, e=SectionList.size(); i < e; ++i) { + ELFSection &ES = *SectionList[i]; + if (!ES.size()) { + ES.Offset = FileOff; continue; } // Update Section size - if (!I->Size) - I->Size = I->size(); + if (!ES.Size) + ES.Size = ES.size(); // Align FileOff to whatever the alignment restrictions of the section are. - if (I->Align) - FileOff = (FileOff+I->Align-1) & ~(I->Align-1); + if (ES.Align) + FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1); - I->Offset = FileOff; - FileOff += I->Size; + ES.Offset = FileOff; + FileOff += ES.Size; } // Align Section Header. @@ -740,11 +1074,11 @@ void ELFWriter::OutputSectionsAndSectionTable() { BinaryObject SHdrTable(isLittleEndian, is64Bit); // Emit all of sections to the file and build the section header table. 
- while (!SectionList.empty()) { - ELFSection &S = *SectionList.begin(); - DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() - << ", Size: " << S.Size << ", Offset: " << S.Offset - << ", SectionData Size: " << S.size() << "\n"; + for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { + ELFSection &S = *(*I); + DEBUG(errs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() + << ", Size: " << S.Size << ", Offset: " << S.Offset + << ", SectionData Size: " << S.size() << "\n"); // Align FileOff to whatever the alignment restrictions of the section are. if (S.size()) { @@ -758,7 +1092,6 @@ void ELFWriter::OutputSectionsAndSectionTable() { } EmitSectionHeader(SHdrTable, S); - SectionList.pop_front(); } // Align output for the section table. diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index bab118c..b61b484 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -16,23 +16,35 @@ #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include <list> #include <map> namespace llvm { class BinaryObject; class Constant; + class ConstantInt; class ConstantStruct; class ELFCodeEmitter; + class ELFRelocation; + class ELFSection; + struct ELFSym; class GlobalVariable; + class JITDebugRegisterer; class Mangler; class MachineCodeEmitter; - class TargetAsmInfo; + class MachineConstantPoolEntry; + class ObjectCodeEmitter; + class MCAsmInfo; class TargetELFWriterInfo; + class TargetLoweringObjectFile; class raw_ostream; - class ELFSection; - class ELFSym; - class ELFRelocation; + class SectionKind; + class MCContext; + + typedef std::vector<ELFSym*>::iterator ELFSymIter; + typedef std::vector<ELFSection*>::iterator ELFSectionIter; + typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter; + typedef SetVector<const char *>::const_iterator PendingExtsIter; + typedef std::pair<const Constant *, int64_t> CstExprResTy; /// ELFWriter - This class implements the common 
target-independent code for /// writing ELF files. Targets should derive a class from this to @@ -40,18 +52,18 @@ namespace llvm { /// class ELFWriter : public MachineFunctionPass { friend class ELFCodeEmitter; + friend class JITDebugRegisterer; public: static char ID; - MachineCodeEmitter &getMachineCodeEmitter() const { - return *(MachineCodeEmitter*)MCE; + /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter + ObjectCodeEmitter *getObjectCodeEmitter() { + return reinterpret_cast<ObjectCodeEmitter*>(ElfCE); } ELFWriter(raw_ostream &O, TargetMachine &TM); ~ELFWriter(); - typedef std::vector<unsigned char> DataBuffer; - protected: /// Output stream to send the resultant object file to. raw_ostream &O; @@ -59,6 +71,9 @@ namespace llvm { /// Target machine description. TargetMachine &TM; + /// Context object for machine code objects. + MCContext &OutContext; + /// Target Elf Writer description. const TargetELFWriterInfo *TEW; @@ -67,11 +82,15 @@ namespace llvm { /// MCE - The MachineCodeEmitter object that we are exposing to emit machine /// code for functions to the .o file. - ELFCodeEmitter *MCE; + ELFCodeEmitter *ElfCE; - /// TAI - Target Asm Info, provide information about section names for + /// TLOF - Target Lowering Object File, provide section names for globals + /// and other object file specific stuff + const TargetLoweringObjectFile &TLOF; + + /// MAI - Target Asm Info, provide information about section names for /// globals and other target specific stuff. - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; //===------------------------------------------------------------------===// // Properties inferred automatically from the target machine. @@ -95,59 +114,49 @@ namespace llvm { BinaryObject ElfHdr; /// SectionList - This is the list of sections that we have emitted to the - /// file. Once the file has been completely built, the section header table + /// file. 
Once the file has been completely built, the section header table /// is constructed from this info. - std::list<ELFSection> SectionList; + std::vector<ELFSection*> SectionList; unsigned NumSections; // Always = SectionList.size() /// SectionLookup - This is a mapping from section name to section number in - /// the SectionList. + /// the SectionList. Used to quickly gather the Section Index from MAI names std::map<std::string, ELFSection*> SectionLookup; + /// PendingGlobals - Globals not processed as symbols yet. + SetVector<const GlobalValue*> PendingGlobals; + /// GblSymLookup - This is a mapping from global value to a symbol index - /// in the symbol table. This is useful since relocations symbol references - /// must be quickly mapped to a symbol table index + /// in the symbol table or private symbols list. This is useful since reloc + /// symbol references must be quickly mapped to their indices on the lists. std::map<const GlobalValue*, uint32_t> GblSymLookup; - /// SymbolList - This is the list of symbols emitted to the symbol table - /// Local symbols go to the front and Globals to the back. - std::list<ELFSym> SymbolList; - - /// PendingGlobals - List of externally defined symbols that we have been - /// asked to emit, but have not seen a reference to. When a reference - /// is seen, the symbol will move from this list to the SymbolList. - SetVector<GlobalValue*> PendingGlobals; - - // Remove tab from section name prefix. This is necessary becase TAI - // sometimes return a section name prefixed with a "\t" char. This is - // a little bit dirty. FIXME: find a better approach, maybe add more - // methods to TAI to get the clean name? 
- void fixNameForSection(std::string &Name) { - size_t Pos = Name.find("\t"); - if (Pos != std::string::npos) - Name.erase(Pos, 1); - - Pos = Name.find(".section "); - if (Pos != std::string::npos) - Name.erase(Pos, 9); - - Pos = Name.find("\n"); - if (Pos != std::string::npos) - Name.erase(Pos, 1); - } + /// PendingExternals - Externals not processed as symbols yet. + SetVector<const char *> PendingExternals; + + /// ExtSymLookup - This is a mapping from externals to a symbol index + /// in the symbol table list. This is useful since reloc symbol references + /// must be quickly mapped to their symbol table indices. + std::map<const char *, uint32_t> ExtSymLookup; + + /// SymbolList - This is the list of symbols emitted to the symbol table. + /// When the SymbolList is finally built, local symbols must be placed in + /// the beginning while non-locals at the end. + std::vector<ELFSym*> SymbolList; + + /// PrivateSyms - Record private symbols, every symbol here must never be + /// present in the SymbolList. + std::vector<ELFSym*> PrivateSyms; /// getSection - Return the section with the specified name, creating a new /// section if one does not already exist. ELFSection &getSection(const std::string &Name, unsigned Type, unsigned Flags = 0, unsigned Align = 0) { - std::string SectionName(Name); - fixNameForSection(SectionName); - - ELFSection *&SN = SectionLookup[SectionName]; + ELFSection *&SN = SectionLookup[Name]; if (SN) return *SN; - SectionList.push_back(ELFSection(SectionName, isLittleEndian, is64Bit)); - SN = &SectionList.back(); + SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit)); + SN = SectionList.back(); SN->SectionIdx = NumSections++; SN->Type = Type; SN->Flags = Flags; @@ -156,37 +165,6 @@ namespace llvm { return *SN; } - /// TODO: support mangled names here to emit the right .text section - /// for c++ object files. 
- ELFSection &getTextSection() { - return getSection(".text", ELFSection::SHT_PROGBITS, - ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC); - } - - /// Get jump table section on the section name returned by TAI - ELFSection &getJumpTableSection(std::string SName, unsigned Align) { - return getSection(SName, ELFSection::SHT_PROGBITS, - ELFSection::SHF_ALLOC, Align); - } - - /// Get a constant pool section based on the section name returned by TAI - ELFSection &getConstantPoolSection(std::string SName, unsigned Align) { - return getSection(SName, ELFSection::SHT_PROGBITS, - ELFSection::SHF_MERGE | ELFSection::SHF_ALLOC, Align); - } - - /// Return the relocation section of section 'S'. 'RelA' is true - /// if the relocation section contains entries with addends. - ELFSection &getRelocSection(std::string SName, bool RelA, unsigned Align) { - std::string RelSName(".rel"); - unsigned SHdrTy = RelA ? ELFSection::SHT_RELA : ELFSection::SHT_REL; - - if (RelA) RelSName.append("a"); - RelSName.append(SName); - - return getSection(RelSName, SHdrTy, 0, Align); - } - ELFSection &getNonExecStackSection() { return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1); } @@ -203,24 +181,38 @@ namespace llvm { return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1); } - ELFSection &getDataSection() { - return getSection(".data", ELFSection::SHT_PROGBITS, - ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4); - } - - ELFSection &getBSSSection() { - return getSection(".bss", ELFSection::SHT_NOBITS, - ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4); - } - ELFSection &getNullSection() { return getSection("", ELFSection::SHT_NULL, 0); } + ELFSection &getDataSection(); + ELFSection &getBSSSection(); + ELFSection &getCtorSection(); + ELFSection &getDtorSection(); + ELFSection &getJumpTableSection(); + ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); + ELFSection &getTextSection(Function *F); + ELFSection &getRelocSection(ELFSection &S); + // Helpers for 
obtaining ELF specific info. - unsigned getGlobalELFLinkage(const GlobalValue *GV); + unsigned getGlobalELFBinding(const GlobalValue *GV); + unsigned getGlobalELFType(const GlobalValue *GV); unsigned getGlobalELFVisibility(const GlobalValue *GV); - unsigned getElfSectionFlags(unsigned Flags); + + // AddPendingGlobalSymbol - Add a global to be processed and to + // the global symbol lookup, use a zero index because the table + // index will be determined later. + void AddPendingGlobalSymbol(const GlobalValue *GV, + bool AddToLookup = false); + + // AddPendingExternalSymbol - Add the external to be processed + // and to the external symbol lookup, use a zero index because + // the symbol table index will be determined later. + void AddPendingExternalSymbol(const char *External); + + // AddToSymbolList - Update the symbol lookup and If the symbol is + // private add it to PrivateSyms list, otherwise to SymbolList. + void AddToSymbolList(ELFSym *GblSym); // As we complete the ELF file, we need to update fields in the ELF header // (e.g. the location of the section table). These members keep track of @@ -231,20 +223,27 @@ namespace llvm { unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. 
private: - void EmitFunctionDeclaration(const Function *F); - void EmitGlobalVar(const GlobalVariable *GV); + void EmitGlobal(const GlobalValue *GV); void EmitGlobalConstant(const Constant *C, ELFSection &GblS); void EmitGlobalConstantStruct(const ConstantStruct *CVS, ELFSection &GblS); - ELFSection &getGlobalSymELFSection(const GlobalVariable *GV, ELFSym &Sym); + void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S); + void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, + ELFSection &GblS, int64_t Offset = 0); + bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); + void EmitXXStructorList(Constant *List, ELFSection &Xtor); void EmitRelocations(); void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); void EmitSectionTableStringTable(); void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym); void EmitSymbolTable(); - void EmitStringTable(); + void EmitStringTable(const std::string &ModuleName); void OutputSectionsAndSectionTable(); + void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value, + unsigned Size); + unsigned SortSymbols(); + CstExprResTy ResolveConstantExpr(const Constant *CV); }; } diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp new file mode 100644 index 0000000..4f32c2b --- /dev/null +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -0,0 +1,160 @@ +//===----- ExactHazardRecognizer.cpp - hazard recognizer -------- ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a hazard recognizer using the instruction itineraries +// defined for the current target. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "exact-hazards" +#include "ExactHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrItineraries.h" + +using namespace llvm; + +ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData) : + ScheduleHazardRecognizer(), ItinData(LItinData) +{ + // Determine the maximum depth of any itinerary. This determines the + // depth of the scoreboard. We always make the scoreboard at least 1 + // cycle deep to avoid dealing with the boundary condition. + ScoreboardDepth = 1; + if (!ItinData.isEmpty()) { + for (unsigned idx = 0; ; ++idx) { + if (ItinData.isEndMarker(idx)) + break; + + const InstrStage *IS = ItinData.beginStage(idx); + const InstrStage *E = ItinData.endStage(idx); + unsigned ItinDepth = 0; + for (; IS != E; ++IS) + ItinDepth += IS->getCycles(); + + ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); + } + } + + Scoreboard = new unsigned[ScoreboardDepth]; + ScoreboardHead = 0; + + DEBUG(errs() << "Using exact hazard recognizer: ScoreboardDepth = " + << ScoreboardDepth << '\n'); +} + +ExactHazardRecognizer::~ExactHazardRecognizer() { + delete [] Scoreboard; +} + +void ExactHazardRecognizer::Reset() { + memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned)); + ScoreboardHead = 0; +} + +unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) { + return (ScoreboardHead + offset) % ScoreboardDepth; +} + +void ExactHazardRecognizer::dumpScoreboard() { + errs() << "Scoreboard:\n"; + + unsigned last = ScoreboardDepth - 1; + while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0)) + last--; + + for (unsigned i = 0; i <= last; i++) { + unsigned FUs = Scoreboard[getFutureIndex(i)]; + errs() << "\t"; + for (int j = 31; j >= 0; j--) + errs() << ((FUs & (1 << j)) ? 
'1' : '0'); + errs() << '\n'; + } +} + +ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) { + if (ItinData.isEmpty()) + return NoHazard; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to check for + // free FU's in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must find one of the stage's units free for every cycle the + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; + if (!freeUnits) { + DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(errs() << "SU(" << SU->NodeNum << "): "); + DEBUG(SU->getInstr()->dump()); + return Hazard; + } + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + return NoHazard; +} + +void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { + if (ItinData.isEmpty()) + return; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to reserve FU's + // in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must reserve one of the stage's units for every cycle the + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. 
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; + + // reduce to a single unit + unsigned freeUnit = 0; + do { + freeUnit = freeUnits; + freeUnits = freeUnit & (freeUnit - 1); + } while (freeUnits); + + assert(freeUnit && "No function unit available!"); + Scoreboard[index] |= freeUnit; + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + DEBUG(dumpScoreboard()); +} + +void ExactHazardRecognizer::AdvanceCycle() { + Scoreboard[ScoreboardHead] = 0; + ScoreboardHead = getFutureIndex(1); +} diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h new file mode 100644 index 0000000..71ac979 --- /dev/null +++ b/lib/CodeGen/ExactHazardRecognizer.h @@ -0,0 +1,61 @@ +//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ExactHazardRecognizer class, which +// implements hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H +#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H + +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetInstrItineraries.h" + +namespace llvm { + class ExactHazardRecognizer : public ScheduleHazardRecognizer { + // Itinerary data for the target. + const InstrItineraryData &ItinData; + + // Scoreboard to track function unit usage. 
Scoreboard[0] is a + // mask of the FUs in use in the cycle currently being + // schedule. Scoreboard[1] is a mask for the next cycle. The + // Scoreboard is used as a circular buffer with the current cycle + // indicated by ScoreboardHead. + unsigned *Scoreboard; + + // The maximum number of cycles monitored by the Scoreboard. This + // value is determined based on the target itineraries to ensure + // that all hazards can be tracked. + unsigned ScoreboardDepth; + + // Indices into the Scoreboard that represent the current cycle. + unsigned ScoreboardHead; + + // Return the scoreboard index to use for 'offset' cycles in the + // future. 'offset' of 0 returns ScoreboardHead. + unsigned getFutureIndex(unsigned offset); + + // Print the scoreboard. + void dumpScoreboard(); + + public: + ExactHazardRecognizer(const InstrItineraryData &ItinData); + ~ExactHazardRecognizer(); + + virtual HazardType getHazardType(SUnit *SU); + virtual void Reset(); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + }; +} + +#endif diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index cf2ebb3..a57296c 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -18,17 +18,20 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" #include "llvm/Support/Compiler.h" - +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { class VISIBILITY_HIDDEN Printer : public FunctionPass { static char ID; - std::ostream &OS; + raw_ostream &OS; public: - explicit Printer(std::ostream &OS = *cerr); + Printer() : FunctionPass(&ID), OS(errs()) {} + explicit Printer(raw_ostream &OS) : FunctionPass(&ID), OS(OS) {} + const char *getPassName() const; void getAnalysisUsage(AnalysisUsage &AU) const; @@ -74,27 +77,24 @@ GCModuleInfo::~GCModuleInfo() { GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, const std::string &Name) { - const char *Start = Name.c_str(); - - 
strategy_map_type::iterator NMI = - StrategyMap.find(Start, Start + Name.size()); + strategy_map_type::iterator NMI = StrategyMap.find(Name); if (NMI != StrategyMap.end()) return NMI->getValue(); for (GCRegistry::iterator I = GCRegistry::begin(), E = GCRegistry::end(); I != E; ++I) { - if (strcmp(Start, I->getName()) == 0) { + if (Name == I->getName()) { GCStrategy *S = I->instantiate(); S->M = M; S->Name = Name; - StrategyMap.GetOrCreateValue(Start, Start + Name.size()).setValue(S); + StrategyMap.GetOrCreateValue(Name).setValue(S); StrategyList.push_back(S); return S; } } - - cerr << "unsupported GC: " << Name << "\n"; - abort(); + + errs() << "unsupported GC: " << Name << "\n"; + llvm_unreachable(0); } GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { @@ -124,12 +124,10 @@ void GCModuleInfo::clear() { char Printer::ID = 0; -FunctionPass *llvm::createGCInfoPrinter(std::ostream &OS) { +FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { return new Printer(OS); } -Printer::Printer(std::ostream &OS) - : FunctionPass(&ID), OS(OS) {} const char *Printer::getPassName() const { return "Print Garbage Collector Information"; @@ -143,7 +141,7 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - default: assert(0 && "Unknown GC point kind"); + default: llvm_unreachable("Unknown GC point kind"); case GC::Loop: return "loop"; case GC::Return: return "return"; case GC::PreCall: return "pre-call"; @@ -155,12 +153,12 @@ bool Printer::runOnFunction(Function &F) { if (!F.hasGC()) { GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F); - OS << "GC roots for " << FD->getFunction().getNameStart() << ":\n"; + OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), RE = FD->roots_end(); RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - OS << "GC safe points for " << 
FD->getFunction().getNameStart() << ":\n"; + OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n"; for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; ++PI) { diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp index 5a5ef84..9cd2925 100644 --- a/lib/CodeGen/GCMetadataPrinter.cpp +++ b/lib/CodeGen/GCMetadataPrinter.cpp @@ -20,11 +20,11 @@ GCMetadataPrinter::GCMetadataPrinter() { } GCMetadataPrinter::~GCMetadataPrinter() { } void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { + const MCAsmInfo &MAI) { // Default is no action. } void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { + const MCAsmInfo &MAI) { // Default is no action. } diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index ad7421a..6d0de41 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -28,6 +28,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -70,7 +72,8 @@ namespace { void FindSafePoints(MachineFunction &MF); void VisitCallPoint(MachineBasicBlock::iterator MI); unsigned InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; + MachineBasicBlock::iterator MI, + DebugLoc DL) const; void FindStackOffsets(MachineFunction &MF); @@ -107,8 +110,8 @@ GCStrategy::~GCStrategy() { bool GCStrategy::initializeCustomLowering(Module &M) { return false; } bool GCStrategy::performCustomLowering(Function &F) { - cerr << "gc " << getName() << " must override performCustomLowering.\n"; - abort(); + errs() << "gc " << getName() << " must override performCustomLowering.\n"; + llvm_unreachable(0); return 0; } @@ -327,11 +330,13 @@ void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { } unsigned 
MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const { + MachineBasicBlock::iterator MI, + DebugLoc DL) const { unsigned Label = MMI->NextLabelID(); - // N.B. we assume that MI is *not* equal to the "end()" iterator. - BuildMI(MBB, MI, MI->getDebugLoc(), + + BuildMI(MBB, MI, DL, TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label); + return Label; } @@ -342,10 +347,12 @@ void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { ++RAI; if (FI->getStrategy().needsSafePoint(GC::PreCall)) - FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI)); + FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI, + CI->getDebugLoc())); if (FI->getStrategy().needsSafePoint(GC::PostCall)) - FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI)); + FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI, + CI->getDebugLoc())); } void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index d5e7ea5..7b613ff 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ifcvt" +#include "BranchFolding.h" #include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -21,6 +22,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -226,14 +229,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); if (!TII) return false; - DOUT << "\nIfcvt: function (" << ++FnNum << ") \'" - << MF.getFunction()->getName() << "\'"; + DEBUG(errs() << "\nIfcvt: 
function (" << ++FnNum << ") \'" + << MF.getFunction()->getName() << "\'"); if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { - DOUT << " skipped\n"; + DEBUG(errs() << " skipped\n"); return false; } - DOUT << "\n"; + DEBUG(errs() << "\n"); MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); @@ -278,13 +281,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; - DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") - << "): BB#" << BBI.BB->getNumber() << " (" - << ((Kind == ICSimpleFalse) - ? BBI.FalseBB->getNumber() - : BBI.TrueBB->getNumber()) << ") "; + DEBUG(errs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") + << "): BB#" << BBI.BB->getNumber() << " (" + << ((Kind == ICSimpleFalse) + ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) << ") "); RetVal = IfConvertSimple(BBI, Kind); - DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) NumSimpleFalse++; else NumSimple++; @@ -301,16 +304,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (DisableTriangleR && !isFalse && isRev) break; if (DisableTriangleF && isFalse && !isRev) break; if (DisableTriangleFR && isFalse && isRev) break; - DOUT << "Ifcvt (Triangle"; + DEBUG(errs() << "Ifcvt (Triangle"); if (isFalse) - DOUT << " false"; + DEBUG(errs() << " false"); if (isRev) - DOUT << " rev"; - DOUT << "): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "; + DEBUG(errs() << " rev"); + DEBUG(errs() << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertTriangle(BBI, Kind); - DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + DEBUG(errs() << (RetVal ? 
"succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) { if (isRev) NumTriangleFRev++; @@ -324,11 +327,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } case ICDiamond: { if (DisableDiamond) break; - DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "; + DEBUG(errs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); - DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) NumDiamonds++; break; } @@ -358,6 +361,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { Roots.clear(); BBAnalysis.clear(); + if (MadeChange) { + BranchFolder BF(false); + BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); + } + return MadeChange; } @@ -1130,8 +1140,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI, if (TII->isPredicated(I)) continue; if (!TII->PredicateInstruction(I, Cond)) { - cerr << "Unable to predicate " << *I << "!\n"; - abort(); +#ifndef NDEBUG + errs() << "Unable to predicate " << *I << "!\n"; +#endif + llvm_unreachable(0); } } @@ -1164,8 +1176,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, if (!isPredicated) if (!TII->PredicateInstruction(MI, Cond)) { - cerr << "Unable to predicate " << *MI << "!\n"; - abort(); +#ifndef NDEBUG + errs() << "Unable to predicate " << *MI << "!\n"; +#endif + llvm_unreachable(0); } } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 052334a..3e3b28a 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -16,7 +16,9 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/CodeGen/IntrinsicLowering.h" +#include 
"llvm/Support/ErrorHandling.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; @@ -39,11 +41,11 @@ static void EnsureFPIntrinsicsExist(Module &M, Function *Fn, switch((int)Fn->arg_begin()->getType()->getTypeID()) { case Type::FloatTyID: EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(), - Type::FloatTy); + Type::getFloatTy(M.getContext())); break; case Type::DoubleTyID: EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(), - Type::DoubleTy); + Type::getDoubleTy(M.getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: @@ -82,39 +84,43 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, } void IntrinsicLowering::AddPrototypes(Module &M) { + LLVMContext &Context = M.getContext(); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->isDeclaration() && !I->use_empty()) switch (I->getIntrinsicID()) { default: break; case Intrinsic::setjmp: EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), - Type::Int32Ty); + Type::getInt32Ty(M.getContext())); break; case Intrinsic::longjmp: EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), - Type::VoidTy); + Type::getVoidTy(M.getContext())); break; case Intrinsic::siglongjmp: EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), - Type::VoidTy); + Type::getVoidTy(M.getContext())); break; case Intrinsic::memcpy: - M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - TD.getIntPtrType(), (Type *)0); + M.getOrInsertFunction("memcpy", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memmove: - M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - 
PointerType::getUnqual(Type::Int8Ty), - TD.getIntPtrType(), (Type *)0); + M.getOrInsertFunction("memmove", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memset: - M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - Type::Int32Ty, - TD.getIntPtrType(), (Type *)0); + M.getOrInsertFunction("memset", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt32Ty(M.getContext()), + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::sqrt: EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); @@ -148,7 +154,7 @@ void IntrinsicLowering::AddPrototypes(Module &M) { /// LowerBSWAP - Emit the code to lower bswap of V before the specified /// instruction IP. -static Value *LowerBSWAP(Value *V, Instruction *IP) { +static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); @@ -156,7 +162,7 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { IRBuilder<> Builder(IP->getParent(), IP); switch(BitSize) { - default: assert(0 && "Unhandled type size of value to byteswap!"); + default: llvm_unreachable("Unhandled type size of value to byteswap!"); case 16: { Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), "bswap.2"); @@ -172,11 +178,13 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { "bswap.3"); Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), "bswap.2"); - Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24), "bswap.1"); - Tmp3 = Builder.CreateAnd(Tmp3, ConstantInt::get(Type::Int32Ty, 0xFF0000), + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000), 
"bswap.and3"); - Tmp2 = Builder.CreateAnd(Tmp2, ConstantInt::get(Type::Int32Ty, 0xFF00), + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF00), "bswap.and2"); Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); @@ -194,31 +202,38 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { "bswap.5"); Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), "bswap.4"); - Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + Value* Tmp3 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 24), "bswap.3"); - Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40), + Value* Tmp2 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 40), "bswap.2"); - Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56), + Value* Tmp1 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 56), "bswap.1"); Tmp7 = Builder.CreateAnd(Tmp7, - ConstantInt::get(Type::Int64Ty, + ConstantInt::get(Type::getInt64Ty(Context), 0xFF000000000000ULL), "bswap.and7"); Tmp6 = Builder.CreateAnd(Tmp6, - ConstantInt::get(Type::Int64Ty, + ConstantInt::get(Type::getInt64Ty(Context), 0xFF0000000000ULL), "bswap.and6"); Tmp5 = Builder.CreateAnd(Tmp5, - ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00000000ULL), "bswap.and5"); Tmp4 = Builder.CreateAnd(Tmp4, - ConstantInt::get(Type::Int64Ty, 0xFF000000ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000ULL), "bswap.and4"); Tmp3 = Builder.CreateAnd(Tmp3, - ConstantInt::get(Type::Int64Ty, 0xFF0000ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000ULL), "bswap.and3"); Tmp2 = Builder.CreateAnd(Tmp2, - ConstantInt::get(Type::Int64Ty, 0xFF00ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00ULL), "bswap.and2"); Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2"); @@ -235,7 +250,7 @@ 
static Value *LowerBSWAP(Value *V, Instruction *IP) { /// LowerCTPOP - Emit the code to lower ctpop of V before the specified /// instruction IP. -static Value *LowerCTPOP(Value *V, Instruction *IP) { +static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!"); static const uint64_t MaskValues[6] = { @@ -257,7 +272,7 @@ static Value *LowerCTPOP(Value *V, Instruction *IP) { Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]); Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1"); Value *VShift = Builder.CreateLShr(PartValue, - ConstantInt::get(V->getType(), i), + ConstantInt::get(V->getType(), i), "ctpop.sh"); Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2"); PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step"); @@ -275,7 +290,7 @@ static Value *LowerCTPOP(Value *V, Instruction *IP) { /// LowerCTLZ - Emit the code to lower ctlz of V before the specified /// instruction IP. -static Value *LowerCTLZ(Value *V, Instruction *IP) { +static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { IRBuilder<> Builder(IP->getParent(), IP); @@ -287,353 +302,21 @@ static Value *LowerCTLZ(Value *V, Instruction *IP) { } V = Builder.CreateNot(V); - return LowerCTPOP(V, IP); -} - -/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes -/// three integer arguments. The first argument is the Value from which the -/// bits will be selected. It may be of any bit width. The second and third -/// arguments specify a range of bits to select with the second argument -/// specifying the low bit and the third argument specifying the high bit. Both -/// must be type i32. The result is the corresponding selected bits from the -/// Value in the same width as the Value (first argument). If the low bit index -/// is higher than the high bit index then the inverse selection is done and -/// the bits are returned in inverse order. 
-/// @brief Lowering of llvm.part.select intrinsic. -static Instruction *LowerPartSelect(CallInst *CI) { - IRBuilder<> Builder; - - // Make sure we're dealing with a part select intrinsic here - Function *F = CI->getCalledFunction(); - const FunctionType *FT = F->getFunctionType(); - if (!F->isDeclaration() || !FT->getReturnType()->isInteger() || - FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() || - !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger()) - return CI; - - // Get the intrinsic implementation function by converting all the . to _ - // in the intrinsic's function name and then reconstructing the function - // declaration. - std::string Name(F->getName()); - for (unsigned i = 4; i < Name.length(); ++i) - if (Name[i] == '.') - Name[i] = '_'; - Module* M = F->getParent(); - F = cast<Function>(M->getOrInsertFunction(Name, FT)); - F->setLinkage(GlobalValue::WeakAnyLinkage); - - // If we haven't defined the impl function yet, do so now - if (F->isDeclaration()) { - - // Get the arguments to the function - Function::arg_iterator args = F->arg_begin(); - Value* Val = args++; Val->setName("Val"); - Value* Lo = args++; Lo->setName("Lo"); - Value* Hi = args++; Hi->setName("High"); - - // We want to select a range of bits here such that [Hi, Lo] is shifted - // down to the low bits. However, it is quite possible that Hi is smaller - // than Lo in which case the bits have to be reversed. 
- - // Create the blocks we will need for the two cases (forward, reverse) - BasicBlock* CurBB = BasicBlock::Create("entry", F); - BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent()); - BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent()); - BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent()); - BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent()); - BasicBlock *RsltBlk = BasicBlock::Create("result", CurBB->getParent()); - - Builder.SetInsertPoint(CurBB); - - // Cast Hi and Lo to the size of Val so the widths are all the same - if (Hi->getType() != Val->getType()) - Hi = Builder.CreateIntCast(Hi, Val->getType(), /* isSigned */ false, - "tmp"); - if (Lo->getType() != Val->getType()) - Lo = Builder.CreateIntCast(Lo, Val->getType(), /* isSigned */ false, - "tmp"); - - // Compute a few things that both cases will need, up front. - Constant* Zero = ConstantInt::get(Val->getType(), 0); - Constant* One = ConstantInt::get(Val->getType(), 1); - Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType()); - - // Compare the Hi and Lo bit positions. This is used to determine - // which case we have (forward or reverse) - Value *Cmp = Builder.CreateICmpULT(Hi, Lo, "less"); - Builder.CreateCondBr(Cmp, RevSize, FwdSize); - - // First, compute the number of bits in the forward case. - Builder.SetInsertPoint(FwdSize); - Value* FBitSize = Builder.CreateSub(Hi, Lo, "fbits"); - Builder.CreateBr(Compute); - - // Second, compute the number of bits in the reverse case. - Builder.SetInsertPoint(RevSize); - Value* RBitSize = Builder.CreateSub(Lo, Hi, "rbits"); - Builder.CreateBr(Compute); - - // Now, compute the bit range. Start by getting the bitsize and the shift - // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for - // the number of bits we want in the range. 
We shift the bits down to the - // least significant bits, apply the mask to zero out unwanted high bits, - // and we have computed the "forward" result. It may still need to be - // reversed. - Builder.SetInsertPoint(Compute); - - // Get the BitSize from one of the two subtractions - PHINode *BitSize = Builder.CreatePHI(Val->getType(), "bits"); - BitSize->reserveOperandSpace(2); - BitSize->addIncoming(FBitSize, FwdSize); - BitSize->addIncoming(RBitSize, RevSize); - - // Get the ShiftAmount as the smaller of Hi/Lo - PHINode *ShiftAmt = Builder.CreatePHI(Val->getType(), "shiftamt"); - ShiftAmt->reserveOperandSpace(2); - ShiftAmt->addIncoming(Lo, FwdSize); - ShiftAmt->addIncoming(Hi, RevSize); - - // Increment the bit size - Value *BitSizePlusOne = Builder.CreateAdd(BitSize, One, "bits"); - - // Create a Mask to zero out the high order bits. - Value* Mask = Builder.CreateShl(AllOnes, BitSizePlusOne, "mask"); - Mask = Builder.CreateNot(Mask, "mask"); - - // Shift the bits down and apply the mask - Value* FRes = Builder.CreateLShr(Val, ShiftAmt, "fres"); - FRes = Builder.CreateAnd(FRes, Mask, "fres"); - Builder.CreateCondBr(Cmp, Reverse, RsltBlk); - - // In the Reverse block we have the mask already in FRes but we must reverse - // it by shifting FRes bits right and putting them in RRes by shifting them - // in from left. - Builder.SetInsertPoint(Reverse); - - // First set up our loop counters - PHINode *Count = Builder.CreatePHI(Val->getType(), "count"); - Count->reserveOperandSpace(2); - Count->addIncoming(BitSizePlusOne, Compute); - - // Next, get the value that we are shifting. 
- PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val"); - BitsToShift->reserveOperandSpace(2); - BitsToShift->addIncoming(FRes, Compute); - - // Finally, get the result of the last computation - PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres"); - RRes->reserveOperandSpace(2); - RRes->addIncoming(Zero, Compute); - - // Decrement the counter - Value *Decr = Builder.CreateSub(Count, One, "decr"); - Count->addIncoming(Decr, Reverse); - - // Compute the Bit that we want to move - Value *Bit = Builder.CreateAnd(BitsToShift, One, "bit"); - - // Compute the new value for next iteration. - Value *NewVal = Builder.CreateLShr(BitsToShift, One, "rshift"); - BitsToShift->addIncoming(NewVal, Reverse); - - // Shift the bit into the low bits of the result. - Value *NewRes = Builder.CreateShl(RRes, One, "lshift"); - NewRes = Builder.CreateOr(NewRes, Bit, "addbit"); - RRes->addIncoming(NewRes, Reverse); - - // Terminate loop if we've moved all the bits. - Value *Cond = Builder.CreateICmpEQ(Decr, Zero, "cond"); - Builder.CreateCondBr(Cond, RsltBlk, Reverse); - - // Finally, in the result block, select one of the two results with a PHI - // node and return the result; - Builder.SetInsertPoint(RsltBlk); - PHINode *BitSelect = Builder.CreatePHI(Val->getType(), "part_select"); - BitSelect->reserveOperandSpace(2); - BitSelect->addIncoming(FRes, Compute); - BitSelect->addIncoming(NewRes, Reverse); - Builder.CreateRet(BitSelect); - } - - // Return a call to the implementation function - Builder.SetInsertPoint(CI->getParent(), CI); - CallInst *NewCI = Builder.CreateCall3(F, CI->getOperand(1), - CI->getOperand(2), CI->getOperand(3)); - NewCI->setName(CI->getName()); - return NewCI; -} - -/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes -/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High) -/// The first two arguments can be any bit width. The result is the same width -/// as %Value. 
The operation replaces bits between %Low and %High with the value -/// in %Replacement. If %Replacement is not the same width, it is truncated or -/// zero extended as appropriate to fit the bits being replaced. If %Low is -/// greater than %High then the inverse set of bits are replaced. -/// @brief Lowering of llvm.bit.part.set intrinsic. -static Instruction *LowerPartSet(CallInst *CI) { - IRBuilder<> Builder; - - // Make sure we're dealing with a part select intrinsic here - Function *F = CI->getCalledFunction(); - const FunctionType *FT = F->getFunctionType(); - if (!F->isDeclaration() || !FT->getReturnType()->isInteger() || - FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() || - !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() || - !FT->getParamType(3)->isInteger()) - return CI; - - // Get the intrinsic implementation function by converting all the . to _ - // in the intrinsic's function name and then reconstructing the function - // declaration. - std::string Name(F->getName()); - for (unsigned i = 4; i < Name.length(); ++i) - if (Name[i] == '.') - Name[i] = '_'; - Module* M = F->getParent(); - F = cast<Function>(M->getOrInsertFunction(Name, FT)); - F->setLinkage(GlobalValue::WeakAnyLinkage); - - // If we haven't defined the impl function yet, do so now - if (F->isDeclaration()) { - // Get the arguments for the function. 
- Function::arg_iterator args = F->arg_begin(); - Value* Val = args++; Val->setName("Val"); - Value* Rep = args++; Rep->setName("Rep"); - Value* Lo = args++; Lo->setName("Lo"); - Value* Hi = args++; Hi->setName("Hi"); - - // Get some types we need - const IntegerType* ValTy = cast<IntegerType>(Val->getType()); - const IntegerType* RepTy = cast<IntegerType>(Rep->getType()); - uint32_t RepBits = RepTy->getBitWidth(); - - // Constant Definitions - ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits); - ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy); - ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy); - ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1); - ConstantInt* ValOne = ConstantInt::get(ValTy, 1); - ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0); - ConstantInt* ValZero = ConstantInt::get(ValTy, 0); - - // Basic blocks we fill in below. - BasicBlock* entry = BasicBlock::Create("entry", F, 0); - BasicBlock* large = BasicBlock::Create("large", F, 0); - BasicBlock* small = BasicBlock::Create("small", F, 0); - BasicBlock* reverse = BasicBlock::Create("reverse", F, 0); - BasicBlock* result = BasicBlock::Create("result", F, 0); - - // BASIC BLOCK: entry - Builder.SetInsertPoint(entry); - // First, get the number of bits that we're placing as an i32 - Value* is_forward = Builder.CreateICmpULT(Lo, Hi); - Value* Hi_pn = Builder.CreateSelect(is_forward, Hi, Lo); - Value* Lo_pn = Builder.CreateSelect(is_forward, Lo, Hi); - Value* NumBits = Builder.CreateSub(Hi_pn, Lo_pn); - NumBits = Builder.CreateAdd(NumBits, One); - // Now, convert Lo and Hi to ValTy bit width - Lo = Builder.CreateIntCast(Lo_pn, ValTy, /* isSigned */ false); - // Determine if the replacement bits are larger than the number of bits we - // are replacing and deal with it. 
- Value* is_large = Builder.CreateICmpULT(NumBits, RepBitWidth); - Builder.CreateCondBr(is_large, large, small); - - // BASIC BLOCK: large - Builder.SetInsertPoint(large); - Value* MaskBits = Builder.CreateSub(RepBitWidth, NumBits); - MaskBits = Builder.CreateIntCast(MaskBits, RepMask->getType(), - /* isSigned */ false); - Value* Mask1 = Builder.CreateLShr(RepMask, MaskBits); - Value* Rep2 = Builder.CreateAnd(Mask1, Rep); - Builder.CreateBr(small); - - // BASIC BLOCK: small - Builder.SetInsertPoint(small); - PHINode* Rep3 = Builder.CreatePHI(RepTy); - Rep3->reserveOperandSpace(2); - Rep3->addIncoming(Rep2, large); - Rep3->addIncoming(Rep, entry); - Value* Rep4 = Builder.CreateIntCast(Rep3, ValTy, /* isSigned */ false); - Builder.CreateCondBr(is_forward, result, reverse); - - // BASIC BLOCK: reverse (reverses the bits of the replacement) - Builder.SetInsertPoint(reverse); - // Set up our loop counter as a PHI so we can decrement on each iteration. - // We will loop for the number of bits in the replacement value. - PHINode *Count = Builder.CreatePHI(Type::Int32Ty, "count"); - Count->reserveOperandSpace(2); - Count->addIncoming(NumBits, small); - - // Get the value that we are shifting bits out of as a PHI because - // we'll change this with each iteration. - PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val"); - BitsToShift->reserveOperandSpace(2); - BitsToShift->addIncoming(Rep4, small); - - // Get the result of the last computation or zero on first iteration - PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres"); - RRes->reserveOperandSpace(2); - RRes->addIncoming(ValZero, small); - - // Decrement the loop counter by one - Value *Decr = Builder.CreateSub(Count, One); - Count->addIncoming(Decr, reverse); - - // Get the bit that we want to move into the result - Value *Bit = Builder.CreateAnd(BitsToShift, ValOne); - - // Compute the new value of the bits to shift for the next iteration. 
- Value *NewVal = Builder.CreateLShr(BitsToShift, ValOne); - BitsToShift->addIncoming(NewVal, reverse); - - // Shift the bit we extracted into the low bit of the result. - Value *NewRes = Builder.CreateShl(RRes, ValOne); - NewRes = Builder.CreateOr(NewRes, Bit); - RRes->addIncoming(NewRes, reverse); - - // Terminate loop if we've moved all the bits. - Value *Cond = Builder.CreateICmpEQ(Decr, Zero); - Builder.CreateCondBr(Cond, result, reverse); - - // BASIC BLOCK: result - Builder.SetInsertPoint(result); - PHINode *Rplcmnt = Builder.CreatePHI(Val->getType()); - Rplcmnt->reserveOperandSpace(2); - Rplcmnt->addIncoming(NewRes, reverse); - Rplcmnt->addIncoming(Rep4, small); - Value* t0 = Builder.CreateIntCast(NumBits, ValTy, /* isSigned */ false); - Value* t1 = Builder.CreateShl(ValMask, Lo); - Value* t2 = Builder.CreateNot(t1); - Value* t3 = Builder.CreateShl(t1, t0); - Value* t4 = Builder.CreateOr(t2, t3); - Value* t5 = Builder.CreateAnd(t4, Val); - Value* t6 = Builder.CreateShl(Rplcmnt, Lo); - Value* Rslt = Builder.CreateOr(t5, t6, "part_set"); - Builder.CreateRet(Rslt); - } - - // Return a call to the implementation function - Builder.SetInsertPoint(CI->getParent(), CI); - CallInst *NewCI = Builder.CreateCall4(F, CI->getOperand(1), - CI->getOperand(2), CI->getOperand(3), - CI->getOperand(4)); - NewCI->setName(CI->getName()); - return NewCI; + return LowerCTPOP(Context, V, IP); } static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, const char *Dname, const char *LDname) { switch (CI->getOperand(1)->getType()->getTypeID()) { - default: assert(0 && "Invalid type in intrinsic"); abort(); + default: llvm_unreachable("Invalid type in intrinsic"); case Type::FloatTyID: ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(), - Type::FloatTy); + Type::getFloatTy(CI->getContext())); break; case Type::DoubleTyID: ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(), - Type::DoubleTy); + Type::getDoubleTy(CI->getContext())); break; case 
Type::X86_FP80TyID: case Type::FP128TyID: @@ -646,19 +329,18 @@ static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { IRBuilder<> Builder(CI->getParent(), CI); + LLVMContext &Context = CI->getContext(); Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: - cerr << "Cannot lower a call to a non-intrinsic function '" - << Callee->getName() << "'!\n"; - abort(); + llvm_report_error("Cannot lower a call to a non-intrinsic function '"+ + Callee->getName() + "'!"); default: - cerr << "Error: Code generator does not support intrinsic function '" - << Callee->getName() << "'!\n"; - abort(); + llvm_report_error("Code generator does not support intrinsic function '"+ + Callee->getName()+"'!"); // The setjmp/longjmp intrinsics should only exist in the code if it was // never optimized (ie, right out of the CFE), or if it has been hacked on @@ -666,38 +348,38 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { // convert the call to an explicit setjmp or longjmp call. 
case Intrinsic::setjmp: { Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(), - Type::Int32Ty); - if (CI->getType() != Type::VoidTy) + Type::getInt32Ty(Context)); + if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(V); break; } case Intrinsic::sigsetjmp: - if (CI->getType() != Type::VoidTy) + if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; case Intrinsic::longjmp: { ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(), - Type::VoidTy); + Type::getVoidTy(Context)); break; } case Intrinsic::siglongjmp: { // Insert the call to abort ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), - Type::VoidTy); + Type::getVoidTy(Context)); break; } case Intrinsic::ctpop: - CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI)); break; case Intrinsic::bswap: - CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI)); break; case Intrinsic::ctlz: - CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI)); break; case Intrinsic::cttz: { @@ -707,24 +389,16 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { NotSrc->setName(Src->getName() + ".not"); Value *SrcM1 = ConstantInt::get(Src->getType(), 1); SrcM1 = Builder.CreateSub(Src, SrcM1); - Src = LowerCTPOP(Builder.CreateAnd(NotSrc, SrcM1), CI); + Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI); CI->replaceAllUsesWith(Src); break; } - case Intrinsic::part_select: - CI->replaceAllUsesWith(LowerPartSelect(CI)); - break; - - case Intrinsic::part_set: - CI->replaceAllUsesWith(LowerPartSet(CI)); - break; - case Intrinsic::stacksave: case Intrinsic::stackrestore: { if (!Warned) - cerr << "WARNING: this target does not support the llvm.stack" - << (Callee->getIntrinsicID() == 
Intrinsic::stacksave ? + errs() << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? "save" : "restore") << " intrinsic.\n"; Warned = true; if (Callee->getIntrinsicID() == Intrinsic::stacksave) @@ -734,8 +408,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::returnaddress: case Intrinsic::frameaddress: - cerr << "WARNING: this target does not support the llvm." - << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + errs() << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? "return" : "frame") << "address intrinsic.\n"; CI->replaceAllUsesWith(ConstantPointerNull::get( cast<PointerType>(CI->getType()))); @@ -747,9 +421,9 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::pcmarker: break; // Simply strip out pcmarker on unsupported architectures case Intrinsic::readcyclecounter: { - cerr << "WARNING: this target does not support the llvm.readcyclecoun" - << "ter intrinsic. It is being lowered to a constant 0\n"; - CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0)); + errs() << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); break; } @@ -761,13 +435,11 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Simply strip out debugging intrinsics case Intrinsic::eh_exception: - case Intrinsic::eh_selector_i32: - case Intrinsic::eh_selector_i64: + case Intrinsic::eh_selector: CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; - case Intrinsic::eh_typeid_for_i32: - case Intrinsic::eh_typeid_for_i64: + case Intrinsic::eh_typeid_for: // Return something different to eh_selector. 
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; @@ -776,7 +448,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - const IntegerType *IntPtr = TD.getIntPtrType(); + const IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -787,7 +459,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - const IntegerType *IntPtr = TD.getIntPtrType(); + const IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -798,13 +470,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - const IntegerType *IntPtr = TD.getIntPtrType(); + const IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, /* isSigned */ false); Value *Ops[3]; Ops[0] = CI->getOperand(1); // Extend the amount to i32. 
- Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty, + Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context), /* isSigned */ false); Ops[2] = Size; ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType()); @@ -840,7 +512,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::flt_rounds: // Lower to "round to the nearest" - if (CI->getType() != Type::VoidTy) + if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index a163cac..4e713a6 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -15,14 +15,16 @@ #include "llvm/PassManager.h" #include "llvm/Pass.h" #include "llvm/Assembly/PrintModulePass.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FormattedStream.h" using namespace llvm; namespace llvm { @@ -37,26 +39,31 @@ static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden, cl::desc("Dump emitter generated instructions as assembly")); static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); +static cl::opt<bool> HoistConstants("hoist-constants", cl::Hidden, + cl::desc("Hoist constants out of loops")); static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); -// When this works it will be on by default. 
-static cl::opt<bool> -DisablePostRAScheduler("disable-post-RA-scheduler", - cl::desc("Disable scheduling after register allocation"), - cl::init(true)); - // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be -// able to enable or disable fast-isel independently from -fast. +// able to enable or disable fast-isel independently from -O0. static cl::opt<cl::boolOrDefault> EnableFastISelOption("fast-isel", cl::Hidden, - cl::desc("Enable the experimental \"fast\" instruction selector")); + cl::desc("Enable the \"fast\" instruction selector")); + + +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + const std::string &TargetTriple) + : TargetMachine(T) { + AsmInfo = T.createAsmInfo(TargetTriple); +} + + FileModel::Model LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - raw_ostream &Out, + formatted_raw_ostream &Out, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { // Add common CodeGen passes. @@ -67,10 +74,10 @@ LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, PM.add(createDebugLabelFoldingPass()); if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); if (OptLevel != CodeGenOpt::None) PM.add(createCodePlacementOptPass()); @@ -92,6 +99,19 @@ LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return FileModel::Error; } +bool LLVMTargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + formatted_raw_ostream &Out) { + FunctionPass *Printer = + getTarget().createAsmPrinter(Out, *this, getMCAsmInfo(), Verbose); + if (!Printer) + return true; + + PM.add(Printer); + return false; +} + /// addPassesToEmitFileFinish - If the passes to emit the specified file had to /// be split up (e.g., to add 
an object writer pass), this method can be used to /// finish up adding passes to emit the file, if necessary. @@ -99,13 +119,12 @@ bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, MachineCodeEmitter *MCE, CodeGenOpt::Level OptLevel) { if (MCE) - addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *MCE); + addSimpleCodeEmitter(PM, OptLevel, *MCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); - return false; // success! } @@ -116,12 +135,27 @@ bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, JITCodeEmitter *JCE, CodeGenOpt::Level OptLevel) { if (JCE) - addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *JCE); + addSimpleCodeEmitter(PM, OptLevel, *JCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); + return false; // success! +} + +/// addPassesToEmitFileFinish - If the passes to emit the specified file had to +/// be split up (e.g., to add an object writer pass), this method can be used to +/// finish up adding passes to emit the file, if necessary. +bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, + ObjectCodeEmitter *OCE, + CodeGenOpt::Level OptLevel) { + if (OCE) + addSimpleCodeEmitter(PM, OptLevel, *OCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); + + PM.add(createGCInfoDeleter()); return false; // success! 
} @@ -140,15 +174,14 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); - addCodeEmitter(PM, OptLevel, PrintEmittedAsm, MCE); + addCodeEmitter(PM, OptLevel, MCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); - return false; // success! } @@ -166,22 +199,21 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); - addCodeEmitter(PM, OptLevel, PrintEmittedAsm, JCE); + addCodeEmitter(PM, OptLevel, JCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); - return false; // success! } static void printAndVerify(PassManagerBase &PM, bool allowDoubleDefs = false) { if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); if (VerifyMachineCode) PM.add(createMachineVerifierPass(allowDoubleDefs)); @@ -203,18 +235,31 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Turn exception handling constructs into something the code generators can // handle. - if (!getTargetAsmInfo()->doesSupportExceptionHandling()) - PM.add(createLowerInvokePass(getTargetLowering())); - else + switch (getMCAsmInfo()->getExceptionHandlingType()) + { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. 
The cleanups done apply to both PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + PM.add(createSjLjEHPass(getTargetLowering())); + break; + case ExceptionHandling::Dwarf: + PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + break; + case ExceptionHandling::None: + PM.add(createLowerInvokePass(getTargetLowering())); + break; + } PM.add(createGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. PM.add(createUnreachableBlockEliminationPass()); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { + if (HoistConstants) + PM.add(createCodeGenLICMPass()); PM.add(createCodeGenPreparePass(getTargetLowering())); + } PM.add(createStackProtectorPass(getTargetLowering())); @@ -225,6 +270,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Standard Lower-Level Passes. + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + // Enable FastISel with -fast, but allow that to be overridden. if (EnableFastISelOption == cl::BOU_TRUE || (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) @@ -240,19 +288,21 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { PM.add(createMachineLICMPass()); PM.add(createMachineSinkingPass()); - printAndVerify(PM, /* allowDoubleDefs= */ false); + printAndVerify(PM, /* allowDoubleDefs= */ true); } // Run pre-ra passes. if (addPreRegAlloc(PM, OptLevel)) - printAndVerify(PM); + printAndVerify(PM, /* allowDoubleDefs= */ true); // Perform register allocation. PM.add(createRegisterAllocator()); // Perform stack slot coloring. if (OptLevel != CodeGenOpt::None) - PM.add(createStackSlotColoringPass(OptLevel >= CodeGenOpt::Aggressive)); + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. 
+ PM.add(createStackSlotColoringPass(false)); printAndVerify(PM); // Print the register-allocated code @@ -267,8 +317,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createPrologEpilogCodeInserter()); printAndVerify(PM); + // Run pre-sched2 passes. + if (addPreSched2(PM, OptLevel)) + printAndVerify(PM); + // Second pass scheduler. - if (OptLevel != CodeGenOpt::None && !DisablePostRAScheduler) { + if (OptLevel != CodeGenOpt::None) { PM.add(createPostRAScheduler()); printAndVerify(PM); } @@ -283,7 +337,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM); if (PrintGCInfo) - PM.add(createGCInfoPrinter(*cerr)); + PM.add(createGCInfoPrinter(errs())); return false; } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 26722a3..a02a4a6 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -23,12 +23,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> -#include <ostream> using namespace llvm; +// Print a LiveIndex to a raw_ostream. +void LiveIndex::print(raw_ostream &os) const { + os << (index & ~PHI_BIT); +} + // An example for liveAt(): // // this = [1,4), liveAt(0) will return false. The instruction defining this @@ -36,7 +40,7 @@ using namespace llvm; // variable it represents. This is because slot 1 is used (def slot) and spans // up to slot 3 (store slot). // -bool LiveInterval::liveAt(unsigned I) const { +bool LiveInterval::liveAt(LiveIndex I) const { Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); if (r == ranges.begin()) @@ -49,7 +53,7 @@ bool LiveInterval::liveAt(unsigned I) const { // liveBeforeAndAt - Check if the interval is live at the index and the index // just before it. 
If index is liveAt, check if it starts a new live range. // If it does, then check if the previous live range ends at index-1. -bool LiveInterval::liveBeforeAndAt(unsigned I) const { +bool LiveInterval::liveBeforeAndAt(LiveIndex I) const { Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); if (r == ranges.begin()) @@ -127,7 +131,7 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, /// overlaps - Return true if the live interval overlaps a range specified /// by [Start, End). -bool LiveInterval::overlaps(unsigned Start, unsigned End) const { +bool LiveInterval::overlaps(LiveIndex Start, LiveIndex End) const { assert(Start < End && "Invalid range"); const_iterator I = begin(); const_iterator E = end(); @@ -145,10 +149,10 @@ bool LiveInterval::overlaps(unsigned Start, unsigned End) const { /// specified by I to end at the specified endpoint. To do this, we should /// merge and eliminate all ranges that this will overlap with. The iterator is /// not invalidated. -void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { +void LiveInterval::extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd) { assert(I != ranges.end() && "Not a valid interval!"); VNInfo *ValNo = I->valno; - unsigned OldEnd = I->end; + LiveIndex OldEnd = I->end; // Search for the first interval that we can't merge with. Ranges::iterator MergeTo = next(I); @@ -163,7 +167,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { ranges.erase(next(I), MergeTo); // Update kill info. - removeKills(ValNo, OldEnd, I->end-1); + ValNo->removeKills(OldEnd, I->end.prevSlot_()); // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. @@ -179,7 +183,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { /// specified by I to start at the specified endpoint. 
To do this, we should /// merge and eliminate all ranges that this will overlap with. LiveInterval::Ranges::iterator -LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) { +LiveInterval::extendIntervalStartTo(Ranges::iterator I, LiveIndex NewStart) { assert(I != ranges.end() && "Not a valid interval!"); VNInfo *ValNo = I->valno; @@ -212,7 +216,7 @@ LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) { LiveInterval::iterator LiveInterval::addRangeFrom(LiveRange LR, iterator From) { - unsigned Start = LR.start, End = LR.end; + LiveIndex Start = LR.start, End = LR.end; iterator it = std::upper_bound(From, ranges.end(), Start); // If the inserted interval starts in the middle or right at the end of @@ -246,7 +250,7 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { extendIntervalEndTo(it, End); else if (End < it->end) // Overlapping intervals, there might have been a kill here. - removeKill(it->valno, End); + it->valno->removeKill(End); return it; } } else { @@ -262,33 +266,32 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } -/// isInOneLiveRange - Return true if the range specified is entirely in the +/// isInOneLiveRange - Return true if the range specified is entirely in /// a single LiveRange of the live interval. -bool LiveInterval::isInOneLiveRange(unsigned Start, unsigned End) { +bool LiveInterval::isInOneLiveRange(LiveIndex Start, LiveIndex End) { Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); if (I == ranges.begin()) return false; --I; - return I->contains(Start) && I->contains(End-1); + return I->containsRange(Start, End); } /// removeRange - Remove the specified range from this interval. Note that /// the range must be in a single LiveRange in its entirety. 
-void LiveInterval::removeRange(unsigned Start, unsigned End, +void LiveInterval::removeRange(LiveIndex Start, LiveIndex End, bool RemoveDeadValNo) { // Find the LiveRange containing this span. Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); assert(I != ranges.begin() && "Range is not in interval!"); --I; - assert(I->contains(Start) && I->contains(End-1) && - "Range is not entirely in interval!"); + assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); // If the span we are removing is at the start of the LiveRange, adjust it. VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { - removeKills(I->valno, Start, End); + ValNo->removeKills(Start, End); if (RemoveDeadValNo) { // Check if val# is dead. bool isDead = true; @@ -322,13 +325,13 @@ void LiveInterval::removeRange(unsigned Start, unsigned End, // Otherwise if the span we are removing is at the end of the LiveRange, // adjust the other way. if (I->end == End) { - removeKills(ValNo, Start, End); + ValNo->removeKills(Start, End); I->end = Start; return; } // Otherwise, we are splitting the LiveRange into two pieces. - unsigned OldEnd = I->end; + LiveIndex OldEnd = I->end; I->end = Start; // Trim the old interval. // Insert the new one. @@ -362,11 +365,12 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { /// scaleNumbering - Renumber VNI and ranges to provide gaps for new /// instructions. + void LiveInterval::scaleNumbering(unsigned factor) { // Scale ranges. for (iterator RI = begin(), RE = end(); RI != RE; ++RI) { - RI->start = InstrSlots::scale(RI->start, factor); - RI->end = InstrSlots::scale(RI->end, factor); + RI->start = RI->start.scale(factor); + RI->end = RI->end.scale(factor); } // Scale VNI info. 
@@ -374,19 +378,20 @@ void LiveInterval::scaleNumbering(unsigned factor) { VNInfo *vni = *VNI; if (vni->isDefAccurate()) - vni->def = InstrSlots::scale(vni->def, factor); + vni->def = vni->def.scale(factor); for (unsigned i = 0; i < vni->kills.size(); ++i) { - if (vni->kills[i] != 0) - vni->kills[i] = InstrSlots::scale(vni->kills[i], factor); + if (!vni->kills[i].isPHIIndex()) + vni->kills[i] = vni->kills[i].scale(factor); } } } + /// getLiveRangeContaining - Return the live range that contains the /// specified index, or null if there is none. LiveInterval::const_iterator -LiveInterval::FindLiveRangeContaining(unsigned Idx) const { +LiveInterval::FindLiveRangeContaining(LiveIndex Idx) const { const_iterator It = std::upper_bound(begin(), end(), Idx); if (It != ranges.begin()) { --It; @@ -398,7 +403,7 @@ LiveInterval::FindLiveRangeContaining(unsigned Idx) const { } LiveInterval::iterator -LiveInterval::FindLiveRangeContaining(unsigned Idx) { +LiveInterval::FindLiveRangeContaining(LiveIndex Idx) { iterator It = std::upper_bound(begin(), end(), Idx); if (It != begin()) { --It; @@ -409,17 +414,27 @@ LiveInterval::FindLiveRangeContaining(unsigned Idx) { return end(); } -/// findDefinedVNInfo - Find the VNInfo that's defined at the specified index -/// (register interval) or defined by the specified register (stack inteval). -VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const { - VNInfo *VNI = NULL; +/// findDefinedVNInfo - Find the VNInfo defined by the specified +/// index (register interval). +VNInfo *LiveInterval::findDefinedVNInfoForRegInt(LiveIndex Idx) const { for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end(); - i != e; ++i) - if ((*i)->def == DefIdxOrReg) { - VNI = *i; - break; - } - return VNI; + i != e; ++i) { + if ((*i)->def == Idx) + return *i; + } + + return 0; +} + +/// findDefinedVNInfo - Find the VNInfo defined by the specified +/// register (stack inteval). 
+VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const { + for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end(); + i != e; ++i) { + if ((*i)->getReg() == reg) + return *i; + } + return 0; } /// join - Join two live intervals (this, and other) together. This applies @@ -502,7 +517,7 @@ void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, InsertPos = addRangeFrom(*I, InsertPos); } - weight += Other.weight; + ComputeJoinedWeight(Other); // Update regalloc hint if currently there isn't one. if (TargetRegisterInfo::isVirtualRegister(reg) && @@ -546,7 +561,7 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { if (I->valno != RHSValNo) continue; - unsigned Start = I->start, End = I->end; + LiveIndex Start = I->start, End = I->end; IP = std::upper_bound(IP, end(), Start); // If the start of this range overlaps with an existing liverange, trim it. if (IP != begin() && IP[-1].end > Start) { @@ -622,20 +637,21 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers, else if (UnusedValNo) ClobberValNo = UnusedValNo; else { - UnusedValNo = ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); + UnusedValNo = ClobberValNo = + getNextValue(LiveIndex(), 0, false, VNInfoAllocator); ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo)); } bool Done = false; - unsigned Start = I->start, End = I->end; + LiveIndex Start = I->start, End = I->end; // If a clobber range starts before an existing range and ends after // it, the clobber range will need to be split into multiple ranges. // Loop until the entire clobber range is handled. while (!Done) { Done = true; IP = std::upper_bound(IP, end(), Start); - unsigned SubRangeStart = Start; - unsigned SubRangeEnd = End; + LiveIndex SubRangeStart = Start; + LiveIndex SubRangeEnd = End; // If the start of this range overlaps with an existing liverange, trim it. 
if (IP != begin() && IP[-1].end > SubRangeStart) { @@ -671,11 +687,13 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers, } } -void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End, +void LiveInterval::MergeInClobberRange(LiveIndex Start, + LiveIndex End, BumpPtrAllocator &VNInfoAllocator) { // Find a value # to use for the clobber ranges. If there is already a value# // for unknown values, use it. - VNInfo *ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); + VNInfo *ClobberValNo = + getNextValue(LiveIndex(), 0, false, VNInfoAllocator); iterator IP = begin(); IP = std::upper_bound(IP, end(), Start); @@ -711,7 +729,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { // Make sure V2 is smaller than V1. if (V1->id < V2->id) { - copyValNumInfo(V1, V2); + V1->copyFrom(*V2); std::swap(V1, V2); } @@ -788,20 +806,42 @@ void LiveInterval::Copy(const LiveInterval &RHS, unsigned LiveInterval::getSize() const { unsigned Sum = 0; for (const_iterator I = begin(), E = end(); I != E; ++I) - Sum += I->end - I->start; + Sum += I->start.distance(I->end); return Sum; } -std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) { +/// ComputeJoinedWeight - Set the weight of a live interval Joined +/// after Other has been merged into it. +void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) { + // If either of these intervals was spilled, the weight is the + // weight of the non-spilled interval. This can only happen with + // iterative coalescers. 
+ + if (Other.weight != HUGE_VALF) { + weight += Other.weight; + } + else if (weight == HUGE_VALF && + !TargetRegisterInfo::isPhysicalRegister(reg)) { + // Remove this assert if you have an iterative coalescer + assert(0 && "Joining to spilled interval"); + weight = Other.weight; + } + else { + // Otherwise the weight stays the same + // Remove this assert if you have an iterative coalescer + assert(0 && "Joining from spilled interval"); + } +} + +raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; } void LiveRange::dump() const { - cerr << *this << "\n"; + errs() << *this << "\n"; } -void LiveInterval::print(std::ostream &OS, - const TargetRegisterInfo *TRI) const { +void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { if (isStackSlot()) OS << "SS#" << getStackSlotIndex(); else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg)) @@ -841,6 +881,8 @@ void LiveInterval::print(std::ostream &OS, OS << "-("; for (unsigned j = 0; j != ee; ++j) { OS << vni->kills[j]; + if (vni->kills[j].isPHIIndex()) + OS << "*"; if (j != ee-1) OS << " "; } @@ -857,10 +899,10 @@ void LiveInterval::print(std::ostream &OS, } void LiveInterval::dump() const { - cerr << *this << "\n"; + errs() << *this << "\n"; } -void LiveRange::print(std::ostream &os) const { +void LiveRange::print(raw_ostream &os) const { os << *this; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 52a30bc..93d3d4c 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -34,6 +35,8 @@ #include 
"llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -47,24 +50,24 @@ using namespace llvm; static cl::opt<bool> DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); -static cl::opt<bool> SplitAtBB("split-intervals-at-bb", - cl::init(true), cl::Hidden); -static cl::opt<int> SplitLimit("split-limit", - cl::init(-1), cl::Hidden); - -static cl::opt<bool> EnableAggressiveRemat("aggressive-remat", cl::Hidden); - static cl::opt<bool> EnableFastSpilling("fast-spill", cl::init(false), cl::Hidden); -STATISTIC(numIntervals, "Number of original intervals"); -STATISTIC(numFolds , "Number of loads/stores folded into instructions"); -STATISTIC(numSplits , "Number of intervals split"); +static cl::opt<bool> EarlyCoalescing("early-coalescing", cl::init(false)); + +static cl::opt<int> CoalescingLimit("early-coalescing-limit", + cl::init(-1), cl::Hidden); + +STATISTIC(numIntervals , "Number of original intervals"); +STATISTIC(numFolds , "Number of loads/stores folded into instructions"); +STATISTIC(numSplits , "Number of intervals split"); +STATISTIC(numCoalescing, "Number of early coalescing performed"); char LiveIntervals::ID = 0; static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis"); void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); @@ -92,15 +95,32 @@ void LiveIntervals::releaseMemory() { mi2iMap_.clear(); i2miMap_.clear(); r2iMap_.clear(); + terminatorGaps.clear(); + phiJoinCopies.clear(); + // Release VNInfo memroy regions after all VNInfo objects are dtor'd. 
VNInfoAllocator.Reset(); - while (!ClonedMIs.empty()) { - MachineInstr *MI = ClonedMIs.back(); - ClonedMIs.pop_back(); + while (!CloneMIs.empty()) { + MachineInstr *MI = CloneMIs.back(); + CloneMIs.pop_back(); mf_->DeleteMachineInstr(MI); } } +static bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, + unsigned OpIdx, const TargetInstrInfo *tii_){ + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + Reg == SrcReg) + return true; + + if (OpIdx == 2 && MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + return true; + if (OpIdx == 1 && MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + return true; + return false; +} + /// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure /// there is one implicit_def for each use. Add isUndef marker to /// implicit_def defs and their uses. @@ -119,16 +139,33 @@ void LiveIntervals::processImplicitDefs() { ++I; if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { unsigned Reg = MI->getOperand(0).getReg(); - MI->getOperand(0).setIsUndef(); ImpDefRegs.insert(Reg); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS) + ImpDefRegs.insert(*SS); + } ImpDefMIs.push_back(MI); continue; } + if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + MachineOperand &MO = MI->getOperand(2); + if (ImpDefRegs.count(MO.getReg())) { + // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2 + // This is an identity copy, eliminate it now. 
+ if (MO.isKill()) { + LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); + vi.removeKill(MI); + } + MI->eraseFromParent(); + continue; + } + } + bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -136,22 +173,30 @@ void LiveIntervals::processImplicitDefs() { if (!ImpDefRegs.count(Reg)) continue; // Use is a copy, just turn it into an implicit_def. - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg) { + if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) { bool isKill = MO.isKill(); MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); - if (isKill) + if (isKill) { ImpDefRegs.erase(Reg); + LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + vi.removeKill(MI); + } ChangedToImpDef = true; break; } MO.setIsUndef(); - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { + // Make sure other uses of + for (unsigned j = i+1; j != e; ++j) { + MachineOperand &MOJ = MI->getOperand(j); + if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg) + MOJ.setIsUndef(); + } ImpDefRegs.erase(Reg); + } } if (ChangedToImpDef) { @@ -171,11 +216,13 @@ void LiveIntervals::processImplicitDefs() { for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) { MachineInstr *MI = ImpDefMIs[i]; unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - // Physical registers are not liveout (yet). - continue; - if (!ImpDefRegs.count(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + !ImpDefRegs.count(Reg)) { + // Delete all "local" implicit_def's. 
That include those which define + // physical registers since they cannot be liveout. + MI->eraseFromParent(); continue; + } // If there are multiple defs of the same register and at least one // is not an implicit_def, do not insert implicit_def's before the @@ -191,6 +238,10 @@ void LiveIntervals::processImplicitDefs() { if (Skip) continue; + // The only implicit_def which we want to keep are those that are live + // out of its block. + MI->eraseFromParent(); + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), UE = mri_->use_end(); UI != UE; ) { MachineOperand &RMO = UI.getOperand(); @@ -199,12 +250,19 @@ void LiveIntervals::processImplicitDefs() { MachineBasicBlock *RMBB = RMI->getParent(); if (RMBB == MBB) continue; + + // Turn a copy use into an implicit_def. + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + Reg == SrcReg) { + RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j) + RMI->RemoveOperand(j); + continue; + } + const TargetRegisterClass* RC = mri_->getRegClass(Reg); unsigned NewVReg = mri_->createVirtualRegister(RC); - MachineInstrBuilder MIB = - BuildMI(*RMBB, RMI, RMI->getDebugLoc(), - tii_->get(TargetInstrInfo::IMPLICIT_DEF), NewVReg); - (*MIB).getOperand(0).setIsUndef(); RMO.setReg(NewVReg); RMO.setIsUndef(); RMO.setIsKill(); @@ -215,6 +273,7 @@ void LiveIntervals::processImplicitDefs() { } } + void LiveIntervals::computeNumbering() { Index2MiMap OldI2MI = i2miMap_; std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap; @@ -223,44 +282,79 @@ void LiveIntervals::computeNumbering() { MBB2IdxMap.clear(); mi2iMap_.clear(); i2miMap_.clear(); + terminatorGaps.clear(); + phiJoinCopies.clear(); FunctionSize = 0; // Number MachineInstrs and MachineBasicBlocks. // Initialize MBB indexes to a sentinal. 
- MBB2IdxMap.resize(mf_->getNumBlockIDs(), std::make_pair(~0U,~0U)); + MBB2IdxMap.resize(mf_->getNumBlockIDs(), + std::make_pair(LiveIndex(),LiveIndex())); - unsigned MIIndex = 0; + LiveIndex MIIndex; for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end(); MBB != E; ++MBB) { - unsigned StartIdx = MIIndex; + LiveIndex StartIdx = MIIndex; // Insert an empty slot at the beginning of each block. - MIIndex += InstrSlots::NUM; + MIIndex = getNextIndex(MIIndex); i2miMap_.push_back(0); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { + + if (I == MBB->getFirstTerminator()) { + // Leave a gap for before terminators, this is where we will point + // PHI kills. + LiveIndex tGap(true, MIIndex); + bool inserted = + terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second; + assert(inserted && + "Multiple 'first' terminators encountered during numbering."); + inserted = inserted; // Avoid compiler warning if assertions turned off. + i2miMap_.push_back(0); + + MIIndex = getNextIndex(MIIndex); + } + bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second; assert(inserted && "multiple MachineInstr -> index mappings"); inserted = true; i2miMap_.push_back(I); - MIIndex += InstrSlots::NUM; + MIIndex = getNextIndex(MIIndex); FunctionSize++; // Insert max(1, numdefs) empty slots after every instruction. unsigned Slots = I->getDesc().getNumDefs(); if (Slots == 0) Slots = 1; - MIIndex += InstrSlots::NUM * Slots; - while (Slots--) + while (Slots--) { + MIIndex = getNextIndex(MIIndex); i2miMap_.push_back(0); + } + + } + + if (MBB->getFirstTerminator() == MBB->end()) { + // Leave a gap for before terminators, this is where we will point + // PHI kills. + LiveIndex tGap(true, MIIndex); + bool inserted = + terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second; + assert(inserted && + "Multiple 'first' terminators encountered during numbering."); + inserted = inserted; // Avoid compiler warning if assertions turned off. 
+ i2miMap_.push_back(0); + + MIIndex = getNextIndex(MIIndex); } // Set the MBB2IdxMap entry for this MBB. - MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, MIIndex - 1); + MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, getPrevSlot(MIIndex)); Idx2MBBMap.push_back(std::make_pair(StartIdx, MBB)); } + std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare()); if (!OldI2MI.empty()) @@ -272,9 +366,9 @@ void LiveIntervals::computeNumbering() { // number, or our best guess at what it _should_ correspond to if the // original instruction has been erased. This is either the following // instruction or its predecessor. - unsigned index = LI->start / InstrSlots::NUM; - unsigned offset = LI->start % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { + unsigned index = LI->start.getVecIndex(); + LiveIndex::Slot offset = LI->start.getSlot(); + if (LI->start.isLoad()) { std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->start); // Take the pair containing the index @@ -283,29 +377,34 @@ void LiveIntervals::computeNumbering() { LI->start = getMBBStartIdx(J->second); } else { - LI->start = mi2iMap_[OldI2MI[index]] + offset; + LI->start = LiveIndex( + LiveIndex(mi2iMap_[OldI2MI[index]]), + (LiveIndex::Slot)offset); } // Remap the ending index in the same way that we remapped the start, // except for the final step where we always map to the immediately // following instruction. - index = (LI->end - 1) / InstrSlots::NUM; - offset = LI->end % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { + index = (getPrevSlot(LI->end)).getVecIndex(); + offset = LI->end.getSlot(); + if (LI->end.isLoad()) { // VReg dies at end of block. 
std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->end); --I; - LI->end = getMBBEndIdx(I->second) + 1; + LI->end = getNextSlot(getMBBEndIdx(I->second)); } else { unsigned idx = index; while (index < OldI2MI.size() && !OldI2MI[index]) ++index; if (index != OldI2MI.size()) - LI->end = mi2iMap_[OldI2MI[index]] + (idx == index ? offset : 0); + LI->end = + LiveIndex(mi2iMap_[OldI2MI[index]], + (idx == index ? offset : LiveIndex::LOAD)); else - LI->end = InstrSlots::NUM * i2miMap_.size(); + LI->end = + LiveIndex(LiveIndex::NUM * i2miMap_.size()); } } @@ -317,9 +416,9 @@ void LiveIntervals::computeNumbering() { // start indices above. VN's with special sentinel defs // don't need to be remapped. if (vni->isDefAccurate() && !vni->isUnused()) { - unsigned index = vni->def / InstrSlots::NUM; - unsigned offset = vni->def % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { + unsigned index = vni->def.getVecIndex(); + LiveIndex::Slot offset = vni->def.getSlot(); + if (vni->def.isLoad()) { std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->def); // Take the pair containing the index @@ -328,25 +427,36 @@ void LiveIntervals::computeNumbering() { vni->def = getMBBStartIdx(J->second); } else { - vni->def = mi2iMap_[OldI2MI[index]] + offset; + vni->def = LiveIndex(mi2iMap_[OldI2MI[index]], offset); } } // Remap the VNInfo kill indices, which works the same as // the end indices above. for (size_t i = 0; i < vni->kills.size(); ++i) { - // PHI kills don't need to be remapped. 
- if (!vni->kills[i]) continue; - - unsigned index = (vni->kills[i]-1) / InstrSlots::NUM; - unsigned offset = vni->kills[i] % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { - std::vector<IdxMBBPair>::const_iterator I = + unsigned index = getPrevSlot(vni->kills[i]).getVecIndex(); + LiveIndex::Slot offset = vni->kills[i].getSlot(); + + if (vni->kills[i].isLoad()) { + assert("Value killed at a load slot."); + /*std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]); --I; - vni->kills[i] = getMBBEndIdx(I->second); + vni->kills[i] = getMBBEndIdx(I->second);*/ } else { + if (vni->kills[i].isPHIIndex()) { + std::vector<IdxMBBPair>::const_iterator I = + std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]); + --I; + vni->kills[i] = terminatorGaps[I->second]; + } else { + assert(OldI2MI[index] != 0 && + "Kill refers to instruction not present in index maps."); + vni->kills[i] = LiveIndex(mi2iMap_[OldI2MI[index]], offset); + } + + /* unsigned idx = index; while (index < OldI2MI.size() && !OldI2MI[index]) ++index; @@ -355,6 +465,7 @@ void LiveIntervals::computeNumbering() { (idx == index ? offset : 0); else vni->kills[i] = InstrSlots::NUM * i2miMap_.size(); + */ } } } @@ -372,13 +483,20 @@ void LiveIntervals::scaleNumbering(int factor) { Idx2MBBMap.clear(); for (MachineFunction::iterator MBB = mf_->begin(), MBBE = mf_->end(); MBB != MBBE; ++MBB) { - std::pair<unsigned, unsigned> &mbbIndices = MBB2IdxMap[MBB->getNumber()]; - mbbIndices.first = InstrSlots::scale(mbbIndices.first, factor); - mbbIndices.second = InstrSlots::scale(mbbIndices.second, factor); + std::pair<LiveIndex, LiveIndex> &mbbIndices = MBB2IdxMap[MBB->getNumber()]; + mbbIndices.first = mbbIndices.first.scale(factor); + mbbIndices.second = mbbIndices.second.scale(factor); Idx2MBBMap.push_back(std::make_pair(mbbIndices.first, MBB)); } std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare()); + // Scale terminator gaps. 
+ for (DenseMap<MachineBasicBlock*, LiveIndex>::iterator + TGI = terminatorGaps.begin(), TGE = terminatorGaps.end(); + TGI != TGE; ++TGI) { + terminatorGaps[TGI->first] = TGI->second.scale(factor); + } + // Scale the intervals. for (iterator LI = begin(), LE = end(); LI != LE; ++LI) { LI->second->scaleNumbering(factor); @@ -386,19 +504,20 @@ void LiveIntervals::scaleNumbering(int factor) { // Scale MachineInstrs. Mi2IndexMap oldmi2iMap = mi2iMap_; - unsigned highestSlot = 0; + LiveIndex highestSlot; for (Mi2IndexMap::iterator MI = oldmi2iMap.begin(), ME = oldmi2iMap.end(); MI != ME; ++MI) { - unsigned newSlot = InstrSlots::scale(MI->second, factor); + LiveIndex newSlot = MI->second.scale(factor); mi2iMap_[MI->first] = newSlot; highestSlot = std::max(highestSlot, newSlot); } + unsigned highestVIndex = highestSlot.getVecIndex(); i2miMap_.clear(); - i2miMap_.resize(highestSlot + 1); + i2miMap_.resize(highestVIndex + 1); for (Mi2IndexMap::iterator MI = mi2iMap_.begin(), ME = mi2iMap_.end(); MI != ME; ++MI) { - i2miMap_[MI->second] = MI->first; + i2miMap_[MI->second.getVecIndex()] = const_cast<MachineInstr *>(MI->first); } } @@ -419,6 +538,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { processImplicitDefs(); computeNumbering(); computeIntervals(); + performEarlyCoalescing(); numIntervals += getNumIntervals(); @@ -427,36 +547,45 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { } /// print - Implement the dump method. 
-void LiveIntervals::print(std::ostream &O, const Module* ) const { - O << "********** INTERVALS **********\n"; +void LiveIntervals::print(raw_ostream &OS, const Module* ) const { + OS << "********** INTERVALS **********\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second->print(O, tri_); - O << "\n"; + I->second->print(OS, tri_); + OS << "\n"; } - O << "********** MACHINEINSTRS **********\n"; + printInstrs(OS); +} + +void LiveIntervals::printInstrs(raw_ostream &OS) const { + OS << "********** MACHINEINSTRS **********\n"; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); mbbi != mbbe; ++mbbi) { - O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n"; + OS << ((Value*)mbbi->getBasicBlock())->getName() << ":\n"; for (MachineBasicBlock::iterator mii = mbbi->begin(), mie = mbbi->end(); mii != mie; ++mii) { - O << getInstructionIndex(mii) << '\t' << *mii; + OS << getInstructionIndex(mii) << '\t' << *mii; } } } +void LiveIntervals::dumpInstrs() const { + printInstrs(errs()); +} + /// conflictsWithPhysRegDef - Returns true if the specified register /// is defined during the duration of the specified interval. 
bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li, VirtRegMap &vrm, unsigned reg) { for (LiveInterval::Ranges::const_iterator I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (unsigned index = getBaseIndex(I->start), - end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end; - index += InstrSlots::NUM) { + for (LiveIndex index = getBaseIndex(I->start), + end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end; + index = getNextIndex(index)) { // skip deleted instructions while (index != end && !getInstructionFromIndex(index)) - index += InstrSlots::NUM; + index = getNextIndex(index); if (index == end) break; MachineInstr *MI = getInstructionFromIndex(index); @@ -492,16 +621,16 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, SmallPtrSet<MachineInstr*,32> &JoinedCopies) { for (LiveInterval::Ranges::const_iterator I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (unsigned index = getBaseIndex(I->start), - end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end; - index += InstrSlots::NUM) { + for (LiveIndex index = getBaseIndex(I->start), + end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end; + index = getNextIndex(index)) { // Skip deleted instructions. 
MachineInstr *MI = 0; while (index != end) { MI = getInstructionFromIndex(index); if (MI) break; - index += InstrSlots::NUM; + index = getNextIndex(index); } if (index == end) break; @@ -525,35 +654,36 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, return false; } - -void LiveIntervals::printRegName(unsigned reg) const { +#ifndef NDEBUG +static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) { if (TargetRegisterInfo::isPhysicalRegister(reg)) - cerr << tri_->getName(reg); + errs() << tri_->getName(reg); else - cerr << "%reg" << reg; + errs() << "%reg" << reg; } +#endif void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineBasicBlock::iterator mi, - unsigned MIIdx, MachineOperand& MO, + LiveIndex MIIdx, + MachineOperand& MO, unsigned MOIdx, LiveInterval &interval) { - DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg)); - LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); - - if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { - DOUT << "is a implicit_def\n"; - return; - } + DEBUG({ + errs() << "\t\tregister: "; + printRegName(interval.reg, tri_); + }); // Virtual registers may be defined multiple times (due to phi // elimination and 2-addr elimination). Much of what we do only has to be // done once for the vreg. We use an empty interval to detect the first // time we see a vreg. + LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. - unsigned defIndex = getDefIndex(MIIdx); - // Earlyclobbers move back one. + LiveIndex defIndex = getDefIndex(MIIdx); + // Earlyclobbers move back one, so that they overlap the live range + // of inputs. if (MO.isEarlyClobber()) defIndex = getUseIndex(MIIdx); VNInfo *ValNo; @@ -575,11 +705,16 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // will be a single kill, in MBB, which comes after the definition. 
if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) { // FIXME: what about dead vars? - unsigned killIdx; + LiveIndex killIdx; if (vi.Kills[0] != mi) - killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1; + killIdx = getNextSlot(getUseIndex(getInstructionIndex(vi.Kills[0]))); + else if (MO.isEarlyClobber()) + // Earlyclobbers that die in this instruction move up one extra, to + // compensate for having the starting point moved back one. This + // gets them to overlap the live range of other outputs. + killIdx = getNextSlot(getNextSlot(defIndex)); else - killIdx = defIndex+1; + killIdx = getNextSlot(defIndex); // If the kill happens after the definition, we have an intra-block // live range. @@ -588,8 +723,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, "Shouldn't be alive across any blocks!"); LiveRange LR(defIndex, killIdx, ValNo); interval.addRange(LR); - DOUT << " +" << LR << "\n"; - interval.addKill(ValNo, killIdx); + DEBUG(errs() << " +" << LR << "\n"); + ValNo->addKill(killIdx); return; } } @@ -598,8 +733,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // of the defining block, potentially live across some blocks, then is // live into some number of blocks, but gets killed. Start by adding a // range that goes from this definition to the end of the defining block. - LiveRange NewLR(defIndex, getMBBEndIdx(mbb)+1, ValNo); - DOUT << " +" << NewLR; + LiveRange NewLR(defIndex, getNextSlot(getMBBEndIdx(mbb)), ValNo); + DEBUG(errs() << " +" << NewLR); interval.addRange(NewLR); // Iterate over all of the blocks that the variable is completely @@ -608,22 +743,22 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), E = vi.AliveBlocks.end(); I != E; ++I) { LiveRange LR(getMBBStartIdx(*I), - getMBBEndIdx(*I)+1, // MBB ends at -1. + getNextSlot(getMBBEndIdx(*I)), // MBB ends at -1. 
ValNo); interval.addRange(LR); - DOUT << " +" << LR; + DEBUG(errs() << " +" << LR); } // Finally, this virtual register is live from the start of any killing // block to the 'use' slot of the killing instruction. for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; - unsigned killIdx = getUseIndex(getInstructionIndex(Kill))+1; - LiveRange LR(getMBBStartIdx(Kill->getParent()), - killIdx, ValNo); + LiveIndex killIdx = + getNextSlot(getUseIndex(getInstructionIndex(Kill))); + LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo); interval.addRange(LR); - interval.addKill(ValNo, killIdx); - DOUT << " +" << LR; + ValNo->addKill(killIdx); + DEBUG(errs() << " +" << LR); } } else { @@ -638,12 +773,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // need to take the LiveRegion that defines this register and split it // into two values. assert(interval.containsOneValue()); - unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def); - unsigned RedefIndex = getDefIndex(MIIdx); + LiveIndex DefIndex = getDefIndex(interval.getValNumInfo(0)->def); + LiveIndex RedefIndex = getDefIndex(MIIdx); if (MO.isEarlyClobber()) RedefIndex = getUseIndex(MIIdx); - const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1); + const LiveRange *OldLR = + interval.getLiveRangeContaining(getPrevSlot(RedefIndex)); VNInfo *OldValNo = OldLR->valno; // Delete the initial value, which should be short and continuous, @@ -656,68 +792,85 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // The new value number (#1) is defined by the instruction we claimed // defined value #0. - VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy, + VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(), false, // update at * VNInfoAllocator); ValNo->setFlags(OldValNo->getFlags()); // * <- updating here // Value#0 is now defined by the 2-addr instruction. 
OldValNo->def = RedefIndex; - OldValNo->copy = 0; + OldValNo->setCopy(0); if (MO.isEarlyClobber()) OldValNo->setHasRedefByEC(true); // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); - DOUT << " replace range with " << LR; + DEBUG(errs() << " replace range with " << LR); interval.addRange(LR); - interval.addKill(ValNo, RedefIndex); + ValNo->addKill(RedefIndex); // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex+1, OldValNo)); - - DOUT << " RESULT: "; - interval.print(DOUT, tri_); - + interval.addRange( + LiveRange(RedefIndex, MO.isEarlyClobber() ? + getNextSlot(getNextSlot(RedefIndex)) : + getNextSlot(RedefIndex), OldValNo)); + + DEBUG({ + errs() << " RESULT: "; + interval.print(errs(), tri_); + }); } else { // Otherwise, this must be because of phi elimination. If this is the // first redefinition of the vreg that we have seen, go back and change // the live range in the PHI block to be a different value number. if (interval.containsOneValue()) { - assert(vi.Kills.size() == 1 && - "PHI elimination vreg should have one kill, the PHI itself!"); - // Remove the old range that we now know has an incorrect number. 
VNInfo *VNI = interval.getValNumInfo(0); MachineInstr *Killer = vi.Kills[0]; - unsigned Start = getMBBStartIdx(Killer->getParent()); - unsigned End = getUseIndex(getInstructionIndex(Killer))+1; - DOUT << " Removing [" << Start << "," << End << "] from: "; - interval.print(DOUT, tri_); DOUT << "\n"; - interval.removeRange(Start, End); + phiJoinCopies.push_back(Killer); + LiveIndex Start = getMBBStartIdx(Killer->getParent()); + LiveIndex End = + getNextSlot(getUseIndex(getInstructionIndex(Killer))); + DEBUG({ + errs() << " Removing [" << Start << "," << End << "] from: "; + interval.print(errs(), tri_); + errs() << "\n"; + }); + interval.removeRange(Start, End); + assert(interval.ranges.size() == 1 && + "Newly discovered PHI interval has >1 ranges."); + MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex()); + VNI->addKill(terminatorGaps[killMBB]); VNI->setHasPHIKill(true); - DOUT << " RESULT: "; interval.print(DOUT, tri_); + DEBUG({ + errs() << " RESULT: "; + interval.print(errs(), tri_); + }); // Replace the interval with one of a NEW value number. Note that this // value number isn't actually defined by an instruction, weird huh? :) LiveRange LR(Start, End, - interval.getNextValue(mbb->getNumber(), 0, false, VNInfoAllocator)); + interval.getNextValue(LiveIndex(mbb->getNumber()), + 0, false, VNInfoAllocator)); LR.valno->setIsPHIDef(true); - DOUT << " replace range with " << LR; + DEBUG(errs() << " replace range with " << LR); interval.addRange(LR); - interval.addKill(LR.valno, End); - DOUT << " RESULT: "; interval.print(DOUT, tri_); + LR.valno->addKill(End); + DEBUG({ + errs() << " RESULT: "; + interval.print(errs(), tri_); + }); } // In the case of PHI elimination, each variable definition is only // live until the end of the block. We've already taken care of the // rest of the live range. 
- unsigned defIndex = getDefIndex(MIIdx); + LiveIndex defIndex = getDefIndex(MIIdx); if (MO.isEarlyClobber()) defIndex = getUseIndex(MIIdx); - + VNInfo *ValNo; MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; @@ -728,55 +881,63 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); - unsigned killIndex = getMBBEndIdx(mbb) + 1; + LiveIndex killIndex = getNextSlot(getMBBEndIdx(mbb)); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - interval.addKill(ValNo, killIndex); + ValNo->addKill(terminatorGaps[mbb]); ValNo->setHasPHIKill(true); - DOUT << " +" << LR; + DEBUG(errs() << " +" << LR); } } - DOUT << '\n'; + DEBUG(errs() << '\n'); } void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator mi, - unsigned MIIdx, + LiveIndex MIIdx, MachineOperand& MO, LiveInterval &interval, MachineInstr *CopyMI) { // A physical register cannot be live across basic block, so its // lifetime must end somewhere in its defining basic block. - DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg)); + DEBUG({ + errs() << "\t\tregister: "; + printRegName(interval.reg, tri_); + }); - unsigned baseIndex = MIIdx; - unsigned start = getDefIndex(baseIndex); + LiveIndex baseIndex = MIIdx; + LiveIndex start = getDefIndex(baseIndex); // Earlyclobbers move back one. if (MO.isEarlyClobber()) start = getUseIndex(MIIdx); - unsigned end = start; + LiveIndex end = start; // If it is not used after definition, it is considered dead at // the instruction defining it. Hence its interval is: // [defSlot(def), defSlot(def)+1) + // For earlyclobbers, the defSlot was pushed back one; the extra + // advance below compensates. 
if (MO.isDead()) { - DOUT << " dead"; - end = start + 1; + DEBUG(errs() << " dead"); + if (MO.isEarlyClobber()) + end = getNextSlot(getNextSlot(start)); + else + end = getNextSlot(start); goto exit; } // If it is not dead on definition, it must be killed by a // subsequent instruction. Hence its interval is: // [defSlot(def), useSlot(kill)+1) - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); while (++mi != MBB->end()) { - while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + while (baseIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(baseIndex) == 0) - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); if (mi->killsRegister(interval.reg, tri_)) { - DOUT << " killed"; - end = getUseIndex(baseIndex) + 1; + DEBUG(errs() << " killed"); + end = getNextSlot(getUseIndex(baseIndex)); goto exit; } else { int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_); @@ -791,21 +952,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) - DOUT << " dead"; - end = start + 1; + DEBUG(errs() << " dead"); + end = getNextSlot(start); } goto exit; } } - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); } // The only case we should have a dead physreg here without a killing or // instruction where we know it's dead is if it is live-in to the function // and never used. Another possible case is the implicit use of the // physical register has been deleted by two-address pass. 
- end = start + 1; + end = getNextSlot(start); exit: assert(start < end && "did not find end of interval?"); @@ -819,13 +980,13 @@ exit: ValNo->setHasRedefByEC(true); LiveRange LR(start, end, ValNo); interval.addRange(LR); - interval.addKill(LR.valno, end); - DOUT << " +" << LR << '\n'; + LR.valno->addKill(end); + DEBUG(errs() << " +" << LR << '\n'); } void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI, - unsigned MIIdx, + LiveIndex MIIdx, MachineOperand& MO, unsigned MOIdx) { if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) @@ -852,25 +1013,28 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, } void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, - unsigned MIIdx, + LiveIndex MIIdx, LiveInterval &interval, bool isAlias) { - DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg)); + DEBUG({ + errs() << "\t\tlivein register: "; + printRegName(interval.reg, tri_); + }); // Look for kills, if it reaches a def before it's killed, then it shouldn't // be considered a livein. 
MachineBasicBlock::iterator mi = MBB->begin(); - unsigned baseIndex = MIIdx; - unsigned start = baseIndex; - while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + LiveIndex baseIndex = MIIdx; + LiveIndex start = baseIndex; + while (baseIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(baseIndex) == 0) - baseIndex += InstrSlots::NUM; - unsigned end = baseIndex; + baseIndex = getNextIndex(baseIndex); + LiveIndex end = baseIndex; bool SeenDefUse = false; while (mi != MBB->end()) { if (mi->killsRegister(interval.reg, tri_)) { - DOUT << " killed"; - end = getUseIndex(baseIndex) + 1; + DEBUG(errs() << " killed"); + end = getNextSlot(getUseIndex(baseIndex)); SeenDefUse = true; break; } else if (mi->modifiesRegister(interval.reg, tri_)) { @@ -878,40 +1042,167 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) - DOUT << " dead"; - end = getDefIndex(start) + 1; + DEBUG(errs() << " dead"); + end = getNextSlot(getDefIndex(start)); SeenDefUse = true; break; } - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); ++mi; if (mi != MBB->end()) { - while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + while (baseIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(baseIndex) == 0) - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); } } // Live-in register might not be used at all. 
if (!SeenDefUse) { if (isAlias) { - DOUT << " dead"; - end = getDefIndex(MIIdx) + 1; + DEBUG(errs() << " dead"); + end = getNextSlot(getDefIndex(MIIdx)); } else { - DOUT << " live through"; + DEBUG(errs() << " live through"); end = baseIndex; } } VNInfo *vni = - interval.getNextValue(MBB->getNumber(), 0, false, VNInfoAllocator); + interval.getNextValue(LiveIndex(MBB->getNumber()), + 0, false, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); interval.addRange(LR); - interval.addKill(LR.valno, end); - DOUT << " +" << LR << '\n'; + LR.valno->addKill(end); + DEBUG(errs() << " +" << LR << '\n'); +} + +bool +LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt, + SmallVector<MachineInstr*,16> &IdentCopies, + SmallVector<MachineInstr*,16> &OtherCopies) { + bool HaveConflict = false; + unsigned NumIdent = 0; + for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg), + re = mri_->def_end(); ri != re; ++ri) { + MachineInstr *MI = &*ri; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + return false; + if (SrcReg != DstInt.reg) { + OtherCopies.push_back(MI); + HaveConflict |= DstInt.liveAt(getInstructionIndex(MI)); + } else { + IdentCopies.push_back(MI); + ++NumIdent; + } + } + + if (!HaveConflict) + return false; // Let coalescer handle it + return IdentCopies.size() > OtherCopies.size(); +} + +void LiveIntervals::performEarlyCoalescing() { + if (!EarlyCoalescing) + return; + + /// Perform early coalescing: eliminate copies which feed into phi joins + /// and whose sources are defined by the phi joins. 
+ for (unsigned i = 0, e = phiJoinCopies.size(); i != e; ++i) { + MachineInstr *Join = phiJoinCopies[i]; + if (CoalescingLimit != -1 && (int)numCoalescing == CoalescingLimit) + break; + + unsigned PHISrc, PHIDst, SrcSubReg, DstSubReg; + bool isMove= tii_->isMoveInstr(*Join, PHISrc, PHIDst, SrcSubReg, DstSubReg); +#ifndef NDEBUG + assert(isMove && "PHI join instruction must be a move!"); +#else + isMove = isMove; +#endif + + LiveInterval &DstInt = getInterval(PHIDst); + LiveInterval &SrcInt = getInterval(PHISrc); + SmallVector<MachineInstr*, 16> IdentCopies; + SmallVector<MachineInstr*, 16> OtherCopies; + if (!isProfitableToCoalesce(DstInt, SrcInt, IdentCopies, OtherCopies)) + continue; + + DEBUG(errs() << "PHI Join: " << *Join); + assert(DstInt.containsOneValue() && "PHI join should have just one val#!"); + VNInfo *VNI = DstInt.getValNumInfo(0); + + // Change the non-identity copies to directly target the phi destination. + for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) { + MachineInstr *PHICopy = OtherCopies[i]; + DEBUG(errs() << "Moving: " << *PHICopy); + + LiveIndex MIIndex = getInstructionIndex(PHICopy); + LiveIndex DefIndex = getDefIndex(MIIndex); + LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); + LiveIndex StartIndex = SLR->start; + LiveIndex EndIndex = SLR->end; + + // Delete val# defined by the now identity copy and add the range from + // beginning of the mbb to the end of the range. 
+ SrcInt.removeValNo(SLR->valno); + DEBUG(errs() << " added range [" << StartIndex << ',' + << EndIndex << "] to reg" << DstInt.reg << '\n'); + if (DstInt.liveAt(StartIndex)) + DstInt.removeRange(StartIndex, EndIndex); + VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true, + VNInfoAllocator); + NewVNI->setHasPHIKill(true); + DstInt.addRange(LiveRange(StartIndex, EndIndex, NewVNI)); + for (unsigned j = 0, ee = PHICopy->getNumOperands(); j != ee; ++j) { + MachineOperand &MO = PHICopy->getOperand(j); + if (!MO.isReg() || MO.getReg() != PHISrc) + continue; + MO.setReg(PHIDst); + } + } + + // Now let's eliminate all the would-be identity copies. + for (unsigned i = 0, e = IdentCopies.size(); i != e; ++i) { + MachineInstr *PHICopy = IdentCopies[i]; + DEBUG(errs() << "Coalescing: " << *PHICopy); + + LiveIndex MIIndex = getInstructionIndex(PHICopy); + LiveIndex DefIndex = getDefIndex(MIIndex); + LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); + LiveIndex StartIndex = SLR->start; + LiveIndex EndIndex = SLR->end; + + // Delete val# defined by the now identity copy and add the range from + // beginning of the mbb to the end of the range. + SrcInt.removeValNo(SLR->valno); + RemoveMachineInstrFromMaps(PHICopy); + PHICopy->eraseFromParent(); + DEBUG(errs() << " added range [" << StartIndex << ',' + << EndIndex << "] to reg" << DstInt.reg << '\n'); + DstInt.addRange(LiveRange(StartIndex, EndIndex, VNI)); + } + + // Remove the phi join and update the phi block liveness. 
+ LiveIndex MIIndex = getInstructionIndex(Join); + LiveIndex UseIndex = getUseIndex(MIIndex); + LiveIndex DefIndex = getDefIndex(MIIndex); + LiveRange *SLR = SrcInt.getLiveRangeContaining(UseIndex); + LiveRange *DLR = DstInt.getLiveRangeContaining(DefIndex); + DLR->valno->setCopy(0); + DLR->valno->setIsDefAccurate(false); + DstInt.addRange(LiveRange(SLR->start, SLR->end, DLR->valno)); + SrcInt.removeRange(SLR->start, SLR->end); + assert(SrcInt.empty()); + removeInterval(PHISrc); + RemoveMachineInstrFromMaps(Join); + Join->eraseFromParent(); + + ++numCoalescing; + } } /// computeIntervals - computes the live intervals for virtual @@ -919,17 +1210,17 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, /// live interval is an interval [i, j) where 1 <= i <= j < N for /// which a variable is live void LiveIntervals::computeIntervals() { + DEBUG(errs() << "********** COMPUTING LIVE INTERVALS **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'); - DOUT << "********** COMPUTING LIVE INTERVALS **********\n" - << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'; - + SmallVector<unsigned, 8> UndefUses; for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; // Track the index of the current machine instr. - unsigned MIIndex = getMBBStartIdx(MBB); - DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n"; + LiveIndex MIIndex = getMBBStartIdx(MBB); + DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n"); MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); @@ -945,37 +1236,52 @@ void LiveIntervals::computeIntervals() { } // Skip over empty initial indices. 
- while (MIIndex / InstrSlots::NUM < i2miMap_.size() && + while (MIIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(MIIndex) == 0) - MIIndex += InstrSlots::NUM; + MIIndex = getNextIndex(MIIndex); for (; MI != miEnd; ++MI) { - DOUT << MIIndex << "\t" << *MI; + DEBUG(errs() << MIIndex << "\t" << *MI); // Handle defs. for (int i = MI->getNumOperands() - 1; i >= 0; --i) { MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + // handle register defs - build intervals - if (MO.isReg() && MO.getReg() && MO.isDef()) { + if (MO.isDef()) handleRegisterDef(MBB, MI, MIIndex, MO, i); - } + else if (MO.isUndef()) + UndefUses.push_back(MO.getReg()); } // Skip over the empty slots after each instruction. unsigned Slots = MI->getDesc().getNumDefs(); if (Slots == 0) Slots = 1; - MIIndex += InstrSlots::NUM * Slots; + + while (Slots--) + MIIndex = getNextIndex(MIIndex); // Skip over empty indices. - while (MIIndex / InstrSlots::NUM < i2miMap_.size() && + while (MIIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(MIIndex) == 0) - MIIndex += InstrSlots::NUM; + MIIndex = getNextIndex(MIIndex); } } + + // Create empty intervals for registers defined by implicit_def's (except + // for those implicit_def that define values which are liveout of their + // blocks. 
+ for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) { + unsigned UndefReg = UndefUses[i]; + (void)getOrCreateInterval(UndefReg); + } } -bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End, +bool LiveIntervals::findLiveInMBBs( + LiveIndex Start, LiveIndex End, SmallVectorImpl<MachineBasicBlock*> &MBBs) const { std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start); @@ -991,7 +1297,8 @@ bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End, return ResVal; } -bool LiveIntervals::findReachableMBBs(unsigned Start, unsigned End, +bool LiveIntervals::findReachableMBBs( + LiveIndex Start, LiveIndex End, SmallVectorImpl<MachineBasicBlock*> &MBBs) const { std::vector<IdxMBBPair>::const_iterator I = std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start); @@ -1028,23 +1335,23 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { /// getVNInfoSourceReg - Helper function that parses the specified VNInfo /// copy field and returns the source register that defines it. unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { - if (!VNI->copy) + if (!VNI->getCopy()) return 0; - if (VNI->copy->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { // If it's extracting out of a physical register, return the sub-register. 
- unsigned Reg = VNI->copy->getOperand(1).getReg(); + unsigned Reg = VNI->getCopy()->getOperand(1).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm()); + Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm()); return Reg; - } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) - return VNI->copy->getOperand(2).getReg(); + } else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + return VNI->getCopy()->getOperand(2).getReg(); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*VNI->copy, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg)) return SrcReg; - assert(0 && "Unrecognized copy instruction!"); + llvm_unreachable("Unrecognized copy instruction!"); return 0; } @@ -1083,8 +1390,8 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, /// isValNoAvailableAt - Return true if the val# of the specified interval /// which reaches the given instruction also reaches the specified use index. 
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, - unsigned UseIdx) const { - unsigned Index = getInstructionIndex(MI); + LiveIndex UseIdx) const { + LiveIndex Index = getInstructionIndex(MI); VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno; LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx); return UI != li.end() && UI->valno == ValNo; @@ -1099,102 +1406,19 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, if (DisableReMat) return false; - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - return true; - - int FrameIdx = 0; - if (tii_->isLoadFromStackSlot(MI, FrameIdx) && - mf_->getFrameInfo()->isImmutableObjectIndex(FrameIdx)) - // FIXME: Let target specific isReallyTriviallyReMaterializable determines - // this but remember this is not safe to fold into a two-address - // instruction. - // This is a load from fixed stack slot. It can be rematerialized. - return true; - - // If the target-specific rules don't identify an instruction as - // being trivially rematerializable, use some target-independent - // rules. - if (!MI->getDesc().isRematerializable() || - !tii_->isTriviallyReMaterializable(MI)) { - if (!EnableAggressiveRemat) - return false; - - // If the instruction accesses memory but the memoperands have been lost, - // we can't analyze it. - const TargetInstrDesc &TID = MI->getDesc(); - if ((TID.mayLoad() || TID.mayStore()) && MI->memoperands_empty()) - return false; - - // Avoid instructions obviously unsafe for remat. - if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable()) - return false; - - // If the instruction accesses memory and the memory could be non-constant, - // assume the instruction is not rematerializable. 
- for (std::list<MachineMemOperand>::const_iterator - I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I){ - const MachineMemOperand &MMO = *I; - if (MMO.isVolatile() || MMO.isStore()) - return false; - const Value *V = MMO.getValue(); - if (!V) - return false; - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - if (!PSV->isConstant(mf_->getFrameInfo())) - return false; - } else if (!aa_->pointsToConstantMemory(V)) - return false; - } - - // If any of the registers accessed are non-constant, conservatively assume - // the instruction is not rematerializable. - unsigned ImpUse = 0; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return false; - - // Only allow one def, and that in the first operand. - if (MO.isDef() != (i == 0)) - return false; - - // Only allow constant-valued registers. - bool IsLiveIn = mri_->isLiveIn(Reg); - MachineRegisterInfo::def_iterator I = mri_->def_begin(Reg), - E = mri_->def_end(); - - // For the def, it should be the only def of that register. - if (MO.isDef() && (next(I) != E || IsLiveIn)) - return false; - - if (MO.isUse()) { - // Only allow one use other register use, as that's all the - // remat mechanisms support currently. - if (Reg != li.reg) { - if (ImpUse == 0) - ImpUse = Reg; - else if (Reg != ImpUse) - return false; - } - // For the use, there should be only one associated def. - if (I != E && (next(I) != E || IsLiveIn)) - return false; - } - } - } - } + if (!tii_->isTriviallyReMaterializable(MI, aa_)) + return false; + // Target-specific code can mark an instruction as being rematerializable + // if it has one virtual reg use, though it had better be something like + // a PIC base register which is likely to be live everywhere. 
unsigned ImpUse = getReMatImplicitUse(li, MI); if (ImpUse) { const LiveInterval &ImpLi = getInterval(ImpUse); for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg), re = mri_->use_end(); ri != re; ++ri) { MachineInstr *UseMI = &*ri; - unsigned UseIdx = getInstructionIndex(UseMI); + LiveIndex UseIdx = getInstructionIndex(UseMI); if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo) continue; if (!isValNoAvailableAt(ImpLi, MI, UseIdx)) @@ -1279,7 +1503,7 @@ static bool FilterFoldedOps(MachineInstr *MI, /// returns true. bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm, MachineInstr *DefMI, - unsigned InstrIdx, + LiveIndex InstrIdx, SmallVector<unsigned, 2> &Ops, bool isSS, int Slot, unsigned Reg) { // If it is an implicit def instruction, just delete it. @@ -1318,7 +1542,7 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, vrm.transferRestorePts(MI, fmi); vrm.transferEmergencySpills(MI, fmi); mi2iMap_.erase(MI); - i2miMap_[InstrIdx /InstrSlots::NUM] = fmi; + i2miMap_[InstrIdx.getVecIndex()] = fmi; mi2iMap_[fmi] = InstrIdx; MI = MBB.insert(MBB.erase(MI), fmi); ++numFolds; @@ -1391,7 +1615,8 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, /// for addIntervalsForSpills to rewrite uses / defs for the given live range. bool LiveIntervals:: rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, unsigned index, unsigned end, MachineInstr *MI, + bool TrySplit, LiveIndex index, LiveIndex end, + MachineInstr *MI, MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, unsigned Slot, int LdSlot, bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, @@ -1422,8 +1647,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // If this is the rematerializable definition MI itself and // all of its uses are rematerialized, simply delete it. 
if (MI == ReMatOrigDefMI && CanDelete) { - DOUT << "\t\t\t\tErasing re-materlizable def: "; - DOUT << MI << '\n'; + DEBUG(errs() << "\t\t\t\tErasing re-materlizable def: " + << MI << '\n'); RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); @@ -1465,23 +1690,13 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, continue; if (RegJ == RegI) { Ops.push_back(j); - HasUse |= MOj.isUse(); - HasDef |= MOj.isDef(); + if (!MOj.isUndef()) { + HasUse |= MOj.isUse(); + HasDef |= MOj.isDef(); + } } } - if (HasUse && !li.liveAt(getUseIndex(index))) - // Must be defined by an implicit def. It should not be spilled. Note, - // this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - HasUse = false; - // Create a new virtual register for the spill interval. // Create the new register now so we can map the fold instruction // to the new register so when it is unfolded we get the correct @@ -1537,7 +1752,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (CreatedNewVReg) { if (DefIsReMat) { - vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/); + vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) { // Each valnum may have its own remat id. 
ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg); @@ -1577,38 +1792,46 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (HasUse) { if (CreatedNewVReg) { - LiveRange LR(getLoadIndex(index), getUseIndex(index)+1, - nI.getNextValue(0, 0, false, VNInfoAllocator)); - DOUT << " +" << LR; + LiveRange LR(getLoadIndex(index), getNextSlot(getUseIndex(index)), + nI.getNextValue(LiveIndex(), 0, false, + VNInfoAllocator)); + DEBUG(errs() << " +" << LR); nI.addRange(LR); } else { // Extend the split live interval to this def / use. - unsigned End = getUseIndex(index)+1; + LiveIndex End = getNextSlot(getUseIndex(index)); LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End, nI.getValNumInfo(nI.getNumValNums()-1)); - DOUT << " +" << LR; + DEBUG(errs() << " +" << LR); nI.addRange(LR); } } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(0, 0, false, VNInfoAllocator)); - DOUT << " +" << LR; + nI.getNextValue(LiveIndex(), 0, false, + VNInfoAllocator)); + DEBUG(errs() << " +" << LR); nI.addRange(LR); } - DOUT << "\t\t\t\tAdded new interval: "; - nI.print(DOUT, tri_); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tAdded new interval: "; + nI.print(errs(), tri_); + errs() << '\n'; + }); } return CanFold; } bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI, - MachineBasicBlock *MBB, unsigned Idx) const { - unsigned End = getMBBEndIdx(MBB); + MachineBasicBlock *MBB, + LiveIndex Idx) const { + LiveIndex End = getMBBEndIdx(MBB); for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) { - unsigned KillIdx = VNI->kills[j]; + if (VNI->kills[j].isPHIIndex()) + continue; + + LiveIndex KillIdx = VNI->kills[j]; if (KillIdx > Idx && KillIdx < End) return true; } @@ -1619,11 +1842,11 @@ bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, /// during spilling. 
namespace { struct RewriteInfo { - unsigned Index; + LiveIndex Index; MachineInstr *MI; bool HasUse; bool HasDef; - RewriteInfo(unsigned i, MachineInstr *mi, bool u, bool d) + RewriteInfo(LiveIndex i, MachineInstr *mi, bool u, bool d) : Index(i), MI(mi), HasUse(u), HasDef(d) {} }; @@ -1652,8 +1875,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, std::vector<LiveInterval*> &NewLIs) { bool AllCanFold = true; unsigned NewVReg = 0; - unsigned start = getBaseIndex(I->start); - unsigned end = getBaseIndex(I->end-1) + InstrSlots::NUM; + LiveIndex start = getBaseIndex(I->start); + LiveIndex end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); // First collect all the def / use in this live range that will be rewritten. // Make sure they are sorted according to instruction index. @@ -1664,10 +1887,11 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineOperand &O = ri.getOperand(); ++ri; assert(!O.isImplicit() && "Spilling register that's used as implicit use?"); - unsigned index = getInstructionIndex(MI); + LiveIndex index = getInstructionIndex(MI); if (index < start || index >= end) continue; - if (O.isUse() && !li.liveAt(getUseIndex(index))) + + if (O.isUndef()) // Must be defined by an implicit def. It should not be spilled. Note, // this is for correctness reason. e.g. // 8 %reg1024<def> = IMPLICIT_DEF @@ -1687,7 +1911,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) { RewriteInfo &rwi = RewriteMIs[i]; ++i; - unsigned index = rwi.Index; + LiveIndex index = rwi.Index; bool MIHasUse = rwi.HasUse; bool MIHasDef = rwi.HasDef; MachineInstr *MI = rwi.MI; @@ -1773,7 +1997,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, getDefIndex(index)); else { // If this is a two-address code, then this index starts a new VNInfo. 
- const VNInfo *VNI = li.findDefinedVNInfo(getDefIndex(index)); + const VNInfo *VNI = li.findDefinedVNInfoForRegInt(getDefIndex(index)); if (VNI) HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, getDefIndex(index)); } @@ -1786,7 +2010,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, SpillIdxes.insert(std::make_pair(MBBId, S)); } else if (SII->second.back().vreg != NewVReg) { SII->second.push_back(SRInfo(index, NewVReg, true)); - } else if ((int)index > SII->second.back().index) { + } else if (index > SII->second.back().index) { // If there is an earlier def and this is a two-address // instruction, then it's not possible to fold the store (which // would also fold the load). @@ -1797,7 +2021,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, SpillMBBs.set(MBBId); } else if (SII != SpillIdxes.end() && SII->second.back().vreg == NewVReg && - (int)index > SII->second.back().index) { + index > SII->second.back().index) { // There is an earlier def that's not killed (must be two-address). // The spill is no longer needed. SII->second.pop_back(); @@ -1814,7 +2038,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, SpillIdxes.find(MBBId); if (SII != SpillIdxes.end() && SII->second.back().vreg == NewVReg && - (int)index > SII->second.back().index) + index > SII->second.back().index) // Use(s) following the last def, it's not safe to fold the spill. 
SII->second.back().canFold = false; DenseMap<unsigned, std::vector<SRInfo> >::iterator RII = @@ -1848,8 +2072,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, } } -bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr, - BitVector &RestoreMBBs, +bool LiveIntervals::alsoFoldARestore(int Id, LiveIndex index, + unsigned vr, BitVector &RestoreMBBs, DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { if (!RestoreMBBs[Id]) return false; @@ -1862,15 +2086,15 @@ bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr, return false; } -void LiveIntervals::eraseRestoreInfo(int Id, int index, unsigned vr, - BitVector &RestoreMBBs, +void LiveIntervals::eraseRestoreInfo(int Id, LiveIndex index, + unsigned vr, BitVector &RestoreMBBs, DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) { if (!RestoreMBBs[Id]) return; std::vector<SRInfo> &Restores = RestoreIdxes[Id]; for (unsigned i = 0, e = Restores.size(); i != e; ++i) if (Restores[i].index == index && Restores[i].vreg) - Restores[i].index = -1; + Restores[i].index = LiveIndex(); } /// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being @@ -1920,9 +2144,11 @@ addIntervalsForSpillsFast(const LiveInterval &li, assert(li.weight != HUGE_VALF && "attempt to spill already spilled interval!"); - DOUT << "\t\t\t\tadding intervals for spills for interval: "; - DEBUG(li.dump()); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tadding intervals for spills for interval: "; + li.dump(); + errs() << '\n'; + }); const TargetRegisterClass* rc = mri_->getRegClass(li.reg); @@ -1967,27 +2193,31 @@ addIntervalsForSpillsFast(const LiveInterval &li, } // Fill in the new live interval. 
- unsigned index = getInstructionIndex(MI); + LiveIndex index = getInstructionIndex(MI); if (HasUse) { LiveRange LR(getLoadIndex(index), getUseIndex(index), - nI.getNextValue(0, 0, false, getVNInfoAllocator())); - DOUT << " +" << LR; + nI.getNextValue(LiveIndex(), 0, false, + getVNInfoAllocator())); + DEBUG(errs() << " +" << LR); nI.addRange(LR); vrm.addRestorePoint(NewVReg, MI); } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(0, 0, false, getVNInfoAllocator())); - DOUT << " +" << LR; + nI.getNextValue(LiveIndex(), 0, false, + getVNInfoAllocator())); + DEBUG(errs() << " +" << LR); nI.addRange(LR); vrm.addSpillPoint(NewVReg, true, MI); } added.push_back(&nI); - DOUT << "\t\t\t\tadded new interval: "; - DEBUG(nI.dump()); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tadded new interval: "; + nI.dump(); + errs() << '\n'; + }); } @@ -2008,9 +2238,11 @@ addIntervalsForSpills(const LiveInterval &li, assert(li.weight != HUGE_VALF && "attempt to spill already spilled interval!"); - DOUT << "\t\t\t\tadding intervals for spills for interval: "; - li.print(DOUT, tri_); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tadding intervals for spills for interval: "; + li.print(errs(), tri_); + errs() << '\n'; + }); // Each bit specify whether a spill is required in the MBB. BitVector SpillMBBs(mf_->getNumBlockIDs()); @@ -2036,8 +2268,8 @@ addIntervalsForSpills(const LiveInterval &li, if (vrm.getPreSplitReg(li.reg)) { vrm.setIsSplitFromReg(li.reg, 0); // Unset the split kill marker on the last use. 
- unsigned KillIdx = vrm.getKillPoint(li.reg); - if (KillIdx) { + LiveIndex KillIdx = vrm.getKillPoint(li.reg); + if (KillIdx != LiveIndex()) { MachineInstr *KillMI = getInstructionFromIndex(KillIdx); assert(KillMI && "Last use disappeared?"); int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true); @@ -2081,9 +2313,7 @@ addIntervalsForSpills(const LiveInterval &li, return NewLIs; } - bool TrySplit = SplitAtBB && !intervalIsInOneMBB(li); - if (SplitLimit != -1 && (int)numSplits >= SplitLimit) - TrySplit = false; + bool TrySplit = !intervalIsInOneMBB(li); if (TrySplit) ++numSplits; bool NeedStackSlot = false; @@ -2102,7 +2332,7 @@ addIntervalsForSpills(const LiveInterval &li, ReMatOrigDefs[VN] = ReMatDefMI; // Original def may be modified so we have to make a copy here. MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI); - ClonedMIs.push_back(Clone); + CloneMIs.push_back(Clone); ReMatDefs[VN] = Clone; bool CanDelete = true; @@ -2165,7 +2395,7 @@ addIntervalsForSpills(const LiveInterval &li, while (Id != -1) { std::vector<SRInfo> &spills = SpillIdxes[Id]; for (unsigned i = 0, e = spills.size(); i != e; ++i) { - int index = spills[i].index; + LiveIndex index = spills[i].index; unsigned VReg = spills[i].vreg; LiveInterval &nI = getOrCreateInterval(VReg); bool isReMat = vrm.isReMaterialized(VReg); @@ -2203,7 +2433,7 @@ addIntervalsForSpills(const LiveInterval &li, if (FoundUse) { // Also folded uses, do not issue a load. 
eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes); - nI.removeRange(getLoadIndex(index), getUseIndex(index)+1); + nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index))); } nI.removeRange(getDefIndex(index), getStoreIndex(index)); } @@ -2228,8 +2458,8 @@ addIntervalsForSpills(const LiveInterval &li, while (Id != -1) { std::vector<SRInfo> &restores = RestoreIdxes[Id]; for (unsigned i = 0, e = restores.size(); i != e; ++i) { - int index = restores[i].index; - if (index == -1) + LiveIndex index = restores[i].index; + if (index == LiveIndex()) continue; unsigned VReg = restores[i].vreg; LiveInterval &nI = getOrCreateInterval(VReg); @@ -2284,7 +2514,7 @@ addIntervalsForSpills(const LiveInterval &li, // If folding is not possible / failed, then tell the spiller to issue a // load / rematerialization for us. if (Folded) - nI.removeRange(getLoadIndex(index), getUseIndex(index)+1); + nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index))); else vrm.addRestorePoint(VReg, MI); } @@ -2300,7 +2530,7 @@ addIntervalsForSpills(const LiveInterval &li, LI->weight /= InstrSlots::NUM * getApproximateInstructionCount(*LI); if (!AddedKill.count(LI)) { LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; - unsigned LastUseIdx = getBaseIndex(LR->end); + LiveIndex LastUseIdx = getBaseIndex(LR->end); MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); assert(UseIdx != -1); @@ -2351,7 +2581,7 @@ unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li, E = mri_->reg_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); MachineInstr *MI = O.getParent(); - unsigned Index = getInstructionIndex(MI); + LiveIndex Index = getInstructionIndex(MI); if (pli.liveAt(Index)) ++NumConflicts; } @@ -2382,29 +2612,31 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, if (SeenMIs.count(MI)) continue; SeenMIs.insert(MI); - unsigned Index = 
getInstructionIndex(MI); + LiveIndex Index = getInstructionIndex(MI); if (pli.liveAt(Index)) { vrm.addEmergencySpill(SpillReg, MI); - unsigned StartIdx = getLoadIndex(Index); - unsigned EndIdx = getStoreIndex(Index)+1; + LiveIndex StartIdx = getLoadIndex(Index); + LiveIndex EndIdx = getNextSlot(getStoreIndex(Index)); if (pli.isInOneLiveRange(StartIdx, EndIdx)) { pli.removeRange(StartIdx, EndIdx); Cut = true; } else { - cerr << "Ran out of registers during register allocation!\n"; + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { - cerr << "Please check your inline asm statement for invalid " + Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; - MI->print(cerr.stream(), tm_); + MI->print(Msg, tm_); } - exit(1); + llvm_report_error(Msg.str()); } for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) { if (!hasInterval(*AS)) continue; LiveInterval &spli = getInterval(*AS); if (spli.liveAt(Index)) - spli.removeRange(getLoadIndex(Index), getStoreIndex(Index)+1); + spli.removeRange(getLoadIndex(Index), getNextSlot(getStoreIndex(Index))); } } } @@ -2412,16 +2644,18 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, } LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst) { + MachineInstr* startInst) { LiveInterval& Interval = getOrCreateInterval(reg); VNInfo* VN = Interval.getNextValue( - getInstructionIndex(startInst) + InstrSlots::DEF, - startInst, true, getVNInfoAllocator()); + LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF), + startInst, true, getVNInfoAllocator()); VN->setHasPHIKill(true); - VN->kills.push_back(getMBBEndIdx(startInst->getParent())); - LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF, - getMBBEndIdx(startInst->getParent()) + 1, VN); + VN->kills.push_back(terminatorGaps[startInst->getParent()]); + LiveRange 
LR( + LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF), + getNextSlot(getMBBEndIdx(startInst->getParent())), VN); Interval.addRange(LR); return LR; } + diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index 86f7ea2..a7bea1f 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include <limits> using namespace llvm; @@ -52,15 +53,16 @@ bool LiveStacks::runOnMachineFunction(MachineFunction &) { } /// print - Implement the dump method. -void LiveStacks::print(std::ostream &O, const Module*) const { - O << "********** INTERVALS **********\n"; +void LiveStacks::print(raw_ostream &OS, const Module*) const { + + OS << "********** INTERVALS **********\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second.print(O); + I->second.print(OS); int Slot = I->first; const TargetRegisterClass *RC = getIntervalRegClass(Slot); if (RC) - O << " [" << RC->getName() << "]\n"; + OS << " [" << RC->getName() << "]\n"; else - O << " [Unknown]\n"; + OS << " [Unknown]\n"; } } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index bd84508..139e029 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -37,7 +37,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Config/alloca.h" #include <algorithm> using namespace llvm; @@ -48,20 +47,21 @@ static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis"); void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(UnreachableMachineBlockElimID); AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); } void LiveVariables::VarInfo::dump() const { - cerr << " Alive in blocks: "; + errs() << " Alive 
in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), E = AliveBlocks.end(); I != E; ++I) - cerr << *I << ", "; - cerr << "\n Killed by:"; + errs() << *I << ", "; + errs() << "\n Killed by:"; if (Kills.empty()) - cerr << " No instructions.\n"; + errs() << " No instructions.\n"; else { for (unsigned i = 0, e = Kills.size(); i != e; ++i) - cerr << "\n #" << i << ": " << *Kills[i]; - cerr << "\n"; + errs() << "\n #" << i << ": " << *Kills[i]; + errs() << "\n"; } } @@ -180,9 +180,9 @@ void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) { } /// FindLastPartialDef - Return the last partial def of the specified register. -/// Also returns the sub-register that's defined. +/// Also returns the sub-registers that're defined by the instruction. MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, - unsigned &PartDefReg) { + SmallSet<unsigned,4> &PartDefRegs) { unsigned LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = NULL; @@ -198,7 +198,23 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, LastDefDist = Dist; } } - PartDefReg = LastDefReg; + + if (!LastDef) + return 0; + + PartDefRegs.insert(LastDefReg); + for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) { + MachineOperand &MO = LastDef->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) + continue; + unsigned DefReg = MO.getReg(); + if (TRI->isSubRegister(Reg, DefReg)) { + PartDefRegs.insert(DefReg); + for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg); + unsigned SubReg = *SubRegs; ++SubRegs) + PartDefRegs.insert(SubReg); + } + } return LastDef; } @@ -216,8 +232,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { // ... // = EAX // All of the sub-registers must have been defined before the use of Reg! 
- unsigned PartDefReg = 0; - MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefReg); + SmallSet<unsigned, 4> PartDefRegs; + MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs); // If LastPartialDef is NULL, it must be using a livein register. if (LastPartialDef) { LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, @@ -228,7 +244,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { unsigned SubReg = *SubRegs; ++SubRegs) { if (Processed.count(SubReg)) continue; - if (SubReg == PartDefReg || TRI->isSubRegister(PartDefReg, SubReg)) + if (PartDefRegs.count(SubReg)) continue; // This part of Reg was defined before the last partial def. It's killed // here. @@ -249,78 +265,13 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { PhysRegUse[SubReg] = MI; } -/// hasRegisterUseBelow - Return true if the specified register is used after -/// the current instruction and before it's next definition. -bool LiveVariables::hasRegisterUseBelow(unsigned Reg, - MachineBasicBlock::iterator I, - MachineBasicBlock *MBB) { - if (I == MBB->end()) - return false; - - // First find out if there are any uses / defs below. - bool hasDistInfo = true; - unsigned CurDist = DistanceMap[I]; - SmallVector<MachineInstr*, 4> Uses; - SmallVector<MachineInstr*, 4> Defs; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineOperand &UDO = RI.getOperand(); - MachineInstr *UDMI = &*RI; - if (UDMI->getParent() != MBB) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); - bool isBelow = false; - if (DI == DistanceMap.end()) { - // Must be below if it hasn't been assigned a distance yet. 
- isBelow = true; - hasDistInfo = false; - } else if (DI->second > CurDist) - isBelow = true; - if (isBelow) { - if (UDO.isUse()) - Uses.push_back(UDMI); - if (UDO.isDef()) - Defs.push_back(UDMI); - } - } - - if (Uses.empty()) - // No uses below. - return false; - else if (!Uses.empty() && Defs.empty()) - // There are uses below but no defs below. - return true; - // There are both uses and defs below. We need to know which comes first. - if (!hasDistInfo) { - // Complete DistanceMap for this MBB. This information is computed only - // once per MBB. - ++I; - ++CurDist; - for (MachineBasicBlock::iterator E = MBB->end(); I != E; ++I, ++CurDist) - DistanceMap.insert(std::make_pair(I, CurDist)); - } - - unsigned EarliestUse = DistanceMap[Uses[0]]; - for (unsigned i = 1, e = Uses.size(); i != e; ++i) { - unsigned Dist = DistanceMap[Uses[i]]; - if (Dist < EarliestUse) - EarliestUse = Dist; - } - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Dist = DistanceMap[Defs[i]]; - if (Dist < EarliestUse) - // The register is defined before its first use below. - return false; - } - return true; -} - bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { - if (!PhysRegUse[Reg] && !PhysRegDef[Reg]) + MachineInstr *LastDef = PhysRegDef[Reg]; + MachineInstr *LastUse = PhysRegUse[Reg]; + if (!LastDef && !LastUse) return false; - MachineInstr *LastRefOrPartRef = PhysRegUse[Reg] - ? PhysRegUse[Reg] : PhysRegDef[Reg]; + MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; // The whole register is used. 
// AL = @@ -339,9 +290,22 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // AX<dead> = AL<imp-def> // = AL<kill> // AX = + MachineInstr *LastPartDef = 0; + unsigned LastPartDefDist = 0; SmallSet<unsigned, 8> PartUses; for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { + MachineInstr *Def = PhysRegDef[SubReg]; + if (Def && Def != LastDef) { + // There was a def of this sub-register in between. This is a partial + // def, keep track of the last one. + unsigned Dist = DistanceMap[Def]; + if (Dist > LastPartDefDist) { + LastPartDefDist = Dist; + LastPartDef = Def; + } + continue; + } if (MachineInstr *Use = PhysRegUse[SubReg]) { PartUses.insert(SubReg); for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) @@ -354,35 +318,47 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } } - if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) - // If the last reference is the last def, then it's not used at all. - // That is, unless we are currently processing the last reference itself. - LastRefOrPartRef->addRegisterDead(Reg, TRI, true); - - // Partial uses. Mark register def dead and add implicit def of - // sub-registers which are used. - // EAX<dead> = op AL<imp-def> - // That is, EAX def is dead but AL def extends pass it. - // Enable this after live interval analysis is fixed to improve codegen! - else if (!PhysRegUse[Reg]) { + if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { + if (LastPartDef) + // The last partial def kills the register. + LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/, true/*IsKill*/)); + else + // If the last reference is the last def, then it's not used at all. + // That is, unless we are currently processing the last reference itself. + LastRefOrPartRef->addRegisterDead(Reg, TRI, true); + } else if (!PhysRegUse[Reg]) { + // Partial uses. 
Mark register def dead and add implicit def of + // sub-registers which are used. + // EAX<dead> = op AL<imp-def> + // That is, EAX def is dead but AL def extends pass it. PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { - if (PartUses.count(SubReg)) { - PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg, - true, true)); - LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) - PartUses.erase(*SS); + if (!PartUses.count(SubReg)) + continue; + bool NeedDef = true; + if (PhysRegDef[Reg] == PhysRegDef[SubReg]) { + MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg); + if (MO) { + NeedDef = false; + assert(!MO->isDead()); + } } + if (NeedDef) + PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg, + true/*IsDef*/, true/*IsImp*/)); + LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); + for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + PartUses.erase(*SS); } - } - else + } else LastRefOrPartRef->addRegisterKilled(Reg, TRI, true); return true; } -void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { +void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, + SmallVector<unsigned, 4> &Defs) { // What parts of the register are previously defined? SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { @@ -398,6 +374,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { // AL = // AH = // = AX + if (Live.count(SubReg)) + continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { Live.insert(SubReg); for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) @@ -408,68 +386,25 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { // Start from the largest piece, find the last time any part of the register // is referenced. 
- if (!HandlePhysRegKill(Reg, MI)) { - // Only some of the sub-registers are used. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { - if (!Live.count(SubReg)) - // Skip if this sub-register isn't defined. - continue; - if (HandlePhysRegKill(SubReg, MI)) { - Live.erase(SubReg); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) - Live.erase(*SS); - } - } - assert(Live.empty() && "Not all defined registers are killed / dead?"); + HandlePhysRegKill(Reg, MI); + // Only some of the sub-registers are used. + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + if (!Live.count(SubReg)) + // Skip if this sub-register isn't defined. + continue; + HandlePhysRegKill(SubReg, MI); } - if (MI) { - // Does this extend the live range of a super-register? - SmallSet<unsigned, 8> Processed; - for (const unsigned *SuperRegs = TRI->getSuperRegisters(Reg); - unsigned SuperReg = *SuperRegs; ++SuperRegs) { - if (Processed.count(SuperReg)) - continue; - MachineInstr *LastRef = PhysRegUse[SuperReg] - ? PhysRegUse[SuperReg] : PhysRegDef[SuperReg]; - if (LastRef && LastRef != MI) { - // The larger register is previously defined. Now a smaller part is - // being re-defined. Treat it as read/mod/write if there are uses - // below. - // EAX = - // AX = EAX<imp-use,kill>, EAX<imp-def> - // ... - /// = EAX - if (hasRegisterUseBelow(SuperReg, MI, MI->getParent())) { - MI->addOperand(MachineOperand::CreateReg(SuperReg, false/*IsDef*/, - true/*IsImp*/,true/*IsKill*/)); - MI->addOperand(MachineOperand::CreateReg(SuperReg, true/*IsDef*/, - true/*IsImp*/)); - PhysRegDef[SuperReg] = MI; - PhysRegUse[SuperReg] = NULL; - Processed.insert(SuperReg); - for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) { - PhysRegDef[*SS] = MI; - PhysRegUse[*SS] = NULL; - Processed.insert(*SS); - } - } else { - // Otherwise, the super register is killed. 
- if (HandlePhysRegKill(SuperReg, MI)) { - PhysRegDef[SuperReg] = NULL; - PhysRegUse[SuperReg] = NULL; - for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) { - PhysRegDef[*SS] = NULL; - PhysRegUse[*SS] = NULL; - Processed.insert(*SS); - } - } - } - } - } + if (MI) + Defs.push_back(Reg); // Remember this def. +} - // Remember this def. +void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, + SmallVector<unsigned, 4> &Defs) { + while (!Defs.empty()) { + unsigned Reg = Defs.back(); + Defs.pop_back(); PhysRegDef[Reg] = MI; PhysRegUse[Reg] = NULL; for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); @@ -480,6 +415,21 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { } } +namespace { + struct RegSorter { + const TargetRegisterInfo *TRI; + + RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { } + bool operator()(unsigned A, unsigned B) { + if (TRI->isSubRegister(A, B)) + return true; + else if (TRI->isSubRegister(B, A)) + return false; + return A < B; + } + }; +} + bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); @@ -512,11 +462,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock *MBB = *DFI; // Mark live-in registers as live-in. + SmallVector<unsigned, 4> Defs; for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(), EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, 0); + HandlePhysRegDef(*II, 0, Defs); } // Loop over all of the instructions, processing them. 
@@ -563,8 +514,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!ReservedRegisters[MOReg]) - HandlePhysRegDef(MOReg, MI); + HandlePhysRegDef(MOReg, MI, Defs); } + UpdatePhysRegDefs(MI, Defs); } // Handle any virtual assignments from PHI nodes which might be at the @@ -603,7 +555,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) if (PhysRegDef[i] || PhysRegUse[i]) - HandlePhysRegDef(i, 0); + HandlePhysRegDef(i, 0, Defs); std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 14acb71..8486bb0 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -19,12 +19,14 @@ #include "llvm/Function.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -38,6 +40,7 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -53,7 +56,8 @@ namespace { void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, const TargetRegisterInfo &TRI); void TransferKillFlag(MachineInstr *MI, unsigned SrcReg, - const TargetRegisterInfo &TRI); + const TargetRegisterInfo &TRI, + bool AddIfNotFound = false); }; char LowerSubregsInstructionPass::ID = 0; @@ -85,10 +89,11 @@ 
LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, void LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI, unsigned SrcReg, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + bool AddIfNotFound) { for (MachineBasicBlock::iterator MII = prior(MachineBasicBlock::iterator(MI)); ; --MII) { - if (MII->addRegisterKilled(SrcReg, &TRI)) + if (MII->addRegisterKilled(SrcReg, &TRI, AddIfNotFound)) break; assert(MII != MI->getParent()->begin() && "copyRegToReg output doesn't reference source register!"); @@ -100,7 +105,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { MachineFunction &MF = *MBB->getParent(); const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - + assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && MI->getOperand(1).isReg() && MI->getOperand(1).isUse() && MI->getOperand(2).isImm() && "Malformed extract_subreg"); @@ -114,41 +119,41 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { "Extract supperg source must be a physical register"); assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && "Extract destination must be in a physical register"); - - DOUT << "subreg: CONVERTING: " << *MI; + assert(SrcReg && "invalid subregister index for register"); + + DEBUG(errs() << "subreg: CONVERTING: " << *MI); if (SrcReg == DstReg) { - // No need to insert an identify copy instruction. - DOUT << "subreg: eliminated!"; - // Find the kill of the destination register's live range, and insert - // a kill of the source register at that point. - if (MI->getOperand(1).isKill() && !MI->getOperand(0).isDead()) - for (MachineBasicBlock::iterator MII = - next(MachineBasicBlock::iterator(MI)); - MII != MBB->end(); ++MII) - if (MII->killsRegister(DstReg, &TRI)) { - MII->addRegisterKilled(SuperReg, &TRI, /*AddIfNotFound=*/true); - break; - } + // No need to insert an identity copy instruction. 
+ if (MI->getOperand(1).isKill()) { + // We must make sure the super-register gets killed. Replace the + // instruction with KILL. + MI->setDesc(TII.get(TargetInstrInfo::KILL)); + MI->RemoveOperand(2); // SubIdx + DEBUG(errs() << "subreg: replace by: " << *MI); + return true; + } + + DEBUG(errs() << "subreg: eliminated!"); } else { // Insert copy - const TargetRegisterClass *TRC = TRI.getPhysicalRegisterRegClass(DstReg); - assert(TRC == TRI.getPhysicalRegisterRegClass(SrcReg) && - "Extract subreg and Dst must be of same register class"); - TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRC, TRC); + const TargetRegisterClass *TRCS = TRI.getPhysicalRegisterRegClass(DstReg); + const TargetRegisterClass *TRCD = TRI.getPhysicalRegisterRegClass(SrcReg); + bool Emitted = TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS); + (void)Emitted; + assert(Emitted && "Subreg and Dst must be of compatible register class"); // Transfer the kill/dead flags, if needed. if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstReg, TRI); if (MI->getOperand(1).isKill()) - TransferKillFlag(MI, SrcReg, TRI); - -#ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); -#endif + TransferKillFlag(MI, SuperReg, TRI, true); + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + errs() << "subreg: " << *(--dMI); + }); } - DOUT << "\n"; + DEBUG(errs() << '\n'); MBB->erase(MI); return true; } @@ -176,7 +181,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); - DOUT << "subreg: CONVERTING: " << *MI; + DEBUG(errs() << "subreg: CONVERTING: " << *MI); if (DstSubReg == InsReg && InsSIdx == 0) { // No need to insert an identify copy instruction. 
@@ -185,7 +190,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3 // The first def is defining RAX, not EAX so the top bits were not // zero extended. - DOUT << "subreg: eliminated!"; + DEBUG(errs() << "subreg: eliminated!"); } else { // Insert sub-register copy const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); @@ -196,14 +201,13 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { TransferDeadFlag(MI, DstSubReg, TRI); if (MI->getOperand(2).isKill()) TransferKillFlag(MI, InsReg, TRI); - -#ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); -#endif + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + errs() << "subreg: " << *(--dMI); + }); } - DOUT << "\n"; + DEBUG(errs() << '\n'); MBB->erase(MI); return true; } @@ -228,49 +232,79 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?"); assert(SubIdx != 0 && "Invalid index for insert_subreg"); unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx); - + assert(DstSubReg && "invalid subregister index for register"); assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && "Insert superreg source must be in a physical register"); assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); - DOUT << "subreg: CONVERTING: " << *MI; + DEBUG(errs() << "subreg: CONVERTING: " << *MI); if (DstSubReg == InsReg) { - // No need to insert an identify copy instruction. - DOUT << "subreg: eliminated!"; + // No need to insert an identity copy instruction. 
If the SrcReg was + // <undef>, we need to make sure it is alive by inserting a KILL + if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) { + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII.get(TargetInstrInfo::KILL), DstReg); + if (MI->getOperand(2).isUndef()) + MIB.addReg(InsReg, RegState::Undef); + else + MIB.addReg(InsReg, RegState::Kill); + } else { + DEBUG(errs() << "subreg: eliminated!\n"); + MBB->erase(MI); + return true; + } } else { // Insert sub-register copy const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg); - TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + if (MI->getOperand(2).isUndef()) + // If the source register being inserted is undef, then this becomes a + // KILL. + BuildMI(*MBB, MI, MI->getDebugLoc(), + TII.get(TargetInstrInfo::KILL), DstSubReg); + else + TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + + // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg. + if (!MI->getOperand(1).isUndef()) + CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true)); + // Transfer the kill/dead flags, if needed. - if (MI->getOperand(0).isDead()) + if (MI->getOperand(0).isDead()) { TransferDeadFlag(MI, DstSubReg, TRI); - if (MI->getOperand(1).isKill()) - TransferKillFlag(MI, InsReg, TRI); + } else { + // Make sure the full DstReg is live after this replacement. 
+ CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true)); + } -#ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); -#endif + // Make sure the inserted register gets killed + if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef()) + TransferKillFlag(MI, InsReg, TRI); } - DOUT << "\n"; + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + errs() << "subreg: " << *(--dMI) << "\n"; + }); + MBB->erase(MI); - return true; + return true; } /// runOnMachineFunction - Reduce subregister inserts and extracts to register /// copies. /// bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DOUT << "Machine Function\n"; - - bool MadeChange = false; + DEBUG(errs() << "Machine Function\n" + << "********** LOWERING SUBREG INSTRS **********\n" + << "********** Function: " + << MF.getFunction()->getName() << '\n'); - DOUT << "********** LOWERING SUBREG INSTRS **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + bool MadeChange = false; for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); mbbi != mbbe; ++mbbi) { diff --git a/lib/CodeGen/MachO.h b/lib/CodeGen/MachO.h index bd9bd61..f2b40fe 100644 --- a/lib/CodeGen/MachO.h +++ b/lib/CodeGen/MachO.h @@ -14,17 +14,15 @@ #ifndef MACHO_H #define MACHO_H -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/CodeGen/BinaryObject.h" #include <string> #include <vector> namespace llvm { -typedef std::vector<unsigned char> DataBuffer; - +class GlobalValue; +class MCAsmInfo; + /// MachOSym - This struct contains information about each symbol that is /// added to logical symbol table for the module. This is eventually /// turned into a real symbol table in the file. 
@@ -70,7 +68,7 @@ struct MachOSym { }; MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, - const TargetAsmInfo *TAI); + const MCAsmInfo *MAI); struct SymCmp { // FIXME: this does not appear to be sorting 'f' after 'F' @@ -110,7 +108,7 @@ struct MachOHeader { /// HeaderData - The actual data for the header which we are building /// up for emission to the file. - DataBuffer HeaderData; + std::vector<unsigned char> HeaderData; // Constants for the filetype field // see <mach-o/loader.h> for additional info on the various types @@ -180,8 +178,8 @@ struct MachOHeader { }; MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0), - reserved(0) { } - + reserved(0) {} + /// cmdSize - This routine returns the size of the MachOSection as written /// to disk, depending on whether the destination is a 64 bit Mach-O file. unsigned cmdSize(bool is64Bit) const { @@ -203,7 +201,7 @@ struct MachOHeader { } }; // end struct MachOHeader - + /// MachOSegment - This struct contains the necessary information to /// emit the load commands for each section in the file. struct MachOSegment { @@ -245,13 +243,13 @@ struct MachOSegment { SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE, SEG_VM_PROT_ALL = VM_PROT_ALL }; - + // Constants for the cmd field // see <mach-o/loader.h> enum { LC_SEGMENT = 0x01, // segment of this file to be mapped LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped }; - + /// cmdSize - This routine returns the size of the MachOSection as written /// to disk, depending on whether the destination is a 64 bit Mach-O file. unsigned cmdSize(bool is64Bit) const { @@ -272,11 +270,10 @@ struct MachOSegment { /// turned into the SectionCommand in the load command for a particlar /// segment. 
-struct MachOSection { +struct MachOSection : public BinaryObject { std::string sectname; // name of this section, std::string segname; // segment this section goes in uint64_t addr; // memory address of this section - uint64_t size; // size in bytes of this section uint32_t offset; // file offset of this section uint32_t align; // section alignment (power of 2) uint32_t reloff; // file offset of relocation entries @@ -285,24 +282,15 @@ struct MachOSection { uint32_t reserved1; // reserved (for offset or index) uint32_t reserved2; // reserved (for count or sizeof) uint32_t reserved3; // reserved (64 bit only) - + /// A unique number for this section, which will be used to match symbols /// to the correct section. uint32_t Index; - - /// SectionData - The actual data for this section which we are building - /// up for emission to the file. - DataBuffer SectionData; /// RelocBuffer - A buffer to hold the mach-o relocations before we write /// them out at the appropriate location in the file. - DataBuffer RelocBuffer; - - /// Relocations - The relocations that we have encountered so far in this - /// section that we will need to convert to MachORelocation entries when - /// the file is written. - std::vector<MachineRelocation> Relocations; - + std::vector<unsigned char> RelocBuffer; + // Constants for the section types (low 8 bits of flags field) // see <mach-o/loader.h> enum { S_REGULAR = 0, @@ -374,48 +362,49 @@ struct MachOSection { } MachOSection(const std::string &seg, const std::string §) - : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2), - reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0), + : BinaryObject(), sectname(sect), segname(seg), addr(0), offset(0), + align(2), reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0), reserved3(0) { } }; // end struct MachOSection - /// MachOSymTab - This struct contains information about the offsets and - /// size of symbol table information. - /// segment. 
- struct MachODySymTab { - uint32_t cmd; // LC_DYSYMTAB - uint32_t cmdsize; // sizeof( MachODySymTab ) - uint32_t ilocalsym; // index to local symbols - uint32_t nlocalsym; // number of local symbols - uint32_t iextdefsym; // index to externally defined symbols - uint32_t nextdefsym; // number of externally defined symbols - uint32_t iundefsym; // index to undefined symbols - uint32_t nundefsym; // number of undefined symbols - uint32_t tocoff; // file offset to table of contents - uint32_t ntoc; // number of entries in table of contents - uint32_t modtaboff; // file offset to module table - uint32_t nmodtab; // number of module table entries - uint32_t extrefsymoff; // offset to referenced symbol table - uint32_t nextrefsyms; // number of referenced symbol table entries - uint32_t indirectsymoff; // file offset to the indirect symbol table - uint32_t nindirectsyms; // number of indirect symbol table entries - uint32_t extreloff; // offset to external relocation entries - uint32_t nextrel; // number of external relocation entries - uint32_t locreloff; // offset to local relocation entries - uint32_t nlocrel; // number of local relocation entries - - // Constants for the cmd field - // see <mach-o/loader.h> - enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info - }; - - MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)), - ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0), - iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0), - nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0), - nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { } - }; +/// MachOSymTab - This struct contains information about the offsets and +/// size of symbol table information. +/// segment. 
+struct MachODySymTab { + uint32_t cmd; // LC_DYSYMTAB + uint32_t cmdsize; // sizeof(MachODySymTab) + uint32_t ilocalsym; // index to local symbols + uint32_t nlocalsym; // number of local symbols + uint32_t iextdefsym; // index to externally defined symbols + uint32_t nextdefsym; // number of externally defined symbols + uint32_t iundefsym; // index to undefined symbols + uint32_t nundefsym; // number of undefined symbols + uint32_t tocoff; // file offset to table of contents + uint32_t ntoc; // number of entries in table of contents + uint32_t modtaboff; // file offset to module table + uint32_t nmodtab; // number of module table entries + uint32_t extrefsymoff; // offset to referenced symbol table + uint32_t nextrefsyms; // number of referenced symbol table entries + uint32_t indirectsymoff; // file offset to the indirect symbol table + uint32_t nindirectsyms; // number of indirect symbol table entries + uint32_t extreloff; // offset to external relocation entries + uint32_t nextrel; // number of external relocation entries + uint32_t locreloff; // offset to local relocation entries + uint32_t nlocrel; // number of local relocation entries + + // Constants for the cmd field + // see <mach-o/loader.h> + enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info + }; + + MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)), + ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0), + iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0), + nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0), + nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) {} + +}; // end struct MachODySymTab } // end namespace llvm diff --git a/lib/CodeGen/MachOCodeEmitter.cpp b/lib/CodeGen/MachOCodeEmitter.cpp index 02b02de..1318477 100644 --- a/lib/CodeGen/MachOCodeEmitter.cpp +++ b/lib/CodeGen/MachOCodeEmitter.cpp @@ -7,22 +7,37 @@ // //===----------------------------------------------------------------------===// +#include "MachO.h" 
+#include "MachOWriter.h" #include "MachOCodeEmitter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/OutputBuffer.h" +#include <vector> //===----------------------------------------------------------------------===// // MachOCodeEmitter Implementation //===----------------------------------------------------------------------===// namespace llvm { - + +MachOCodeEmitter::MachOCodeEmitter(MachOWriter &mow, MachOSection &mos) : + ObjectCodeEmitter(&mos), MOW(mow), TM(MOW.TM) { + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); + MAI = TM.getMCAsmInfo(); +} + /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. @@ -39,28 +54,18 @@ void MachOCodeEmitter::startFunction(MachineFunction &MF) { // Get the Mach-O Section that this function belongs in. MachOSection *MOS = MOW.getTextSection(); - // FIXME: better memory management - MOS->SectionData.reserve(4096); - BufferBegin = &MOS->SectionData[0]; - BufferEnd = BufferBegin + MOS->SectionData.capacity(); - // Upgrade the section alignment if required. if (MOS->align < Align) MOS->align = Align; - // Round the size up to the correct alignment for starting the new function. 
- if ((MOS->size & ((1 << Align) - 1)) != 0) { - MOS->size += (1 << Align); - MOS->size &= ~((1 << Align) - 1); - } + MOS->emitAlignment(Align); + + // Create symbol for function entry + const GlobalValue *FuncV = MF.getFunction(); + MachOSym FnSym(FuncV, MOW.Mang->getMangledName(FuncV), MOS->Index, MAI); + FnSym.n_value = getCurrentPCOffset(); - // FIXME: Using MOS->size directly here instead of calculating it from the - // output buffer size (impossible because the code emitter deals only in raw - // bytes) forces us to manually synchronize size and write padding zero bytes - // to the output buffer for all non-text sections. For text sections, we do - // not synchonize the output buffer, and we just blow up if anyone tries to - // write non-code to it. An assert should probably be added to - // AddSymbolToSection to prevent calling it on the text section. - CurBufferPtr = BufferBegin + MOS->size; + // add it to the symtab. + MOW.SymbolTable.push_back(FnSym); } /// finishFunction - This callback is invoked after the function is completely @@ -71,15 +76,6 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &MF) { // Get the Mach-O Section that this function belongs in. MachOSection *MOS = MOW.getTextSection(); - // Get a symbol for the function to add to the symbol table - // FIXME: it seems like we should call something like AddSymbolToSection - // in startFunction rather than changing the section size and symbol n_value - // here. 
- const GlobalValue *FuncV = MF.getFunction(); - MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TAI); - FnSym.n_value = MOS->size; - MOS->size = CurBufferPtr - BufferBegin; - // Emit constant pool to appropriate section(s) emitConstantPool(MF.getConstantPool()); @@ -110,14 +106,11 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &MF) { // FIXME: This should be a set or something that uniques MOW.PendingGlobals.push_back(MR.getGlobalValue()); } else { - assert(0 && "Unhandled relocation type"); + llvm_unreachable("Unhandled relocation type"); } - MOS->Relocations.push_back(MR); + MOS->addRelocation(MR); } Relocations.clear(); - - // Finally, add it to the symtab. - MOW.SymbolTable.push_back(FnSym); // Clear per-function data structures. CPLocations.clear(); @@ -151,13 +144,10 @@ void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal); - OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian); - CPLocations.push_back(Sec->SectionData.size()); + CPLocations.push_back(Sec->size()); CPSections.push_back(Sec->Index); - - // FIXME: remove when we have unified size + output buffer - Sec->size += Size; // Allocate space in the section for the global. // FIXME: need alignment? @@ -165,14 +155,13 @@ void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { for (unsigned j = 0; j < Size; ++j) SecDataOut.outbyte(0); - MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i], - TM.getTargetData(), Sec->Relocations); + MachOWriter::InitMem(CP[i].Val.ConstVal, CPLocations[i], + TM.getTargetData(), Sec); } } /// emitJumpTables - Emit all the jump tables for a given jump table info /// record to the appropriate section. 
- void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; @@ -183,24 +172,21 @@ void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { MachOSection *Sec = MOW.getJumpTableSection(); unsigned TextSecIndex = MOW.getTextSection()->Index; - OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian); for (unsigned i = 0, e = JT.size(); i != e; ++i) { // For each jump table, record its offset from the start of the section, // reserve space for the relocations to the MBBs, and add the relocations. const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; - JTLocations.push_back(Sec->SectionData.size()); + JTLocations.push_back(Sec->size()); for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { - MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(), - MBBs[mi])); + MachineRelocation MR(MOW.GetJTRelocation(Sec->size(), MBBs[mi])); MR.setResultPointer((void *)JTLocations[i]); MR.setConstantVal(TextSecIndex); - Sec->Relocations.push_back(MR); + Sec->addRelocation(MR); SecDataOut.outaddr(0); } } - // FIXME: remove when we have unified size + output buffer - Sec->size = Sec->SectionData.size(); } } // end namespace llvm diff --git a/lib/CodeGen/MachOCodeEmitter.h b/lib/CodeGen/MachOCodeEmitter.h index 0a6e4e4..4752446 100644 --- a/lib/CodeGen/MachOCodeEmitter.h +++ b/lib/CodeGen/MachOCodeEmitter.h @@ -10,16 +10,17 @@ #ifndef MACHOCODEEMITTER_H #define MACHOCODEEMITTER_H -#include "MachOWriter.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include <vector> +#include "llvm/CodeGen/ObjectCodeEmitter.h" +#include <map> namespace llvm { +class MachOWriter; + /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code /// for functions to the Mach-O file. 
-class MachOCodeEmitter : public MachineCodeEmitter { +class MachOCodeEmitter : public ObjectCodeEmitter { MachOWriter &MOW; /// Target machine description. @@ -29,36 +30,16 @@ class MachOCodeEmitter : public MachineCodeEmitter { /// machine directly, indicating what header values and flags to set. bool is64Bit, isLittleEndian; - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// Relocations - These are the relocations that the function needs, as /// emitted. std::vector<MachineRelocation> Relocations; - - /// CPLocations - This is a map of constant pool indices to offsets from the - /// start of the section for that constant pool index. - std::vector<uintptr_t> CPLocations; - - /// CPSections - This is a map of constant pool indices to the MachOSection - /// containing the constant pool entry for that index. - std::vector<unsigned> CPSections; - - /// JTLocations - This is a map of jump table indices to offsets from the - /// start of the section for that jump table index. - std::vector<uintptr_t> JTLocations; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback. 
- std::vector<uintptr_t> MBBLocations; - + + std::map<uint64_t, uintptr_t> Labels; + public: - MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) - { - is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; - isLittleEndian = TM.getTargetData()->isLittleEndian(); - TAI = TM.getTargetAsmInfo(); - } + MachOCodeEmitter(MachOWriter &mow, MachOSection &mos); virtual void startFunction(MachineFunction &MF); virtual bool finishFunction(MachineFunction &MF); @@ -66,61 +47,20 @@ public: virtual void addRelocation(const MachineRelocation &MR) { Relocations.push_back(MR); } - + void emitConstantPool(MachineConstantPool *MCP); void emitJumpTables(MachineJumpTableInfo *MJTI); - - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; - } - - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); - } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; + virtual void emitLabel(uint64_t LabelID) { + Labels[LabelID] = getCurrentPCOffset(); } virtual uintptr_t getLabelAddress(uint64_t Label) const { - assert(0 && "get Label not implemented"); - abort(); - return 0; - } - - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "emit Label not implemented"); - abort(); + return Labels.find(Label)->second; } virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { } - /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! 
- virtual void startGVStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment = 1) { - assert(0 && "JIT specific function called!"); - abort(); - } - virtual void startGVStub(const GlobalValue* F, void *Buffer, - unsigned StubSize) { - assert(0 && "JIT specific function called!"); - abort(); - } - virtual void *finishGVStub(const GlobalValue* F) { - assert(0 && "JIT specific function called!"); - abort(); - return 0; - } - }; // end class MachOCodeEmitter } // end namespace llvm diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp index 163df69..73b15ed 100644 --- a/lib/CodeGen/MachOWriter.cpp +++ b/lib/CodeGen/MachOWriter.cpp @@ -22,36 +22,32 @@ // //===----------------------------------------------------------------------===// +#include "MachO.h" #include "MachOWriter.h" #include "MachOCodeEmitter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/FileWriters.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetJITInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetMachOWriterInfo.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/OutputBuffer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cstring> namespace llvm { /// AddMachOWriter - Concrete function to add the Mach-O writer to the function /// pass manager. 
-MachineCodeEmitter *AddMachOWriter(PassManagerBase &PM, +ObjectCodeEmitter *AddMachOWriter(PassManagerBase &PM, raw_ostream &O, TargetMachine &TM) { MachOWriter *MOW = new MachOWriter(O, TM); PM.add(MOW); - return &MOW->getMachineCodeEmitter(); + return MOW->getObjectCodeEmitter(); } //===----------------------------------------------------------------------===// @@ -65,15 +61,14 @@ MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm) is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; isLittleEndian = TM.getTargetData()->isLittleEndian(); - TAI = TM.getTargetAsmInfo(); + MAI = TM.getMCAsmInfo(); // Create the machine code emitter object for this target. - - MCE = new MachOCodeEmitter(*this); + MachOCE = new MachOCodeEmitter(*this, *getTextSection(true)); } MachOWriter::~MachOWriter() { - delete MCE; + delete MachOCE; } bool MachOWriter::doInitialization(Module &M) { @@ -97,9 +92,9 @@ bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { /// the Mach-O file to 'O'. bool MachOWriter::doFinalization(Module &M) { // FIXME: we don't handle debug info yet, we should probably do that. - // Okay, the.text section has been completed, build the .data, .bss, and // "common" sections next. 
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) EmitGlobal(I); @@ -125,6 +120,89 @@ bool MachOWriter::doFinalization(Module &M) { return false; } +// getConstSection - Get constant section for Constant 'C' +MachOSection *MachOWriter::getConstSection(Constant *C) { + const ConstantArray *CVA = dyn_cast<ConstantArray>(C); + if (CVA && CVA->isCString()) + return getSection("__TEXT", "__cstring", + MachOSection::S_CSTRING_LITERALS); + + const Type *Ty = C->getType(); + if (Ty->isPrimitiveType() || Ty->isInteger()) { + unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); + switch(Size) { + default: break; // Fall through to __TEXT,__const + case 4: + return getSection("__TEXT", "__literal4", + MachOSection::S_4BYTE_LITERALS); + case 8: + return getSection("__TEXT", "__literal8", + MachOSection::S_8BYTE_LITERALS); + case 16: + return getSection("__TEXT", "__literal16", + MachOSection::S_16BYTE_LITERALS); + } + } + return getSection("__TEXT", "__const"); +} + +// getJumpTableSection - Select the Jump Table section +MachOSection *MachOWriter::getJumpTableSection() { + if (TM.getRelocationModel() == Reloc::PIC_) + return getTextSection(false); + else + return getSection("__TEXT", "__const"); +} + +// getSection - Return the section with the specified name, creating a new +// section if one does not already exist. 
+MachOSection *MachOWriter::getSection(const std::string &seg, + const std::string §, + unsigned Flags /* = 0 */ ) { + MachOSection *MOS = SectionLookup[seg+sect]; + if (MOS) return MOS; + + MOS = new MachOSection(seg, sect); + SectionList.push_back(MOS); + MOS->Index = SectionList.size(); + MOS->flags = MachOSection::S_REGULAR | Flags; + SectionLookup[seg+sect] = MOS; + return MOS; +} + +// getTextSection - Return text section with different flags for code/data +MachOSection *MachOWriter::getTextSection(bool isCode /* = true */ ) { + if (isCode) + return getSection("__TEXT", "__text", + MachOSection::S_ATTR_PURE_INSTRUCTIONS | + MachOSection::S_ATTR_SOME_INSTRUCTIONS); + else + return getSection("__TEXT", "__text"); +} + +MachOSection *MachOWriter::getBSSSection() { + return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL); +} + +// GetJTRelocation - Get a relocation a new BB relocation based +// on target information. +MachineRelocation MachOWriter::GetJTRelocation(unsigned Offset, + MachineBasicBlock *MBB) const { + return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB); +} + +// GetTargetRelocation - Returns the number of relocations. +unsigned MachOWriter::GetTargetRelocation(MachineRelocation &MR, + unsigned FromIdx, unsigned ToAddr, + unsigned ToIndex, OutputBuffer &RelocOut, + OutputBuffer &SecOut, bool Scattered, + bool Extern) { + return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr, + ToIndex, RelocOut, + SecOut, Scattered, + Extern); +} + void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { const Type *Ty = GV->getType()->getElementType(); unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); @@ -133,37 +211,31 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { // Reserve space in the .bss section for this symbol while maintaining the // desired section alignment, which must be at least as much as required by // this symbol. 
- OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian); if (Align) { - uint64_t OrigSize = Sec->size; Align = Log2_32(Align); Sec->align = std::max(unsigned(Sec->align), Align); - Sec->size = (Sec->size + Align - 1) & ~(Align-1); - // Add alignment padding to buffer as well. - // FIXME: remove when we have unified size + output buffer - unsigned AlignedSize = Sec->size - OrigSize; - for (unsigned i = 0; i < AlignedSize; ++i) - SecDataOut.outbyte(0); + Sec->emitAlignment(Sec->align); } // Globals without external linkage apparently do not go in the symbol table. if (!GV->hasLocalLinkage()) { - MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TAI); - Sym.n_value = Sec->size; + MachOSym Sym(GV, Mang->getMangledName(GV), Sec->Index, MAI); + Sym.n_value = Sec->size(); SymbolTable.push_back(Sym); } // Record the offset of the symbol, and then allocate space for it. // FIXME: remove when we have unified size + output buffer - Sec->size += Size; // Now that we know what section the GlovalVariable is going to be emitted // into, update our mappings. // FIXME: We may also need to update this when outputting non-GlobalVariable // GlobalValues such as functions. + GVSection[GV] = Sec; - GVOffset[GV] = Sec->SectionData.size(); + GVOffset[GV] = Sec->size(); // Allocate space in the section for the global. for (unsigned i = 0; i < Size; ++i) @@ -183,8 +255,8 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { // merged with other symbols. if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || GV->hasCommonLinkage()) { - MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), - MachOSym::NO_SECT, TAI); + MachOSym ExtOrCommonSym(GV, Mang->getMangledName(GV), + MachOSym::NO_SECT, MAI); // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in // bytes of the symbol. 
ExtOrCommonSym.n_value = Size; @@ -205,8 +277,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) : getDataSection(); AddSymbolToSection(Sec, GV); - InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV], - TM.getTargetData(), Sec->Relocations); + InitMem(GV->getInitializer(), GVOffset[GV], TM.getTargetData(), Sec); } @@ -214,6 +285,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { void MachOWriter::EmitHeaderAndLoadCommands() { // Step #0: Fill in the segment load command size, since we need it to figure // out the rest of the header fields + MachOSegment SEG("", is64Bit); SEG.nsects = SectionList.size(); SEG.cmdsize = SEG.cmdSize(is64Bit) + @@ -231,7 +303,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { // Step #3: write the header to the file // Local alias to shortenify coming code. - DataBuffer &FH = Header.HeaderData; + std::vector<unsigned char> &FH = Header.HeaderData; OutputBuffer FHOut(FH, is64Bit, isLittleEndian); FHOut.outword(Header.magic); @@ -247,7 +319,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { // Step #4: Finish filling in the segment load command and write it out for (std::vector<MachOSection*>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) - SEG.filesize += (*I)->size; + SEG.filesize += (*I)->size(); SEG.vmsize = SEG.filesize; SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds; @@ -271,9 +343,8 @@ void MachOWriter::EmitHeaderAndLoadCommands() { MachOSection *MOS = *I; MOS->addr = currentAddr; MOS->offset = currentAddr + SEG.fileoff; - // FIXME: do we need to do something with alignment here? 
- currentAddr += MOS->size; + currentAddr += MOS->size(); } // Step #6: Emit the symbol table to temporary buffers, so that we know the @@ -288,6 +359,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { for (std::vector<MachOSection*>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) { MachOSection *MOS = *I; + // Convert the relocations to target-specific relocations, and fill in the // relocation offset for this section. CalculateRelocations(*MOS); @@ -298,7 +370,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { FHOut.outstring(MOS->sectname, 16); FHOut.outstring(MOS->segname, 16); FHOut.outaddr(MOS->addr); - FHOut.outaddr(MOS->size); + FHOut.outaddr(MOS->size()); FHOut.outword(MOS->offset); FHOut.outword(MOS->align); FHOut.outword(MOS->reloff); @@ -351,24 +423,26 @@ void MachOWriter::EmitHeaderAndLoadCommands() { /// EmitSections - Now that we have constructed the file header and load /// commands, emit the data for each section to the file. - void MachOWriter::EmitSections() { for (std::vector<MachOSection*>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) // Emit the contents of each section - O.write((char*)&(*I)->SectionData[0], (*I)->size); + if ((*I)->size()) + O.write((char*)&(*I)->getData()[0], (*I)->size()); } + +/// EmitRelocations - emit relocation data from buffer. void MachOWriter::EmitRelocations() { for (std::vector<MachOSection*>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) // Emit the relocation entry data for each section. - O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size()); + if ((*I)->RelocBuffer.size()) + O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size()); } /// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them /// each a string table index so that they appear in the correct order in the /// output file. - void MachOWriter::BufferSymbolAndStringTable() { // The order of the symbol table is: // 1. 
local symbols @@ -377,11 +451,10 @@ void MachOWriter::BufferSymbolAndStringTable() { // Before sorting the symbols, check the PendingGlobals for any undefined // globals that need to be put in the symbol table. - for (std::vector<GlobalValue*>::iterator I = PendingGlobals.begin(), E = PendingGlobals.end(); I != E; ++I) { if (GVOffset[*I] == 0 && GVSection[*I] == 0) { - MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TAI); + MachOSym UndfSym(*I, Mang->getMangledName(*I), MachOSym::NO_SECT, MAI); SymbolTable.push_back(UndfSym); GVOffset[*I] = -1; } @@ -389,19 +462,16 @@ void MachOWriter::BufferSymbolAndStringTable() { // Sort the symbols by name, so that when we partition the symbols by scope // of definition, we won't have to sort by name within each partition. - std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSym::SymCmp()); // Parition the symbol table entries so that all local symbols come before // all symbols with external linkage. { 1 | 2 3 } - std::partition(SymbolTable.begin(), SymbolTable.end(), MachOSym::PartitionByLocal); // Advance iterator to beginning of external symbols and partition so that // all external symbols defined in this module come before all external // symbols defined elsewhere. { 1 | 2 | 3 } - for (std::vector<MachOSym>::iterator I = SymbolTable.begin(), E = SymbolTable.end(); I != E; ++I) { if (!MachOSym::PartitionByLocal(*I)) { @@ -413,7 +483,6 @@ void MachOWriter::BufferSymbolAndStringTable() { // Calculate the starting index for each of the local, extern defined, and // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB // load command. - for (std::vector<MachOSym>::iterator I = SymbolTable.begin(), E = SymbolTable.end(); I != E; ++I) { if (MachOSym::PartitionByLocal(*I)) { @@ -430,7 +499,6 @@ void MachOWriter::BufferSymbolAndStringTable() { // Write out a leading zero byte when emitting string table, for n_strx == 0 // which means an empty string. 
- OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian); StrTOut.outbyte(0); @@ -439,7 +507,6 @@ void MachOWriter::BufferSymbolAndStringTable() { // 2. strings for local symbols // Since this is the opposite order from the symbol table, which we have just // sorted, we can walk the symbol table backwards to output the string table. - for (std::vector<MachOSym>::reverse_iterator I = SymbolTable.rbegin(), E = SymbolTable.rend(); I != E; ++I) { if (I->GVName == "") { @@ -478,24 +545,22 @@ void MachOWriter::BufferSymbolAndStringTable() { /// and the offset into that section. From this information, create the /// appropriate target-specific MachORelocation type and add buffer it to be /// written out after we are finished writing out sections. - void MachOWriter::CalculateRelocations(MachOSection &MOS) { - for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) { - MachineRelocation &MR = MOS.Relocations[i]; + std::vector<MachineRelocation> Relocations = MOS.getRelocations(); + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; unsigned TargetSection = MR.getConstantVal(); unsigned TargetAddr = 0; unsigned TargetIndex = 0; // This is a scattered relocation entry if it points to a global value with // a non-zero offset. - bool Scattered = false; bool Extern = false; // Since we may not have seen the GlobalValue we were interested in yet at // the time we emitted the relocation for it, fix it up now so that it // points to the offset into the correct section. - if (MR.isGlobalValue()) { GlobalValue *GV = MR.getGlobalValue(); MachOSection *MOSPtr = GVSection[GV]; @@ -503,7 +568,6 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // If we have never seen the global before, it must be to a symbol // defined in another module (N_UNDF). 
- if (!MOSPtr) { // FIXME: need to append stub suffix Extern = true; @@ -518,7 +582,6 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // If the symbol is locally defined, pass in the address of the section and // the section index to the code which will generate the target relocation. - if (!Extern) { MachOSection &To = *SectionList[TargetSection - 1]; TargetAddr = To.addr; @@ -526,7 +589,7 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { } OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian); - OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian); + OutputBuffer SecOut(MOS.getData(), is64Bit, isLittleEndian); MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex, RelocOut, SecOut, Scattered, Extern); @@ -535,12 +598,11 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // InitMem - Write the value of a Constant to the specified memory location, // converting it into bytes and relocations. - -void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, - const TargetData *TD, - std::vector<MachineRelocation> &MRs) { +void MachOWriter::InitMem(const Constant *C, uintptr_t Offset, + const TargetData *TD, MachOSection* mos) { typedef std::pair<const Constant*, intptr_t> CPair; std::vector<CPair> WorkList; + uint8_t *Addr = &mos->getData()[0]; WorkList.push_back(CPair(C,(intptr_t)Addr + Offset)); @@ -572,9 +634,8 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, } case Instruction::Add: default: - cerr << "ConstantExpr not handled as global var init: " << *CE << "\n"; - abort(); - break; + errs() << "ConstantExpr not handled as global var init: " << *CE <<"\n"; + llvm_unreachable(0); } } else if (PC->getType()->isSingleValueType()) { unsigned char *ptr = (unsigned char *)PA; @@ -608,7 +669,7 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, ptr[6] = val >> 48; ptr[7] = val >> 56; } else { - assert(0 && "Not implemented: bit 
widths > 64"); + llvm_unreachable("Not implemented: bit widths > 64"); } break; } @@ -643,17 +704,19 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, memset(ptr, 0, TD->getPointerSize()); else if (const GlobalValue* GV = dyn_cast<GlobalValue>(PC)) { // FIXME: what about function stubs? - MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr, + mos->addRelocation(MachineRelocation::getGV(PA-(intptr_t)Addr, MachineRelocation::VANILLA, const_cast<GlobalValue*>(GV), ScatteredOffset)); ScatteredOffset = 0; } else - assert(0 && "Unknown constant pointer type!"); + llvm_unreachable("Unknown constant pointer type!"); break; default: - cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "ERROR: Constant unimp for type: " << *PC->getType(); + llvm_report_error(Msg.str()); } } else if (isa<ConstantAggregateZero>(PC)) { memset((void*)PA, 0, (size_t)TD->getTypeAllocSize(PC->getType())); @@ -669,8 +732,8 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, WorkList.push_back(CPair(CPS->getOperand(i), PA+SL->getElementOffset(i))); } else { - cerr << "Bad Type: " << *PC->getType() << "\n"; - assert(0 && "Unknown constant type to initialize memory with!"); + errs() << "Bad Type: " << *PC->getType() << "\n"; + llvm_unreachable("Unknown constant type to initialize memory with!"); } } } @@ -680,13 +743,14 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, //===----------------------------------------------------------------------===// MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, - const TargetAsmInfo *TAI) : + const MCAsmInfo *MAI) : GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect), n_desc(0), n_value(0) { + // FIXME: This is completely broken, it should use the mangler interface. 
switch (GV->getLinkage()) { default: - assert(0 && "Unexpected linkage type!"); + llvm_unreachable("Unexpected linkage type!"); break; case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: @@ -695,17 +759,19 @@ MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, case GlobalValue::CommonLinkage: assert(!isa<Function>(gv) && "Unexpected linkage type for Function!"); case GlobalValue::ExternalLinkage: - GVName = TAI->getGlobalPrefix() + name; + GVName = MAI->getGlobalPrefix() + name; n_type |= GV->hasHiddenVisibility() ? N_PEXT : N_EXT; break; case GlobalValue::PrivateLinkage: - GVName = TAI->getPrivateGlobalPrefix() + name; + GVName = MAI->getPrivateGlobalPrefix() + name; + break; + case GlobalValue::LinkerPrivateLinkage: + GVName = MAI->getLinkerPrivateGlobalPrefix() + name; break; case GlobalValue::InternalLinkage: - GVName = TAI->getGlobalPrefix() + name; + GVName = MAI->getGlobalPrefix() + name; break; } } } // end namespace llvm - diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h index 3af2b0a..9273f38 100644 --- a/lib/CodeGen/MachOWriter.h +++ b/lib/CodeGen/MachOWriter.h @@ -14,22 +14,28 @@ #ifndef MACHOWRITER_H #define MACHOWRITER_H -#include "MachO.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetMachOWriterInfo.h" +#include <vector> #include <map> namespace llvm { + class Constant; class GlobalVariable; class Mangler; - class MachineCodeEmitter; + class MachineBasicBlock; + class MachineRelocation; class MachOCodeEmitter; + struct MachODySymTab; + struct MachOHeader; + struct MachOSection; + struct MachOSym; + class TargetData; + class TargetMachine; + class MCAsmInfo; + class ObjectCodeEmitter; class OutputBuffer; class raw_ostream; - /// MachOWriter - This class implements the common target-independent code for /// writing Mach-O files. 
Targets should derive a class from this to /// parameterize the output format. @@ -38,8 +44,9 @@ namespace llvm { friend class MachOCodeEmitter; public: static char ID; - MachineCodeEmitter &getMachineCodeEmitter() const { - return *(MachineCodeEmitter*)MCE; + + ObjectCodeEmitter *getObjectCodeEmitter() { + return reinterpret_cast<ObjectCodeEmitter*>(MachOCE); } MachOWriter(raw_ostream &O, TargetMachine &TM); @@ -61,36 +68,30 @@ namespace llvm { /// Mang - The object used to perform name mangling for this module. /// Mangler *Mang; - - /// MCE - The MachineCodeEmitter object that we are exposing to emit machine - /// code for functions to the .o file. - MachOCodeEmitter *MCE; + /// MachOCE - The MachineCodeEmitter object that we are exposing to emit + /// machine code for functions to the .o file. + MachOCodeEmitter *MachOCE; /// is64Bit/isLittleEndian - This information is inferred from the target /// machine directly, indicating what header values and flags to set. - bool is64Bit, isLittleEndian; // Target Asm Info - - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// Header - An instance of MachOHeader that we will update while we build /// the file, and then emit during finalization. - MachOHeader Header; /// doInitialization - Emit the file header and all of the global variables /// for the module to the Mach-O file. - bool doInitialization(Module &M); bool runOnMachineFunction(MachineFunction &MF); /// doFinalization - Now that the module has been completely processed, emit /// the Mach-O file to 'O'. - bool doFinalization(Module &M); private: @@ -98,85 +99,37 @@ namespace llvm { /// SectionList - This is the list of sections that we have emitted to the /// file. Once the file has been completely built, the segment load command /// SectionCommands are constructed from this info. 
- std::vector<MachOSection*> SectionList; /// SectionLookup - This is a mapping from section name to SectionList entry - std::map<std::string, MachOSection*> SectionLookup; - + /// GVSection - This is a mapping from a GlobalValue to a MachOSection, /// to aid in emitting relocations. - std::map<GlobalValue*, MachOSection*> GVSection; - /// GVOffset - This is a mapping from a GlobalValue to an offset from the + /// GVOffset - This is a mapping from a GlobalValue to an offset from the /// start of the section in which the GV resides, to aid in emitting /// relocations. - std::map<GlobalValue*, intptr_t> GVOffset; /// getSection - Return the section with the specified name, creating a new /// section if one does not already exist. - MachOSection *getSection(const std::string &seg, const std::string §, - unsigned Flags = 0) { - MachOSection *MOS = SectionLookup[seg+sect]; - if (MOS) return MOS; - - MOS = new MachOSection(seg, sect); - SectionList.push_back(MOS); - MOS->Index = SectionList.size(); - MOS->flags = MachOSection::S_REGULAR | Flags; - SectionLookup[seg+sect] = MOS; - return MOS; - } - MachOSection *getTextSection(bool isCode = true) { - if (isCode) - return getSection("__TEXT", "__text", - MachOSection::S_ATTR_PURE_INSTRUCTIONS | - MachOSection::S_ATTR_SOME_INSTRUCTIONS); - else - return getSection("__TEXT", "__text"); - } - MachOSection *getBSSSection() { - return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL); - } + unsigned Flags = 0); + + /// getTextSection - Return text section with different flags for code/data + MachOSection *getTextSection(bool isCode = true); + MachOSection *getDataSection() { return getSection("__DATA", "__data"); } - MachOSection *getConstSection(Constant *C) { - const ConstantArray *CVA = dyn_cast<ConstantArray>(C); - if (CVA && CVA->isCString()) - return getSection("__TEXT", "__cstring", - MachOSection::S_CSTRING_LITERALS); - - const Type *Ty = C->getType(); - if (Ty->isPrimitiveType() || Ty->isInteger()) { - unsigned 
Size = TM.getTargetData()->getTypeAllocSize(Ty); - switch(Size) { - default: break; // Fall through to __TEXT,__const - case 4: - return getSection("__TEXT", "__literal4", - MachOSection::S_4BYTE_LITERALS); - case 8: - return getSection("__TEXT", "__literal8", - MachOSection::S_8BYTE_LITERALS); - case 16: - return getSection("__TEXT", "__literal16", - MachOSection::S_16BYTE_LITERALS); - } - } - return getSection("__TEXT", "__const"); - } - MachOSection *getJumpTableSection() { - if (TM.getRelocationModel() == Reloc::PIC_) - return getTextSection(false); - else - return getSection("__TEXT", "__const"); - } - - /// MachOSymTab - This struct contains information about the offsets and + + MachOSection *getBSSSection(); + MachOSection *getConstSection(Constant *C); + MachOSection *getJumpTableSection(); + + /// MachOSymTab - This struct contains information about the offsets and /// size of symbol table information. /// segment. struct MachOSymTab { @@ -191,43 +144,42 @@ namespace llvm { // see <mach-o/loader.h> enum { LC_SYMTAB = 0x02 // link-edit stab symbol table info }; - + MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0), nsyms(0), stroff(0), strsize(0) { } }; - + /// SymTab - The "stab" style symbol table information - MachOSymTab SymTab; + MachOSymTab SymTab; /// DySymTab - symbol table info for the dynamic link editor MachODySymTab DySymTab; protected: - + /// SymbolTable - This is the list of symbols we have emitted to the file. /// This actually gets rearranged before emission to the file (to put the /// local symbols first in the list). std::vector<MachOSym> SymbolTable; - + /// SymT - A buffer to hold the symbol table before we write it out at the /// appropriate location in the file. - DataBuffer SymT; - + std::vector<unsigned char> SymT; + /// StrT - A buffer to hold the string table before we write it out at the /// appropriate location in the file. 
- DataBuffer StrT; - + std::vector<unsigned char> StrT; + /// PendingSyms - This is a list of externally defined symbols that we have /// been asked to emit, but have not seen a reference to. When a reference /// is seen, the symbol will move from this list to the SymbolTable. std::vector<GlobalValue*> PendingGlobals; - + /// DynamicSymbolTable - This is just a vector of indices into /// SymbolTable to aid in emitting the DYSYMTAB load command. std::vector<unsigned> DynamicSymbolTable; - - static void InitMem(const Constant *C, void *Addr, intptr_t Offset, - const TargetData *TD, - std::vector<MachineRelocation> &MRs); + + static void InitMem(const Constant *C, uintptr_t Offset, + const TargetData *TD, MachOSection* mos); private: void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV); @@ -238,25 +190,16 @@ namespace llvm { void BufferSymbolAndStringTable(); void CalculateRelocations(MachOSection &MOS); + // GetJTRelocation - Get a relocation a new BB relocation based + // on target information. MachineRelocation GetJTRelocation(unsigned Offset, - MachineBasicBlock *MBB) const { - return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB); - } + MachineBasicBlock *MBB) const; /// GetTargetRelocation - Returns the number of relocations. 
- unsigned GetTargetRelocation(MachineRelocation &MR, - unsigned FromIdx, - unsigned ToAddr, - unsigned ToIndex, - OutputBuffer &RelocOut, - OutputBuffer &SecOut, - bool Scattered, - bool Extern) { - return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr, - ToIndex, RelocOut, - SecOut, Scattered, - Extern); - } + unsigned GetTargetRelocation(MachineRelocation &MR, unsigned FromIdx, + unsigned ToAddr, unsigned ToIndex, + OutputBuffer &RelocOut, OutputBuffer &SecOut, + bool Scattered, bool Extern); }; } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 71e6b3e..b3eb2da 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/LeakDetector.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -31,7 +32,7 @@ MachineBasicBlock::~MachineBasicBlock() { LeakDetector::removeGarbageObject(this); } -std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { MBB.print(OS); return OS; } @@ -43,7 +44,7 @@ std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) { /// MBBs start out as #-1. When a MBB is added to a MachineFunction, it /// gets the next available unique MBB number. If it is removed from a /// MachineFunction, it goes back to being #-1. 
-void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) { +void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { MachineFunction &MF = *N->getParent(); N->Number = MF.addToMBBNumbering(N); @@ -55,7 +56,7 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock* N) { LeakDetector::removeGarbageObject(N); } -void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) { +void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { N->getParent()->removeFromMBBNumbering(N->Number); N->Number = -1; LeakDetector::addGarbageObject(N); @@ -65,7 +66,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock* N) { /// addNodeToList (MI) - When we add an instruction to a basic block /// list, we update its parent pointer and add its operands from reg use/def /// lists if appropriate. -void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) { +void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { assert(N->getParent() == 0 && "machine instruction already in a basic block"); N->setParent(Parent); @@ -80,7 +81,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr* N) { /// removeNodeFromList (MI) - When we remove an instruction from a basic block /// list, we update its parent pointer and remove its operands from reg use/def /// lists if appropriate. -void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) { +void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() != 0 && "machine instruction not in a basic block"); // Remove from the use/def lists. @@ -94,10 +95,10 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr* N) { /// transferNodesFromList (MI) - When moving a range of instructions from one /// MBB list to another, we need to update the parent pointers and the use/def /// lists. 
-void ilist_traits<MachineInstr>::transferNodesFromList( - ilist_traits<MachineInstr>& fromList, - MachineBasicBlock::iterator first, - MachineBasicBlock::iterator last) { +void ilist_traits<MachineInstr>:: +transferNodesFromList(ilist_traits<MachineInstr> &fromList, + MachineBasicBlock::iterator first, + MachineBasicBlock::iterator last) { assert(Parent->getParent() == fromList.Parent->getParent() && "MachineInstr parent mismatch!"); @@ -123,21 +124,41 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { return I; } -bool -MachineBasicBlock::isOnlyReachableByFallthrough() const { - return !isLandingPad() && - !pred_empty() && - next(pred_begin()) == pred_end() && - (*pred_begin())->isLayoutSuccessor(this) && - ((*pred_begin())->empty() || - !(*pred_begin())->back().getDesc().isBarrier()); +/// isOnlyReachableViaFallthough - Return true if this basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool MachineBasicBlock::isOnlyReachableByFallthrough() const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (isLandingPad() || pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + const_pred_iterator PI = pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != pred_end()) + return false; + + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(this)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Otherwise, check the last instruction. 
+ const MachineInstr &LastInst = Pred->back(); + return !LastInst.getDesc().isBarrier(); } void MachineBasicBlock::dump() const { - print(*cerr.stream()); + print(errs()); } -static inline void OutputReg(std::ostream &os, unsigned RegNo, +static inline void OutputReg(raw_ostream &os, unsigned RegNo, const TargetRegisterInfo *TRI = 0) { if (!RegNo || TargetRegisterInfo::isPhysicalRegister(RegNo)) { if (TRI) @@ -148,16 +169,16 @@ static inline void OutputReg(std::ostream &os, unsigned RegNo, os << " %reg" << RegNo; } -void MachineBasicBlock::print(std::ostream &OS) const { +void MachineBasicBlock::print(raw_ostream &OS) const { const MachineFunction *MF = getParent(); - if(!MF) { + if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" << " is null\n"; return; } const BasicBlock *LBB = getBasicBlock(); - OS << "\n"; + OS << '\n'; if (LBB) OS << LBB->getName() << ": "; OS << (const void*)this << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber(); @@ -170,18 +191,18 @@ void MachineBasicBlock::print(std::ostream &OS) const { OS << "Live Ins:"; for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) OutputReg(OS, *I, TRI); - OS << "\n"; + OS << '\n'; } // Print the preds of this block according to the CFG. 
if (!pred_empty()) { OS << " Predecessors according to CFG:"; for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) - OS << " " << *PI << " (#" << (*PI)->getNumber() << ")"; - OS << "\n"; + OS << ' ' << *PI << " (#" << (*PI)->getNumber() << ')'; + OS << '\n'; } for (const_iterator I = begin(); I != end(); ++I) { - OS << "\t"; + OS << '\t'; I->print(OS, &getParent()->getTarget()); } @@ -189,8 +210,8 @@ void MachineBasicBlock::print(std::ostream &OS) const { if (!succ_empty()) { OS << " Successors according to CFG:"; for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) - OS << " " << *SI << " (#" << (*SI)->getNumber() << ")"; - OS << "\n"; + OS << ' ' << *SI << " (#" << (*SI)->getNumber() << ')'; + OS << '\n'; } } @@ -245,16 +266,15 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { Predecessors.erase(I); } -void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) -{ +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - for(MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(), - end = fromMBB->succ_end(); iter != end; ++iter) { - addSuccessor(*iter); - } - while(!fromMBB->succ_empty()) + for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(), + E = fromMBB->succ_end(); I != E; ++I) + addSuccessor(*I); + + while (!fromMBB->succ_empty()) fromMBB->removeSuccessor(fromMBB->succ_begin()); } diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 37c8601..0f796f3 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -51,3 +51,7 @@ MachineDominatorTree::~MachineDominatorTree() { void MachineDominatorTree::releaseMemory() { DT->releaseMemory(); } + +void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { + DT->print(OS); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 599efb8..b0ec809 100644 --- 
a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -32,89 +33,56 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include <fstream> -#include <sstream> using namespace llvm; -bool MachineFunctionPass::runOnFunction(Function &F) { - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. - if (F.hasAvailableExternallyLinkage()) - return false; - - return runOnMachineFunction(MachineFunction::get(&F)); -} - namespace { struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass { static char ID; - std::ostream *OS; + raw_ostream &OS; const std::string Banner; - Printer (std::ostream *os, const std::string &banner) + Printer(raw_ostream &os, const std::string &banner) : MachineFunctionPass(&ID), OS(os), Banner(banner) {} const char *getPassName() const { return "MachineFunction Printer"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) { - (*OS) << Banner; - MF.print (*OS); + OS << Banner; + MF.print(OS); return false; } }; char Printer::ID = 0; } -/// Returns a newly-created MachineFunction Printer pass. The default output -/// stream is std::cerr; the default banner is empty. +/// Returns a newly-created MachineFunction Printer pass. The default banner is +/// empty. 
/// -FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS, +FunctionPass *llvm::createMachineFunctionPrinterPass(raw_ostream &OS, const std::string &Banner){ return new Printer(OS, Banner); } -namespace { - struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass { - static char ID; - Deleter() : MachineFunctionPass(&ID) {} - - const char *getPassName() const { return "Machine Code Deleter"; } - - bool runOnMachineFunction(MachineFunction &MF) { - // Delete the annotation from the function now. - MachineFunction::destruct(MF.getFunction()); - return true; - } - }; - char Deleter::ID = 0; -} - -/// MachineCodeDeletion Pass - This pass deletes all of the machine code for -/// the current function, which should happen after the function has been -/// emitted to a .s file or to memory. -FunctionPass *llvm::createMachineCodeDeleter() { - return new Deleter(); -} - - - //===---------------------------------------------------------------------===// // MachineFunction implementation //===---------------------------------------------------------------------===// +// Out of line virtual method. +MachineFunctionInfo::~MachineFunctionInfo() {} + void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } -MachineFunction::MachineFunction(const Function *F, +MachineFunction::MachineFunction(Function *F, const TargetMachine &TM) - : Annotation(AnnotationManager::getID("CodeGen::MachineCodeForFunction")), - Fn(F), Target(TM) { + : Fn(F), Target(TM) { if (TM.getRegisterInfo()) RegInfo = new (Allocator.Allocate<MachineRegisterInfo>()) MachineRegisterInfo(*TM.getRegisterInfo()); @@ -131,7 +99,8 @@ MachineFunction::MachineFunction(const Function *F, const TargetData &TD = *TM.getTargetData(); bool IsPic = TM.getRelocationModel() == Reloc::PIC_; unsigned EntrySize = IsPic ? 4 : TD.getPointerSize(); - unsigned TyAlignment = IsPic ? 
TD.getABITypeAlignment(Type::Int32Ty) + unsigned TyAlignment = IsPic ? + TD.getABITypeAlignment(Type::getInt32Ty(F->getContext())) : TD.getPointerABIAlignment(); JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>()) MachineJumpTableInfo(EntrySize, TyAlignment); @@ -221,11 +190,6 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) { /// void MachineFunction::DeleteMachineInstr(MachineInstr *MI) { - // Clear the instructions memoperands. This must be done manually because - // the instruction's parent pointer is now null, so it can't properly - // deallocate them on its own. - MI->clearMemOperands(*this); - MI->~MachineInstr(); InstructionRecycler.Deallocate(Allocator, MI); } @@ -248,12 +212,99 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { BasicBlockRecycler.Deallocate(Allocator, MBB); } +MachineMemOperand * +MachineFunction::getMachineMemOperand(const Value *v, unsigned f, + int64_t o, uint64_t s, + unsigned base_alignment) { + return new (Allocator.Allocate<MachineMemOperand>()) + MachineMemOperand(v, f, o, s, base_alignment); +} + +MachineMemOperand * +MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, + int64_t Offset, uint64_t Size) { + return new (Allocator.Allocate<MachineMemOperand>()) + MachineMemOperand(MMO->getValue(), MMO->getFlags(), + int64_t(uint64_t(MMO->getOffset()) + + uint64_t(Offset)), + Size, MMO->getBaseAlignment()); +} + +MachineInstr::mmo_iterator +MachineFunction::allocateMemRefsArray(unsigned long Num) { + return Allocator.Allocate<MachineMemOperand *>(Num); +} + +std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> +MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of load mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isLoad()) + ++Num; + + // Allocate a new array and populate it with the load information. 
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isLoad()) { + if (!(*I)->isStore()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the store flag. + MachineMemOperand *JustLoad = + getMachineMemOperand((*I)->getValue(), + (*I)->getFlags() & ~MachineMemOperand::MOStore, + (*I)->getOffset(), (*I)->getSize(), + (*I)->getBaseAlignment()); + Result[Index] = JustLoad; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + +std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> +MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of store mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isStore()) + ++Num; + + // Allocate a new array and populate it with the store information. + MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isStore()) { + if (!(*I)->isLoad()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the load flag. 
+ MachineMemOperand *JustStore = + getMachineMemOperand((*I)->getValue(), + (*I)->getFlags() & ~MachineMemOperand::MOLoad, + (*I)->getOffset(), (*I)->getSize(), + (*I)->getBaseAlignment()); + Result[Index] = JustStore; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + void MachineFunction::dump() const { - print(*cerr.stream()); + print(errs()); } -void MachineFunction::print(std::ostream &OS) const { - OS << "# Machine code for " << Fn->getName () << "():\n"; +void MachineFunction::print(raw_ostream &OS) const { + OS << "# Machine code for " << Fn->getName() << "():\n"; // Print Frame Information FrameInfo->print(*this, OS); @@ -262,10 +313,7 @@ void MachineFunction::print(std::ostream &OS) const { JumpTableInfo->print(OS); // Print Constant Pool - { - raw_os_ostream OSS(OS); - ConstantPool->print(OSS); - } + ConstantPool->print(OS); const TargetRegisterInfo *TRI = getTarget().getRegisterInfo(); @@ -279,32 +327,32 @@ void MachineFunction::print(std::ostream &OS) const { OS << " Reg #" << I->first; if (I->second) - OS << " in VR#" << I->second << " "; + OS << " in VR#" << I->second << ' '; } - OS << "\n"; + OS << '\n'; } if (RegInfo && !RegInfo->liveout_empty()) { OS << "Live Outs:"; for (MachineRegisterInfo::liveout_iterator I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I) if (TRI) - OS << " " << TRI->getName(*I); + OS << ' ' << TRI->getName(*I); else OS << " Reg #" << *I; - OS << "\n"; + OS << '\n'; } - for (const_iterator BB = begin(); BB != end(); ++BB) + for (const_iterator BB = begin(), E = end(); BB != E; ++BB) BB->print(OS); - OS << "\n# End machine code for " << Fn->getName () << "().\n\n"; + OS << "\n# End machine code for " << Fn->getName() << "().\n\n"; } namespace llvm { template<> struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits { static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getName() + "' function"; + return "CFG for '" + 
F->getFunction()->getNameStr() + "' function"; } static std::string getNodeLabel(const MachineBasicBlock *Node, @@ -312,17 +360,18 @@ namespace llvm { bool ShortNames) { if (ShortNames && Node->getBasicBlock() && !Node->getBasicBlock()->getName().empty()) - return Node->getBasicBlock()->getName() + ":"; - - std::ostringstream Out; - if (ShortNames) { - Out << Node->getNumber() << ':'; - return Out.str(); + return Node->getBasicBlock()->getNameStr() + ":"; + + std::string OutStr; + { + raw_string_ostream OSS(OutStr); + + if (ShortNames) + OSS << Node->getNumber() << ':'; + else + Node->print(OSS); } - Node->print(Out); - - std::string OutStr = Out.str(); if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); // Process string output to make it nicer... @@ -339,59 +388,23 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName()); + ViewGraph(this, "mf" + getFunction()->getNameStr()); #else - cerr << "SelectionDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName(), true); + ViewGraph(this, "mf" + getFunction()->getNameStr(), true); #else - cerr << "SelectionDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } -// The next two methods are used to construct and to retrieve -// the MachineCodeForFunction object for the given function. -// construct() -- Allocates and initializes for a given function and target -// get() -- Returns a handle to the object. -// This should not be called before "construct()" -// for a given Function. 
-// -MachineFunction& -MachineFunction::construct(const Function *Fn, const TargetMachine &Tar) -{ - AnnotationID MF_AID = - AnnotationManager::getID("CodeGen::MachineCodeForFunction"); - assert(Fn->getAnnotation(MF_AID) == 0 && - "Object already exists for this function!"); - MachineFunction* mcInfo = new MachineFunction(Fn, Tar); - Fn->addAnnotation(mcInfo); - return *mcInfo; -} - -void MachineFunction::destruct(const Function *Fn) { - AnnotationID MF_AID = - AnnotationManager::getID("CodeGen::MachineCodeForFunction"); - bool Deleted = Fn->deleteAnnotation(MF_AID); - assert(Deleted && "Machine code did not exist for function!"); - Deleted = Deleted; // silence warning when no assertions. -} - -MachineFunction& MachineFunction::get(const Function *F) -{ - AnnotationID MF_AID = - AnnotationManager::getID("CodeGen::MachineCodeForFunction"); - MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID); - assert(mc && "Call construct() method first to allocate the object"); - return *mc; -} - /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. unsigned MachineFunction::addLiveIn(unsigned PReg, @@ -402,23 +415,6 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, return VReg; } -/// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given -/// source file, line, and column. If none currently exists, create a new -/// DebugLocTuple, and insert it into the DebugIdMap. -unsigned MachineFunction::getOrCreateDebugLocID(GlobalVariable *CompileUnit, - unsigned Line, unsigned Col) { - DebugLocTuple Tuple(CompileUnit, Line, Col); - DenseMap<DebugLocTuple, unsigned>::iterator II - = DebugLocInfo.DebugIdMap.find(Tuple); - if (II != DebugLocInfo.DebugIdMap.end()) - return II->second; - // Add a new tuple. 
- unsigned Id = DebugLocInfo.DebugLocations.size(); - DebugLocInfo.DebugLocations.push_back(Tuple); - DebugLocInfo.DebugIdMap[Tuple] = Id; - return Id; -} - /// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object. DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const { unsigned Idx = DL.getIndex(); @@ -444,7 +440,38 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, } -void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{ +BitVector +MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { + assert(MBB && "MBB must be valid"); + const MachineFunction *MF = MBB->getParent(); + assert(MF && "MBB must be part of a MachineFunction"); + const TargetMachine &TM = MF->getTarget(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + BitVector BV(TRI->getNumRegs()); + + // Before CSI is calculated, no registers are considered pristine. They can be + // freely used and PEI will make sure they are saved. + if (!isCalleeSavedInfoValid()) + return BV; + + for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + BV.set(*CSR); + + // The entry MBB always has all CSRs pristine. + if (MBB == &MF->front()) + return BV; + + // On other MBBs the saved CSRs are not pristine. + const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo(); + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) + BV.reset(I->getReg()); + + return BV; +} + + +void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ const TargetFrameInfo *FI = MF.getTarget().getFrameInfo(); int ValOffset = (FI ? 
FI->getOffsetOfLocalArea() : 0); @@ -481,10 +508,9 @@ void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{ } void MachineFrameInfo::dump(const MachineFunction &MF) const { - print(MF, *cerr.stream()); + print(MF, errs()); } - //===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// @@ -521,7 +547,7 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, return MadeChange; } -void MachineJumpTableInfo::print(std::ostream &OS) const { +void MachineJumpTableInfo::print(raw_ostream &OS) const { // FIXME: this is lame, maybe we could print out the MBB numbers or something // like {1, 2, 4, 5, 3, 0} for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) { @@ -530,7 +556,7 @@ void MachineJumpTableInfo::print(std::ostream &OS) const { } } -void MachineJumpTableInfo::dump() const { print(*cerr.stream()); } +void MachineJumpTableInfo::dump() const { print(errs()); } //===----------------------------------------------------------------------===// @@ -539,10 +565,17 @@ void MachineJumpTableInfo::dump() const { print(*cerr.stream()); } const Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) - return Val.MachineCPVal->getType(); + return Val.MachineCPVal->getType(); return Val.ConstVal->getType(); } + +unsigned MachineConstantPoolEntry::getRelocationInfo() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getRelocationInfo(); + return Val.ConstVal->getRelocationInfo(); +} + MachineConstantPool::~MachineConstantPool() { for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (Constants[i].isMachineConstantPoolEntry()) diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp new file mode 100644 index 0000000..56294d9 --- /dev/null +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -0,0 +1,50 @@ 
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionAnalysis members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +using namespace llvm; + +// Register this pass with PassInfo directly to avoid having to define +// a default constructor. +static PassInfo +X("Machine Function Analysis", "machine-function-analysis", + intptr_t(&MachineFunctionAnalysis::ID), 0, + /*CFGOnly=*/false, /*is_analysis=*/true); + +char MachineFunctionAnalysis::ID = 0; + +MachineFunctionAnalysis::MachineFunctionAnalysis(TargetMachine &tm, + CodeGenOpt::Level OL) : + FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) { +} + +MachineFunctionAnalysis::~MachineFunctionAnalysis() { + releaseMemory(); + assert(!MF && "MachineFunctionAnalysis left initialized!"); +} + +bool MachineFunctionAnalysis::runOnFunction(Function &F) { + assert(!MF && "MachineFunctionAnalysis already initialized!"); + MF = new MachineFunction(&F, TM); + return false; +} + +void MachineFunctionAnalysis::releaseMemory() { + delete MF; + MF = 0; +} + +void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp new file mode 100644 index 0000000..2f8d4c9 --- /dev/null +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -0,0 +1,50 @@ +//===-- MachineFunctionPass.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionPass members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +bool MachineFunctionPass::runOnFunction(Function &F) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (F.hasAvailableExternallyLinkage()) + return false; + + MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF(); + return runOnMachineFunction(MF); +} + +void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + + // MachineFunctionPass preserves all LLVM IR passes, but there's no + // high-level way to express this. Instead, just list a bunch of + // passes explicitly. This does not include setPreservesCFG, + // because CodeGen overloads that to mean preserving the MachineBasicBlock + // CFG in addition to the LLVM IR CFG. 
+ AU.addPreserved<AliasAnalysis>(); + AU.addPreserved("scalar-evolution"); + AU.addPreserved("iv-users"); + AU.addPreserved("memdep"); + AU.addPreserved("live-values"); + AU.addPreserved("domtree"); + AU.addPreserved("domfrontier"); + AU.addPreserved("loops"); + AU.addPreserved("lda"); + + FunctionPass::getAnalysisUsage(AU); +} diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index d44305f..cbe5c7c 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -15,17 +15,20 @@ #include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/Value.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" using namespace llvm; @@ -156,7 +159,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return false; switch (getType()) { - default: assert(0 && "Unrecognized operand type"); + default: llvm_unreachable("Unrecognized operand type"); case MachineOperand::MO_Register: return getReg() == Other.getReg() && isDef() == Other.isDef() && getSubReg() == Other.getSubReg(); @@ -182,11 +185,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { /// print - Print the specified machine operand. 
/// -void MachineOperand::print(std::ostream &OS, const TargetMachine *TM) const { - raw_os_ostream RawOS(OS); - print(RawOS, TM); -} - void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { switch (getType()) { case MachineOperand::MO_Register: @@ -242,7 +240,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << getImm(); break; case MachineOperand::MO_FPImmediate: - if (getFPImm()->getType() == Type::FloatTy) + if (getFPImm()->getType()->isFloatTy()) OS << getFPImm()->getValueAPF().convertToFloat(); else OS << getFPImm()->getValueAPF().convertToDouble(); @@ -274,7 +272,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << '>'; break; default: - assert(0 && "Unrecognized operand type"); + llvm_unreachable("Unrecognized operand type"); } if (unsigned TF = getTargetFlags()) @@ -289,7 +287,7 @@ MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s, unsigned int a) : Offset(o), Size(s), V(v), Flags((f & 7) | ((Log2_32(a) + 1) << 3)) { - assert(isPowerOf2_32(a) && "Alignment is not a power of 2!"); + assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); } @@ -302,6 +300,66 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Flags); } +void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { + // The Value and Offset may differ due to CSE. But the flags and size + // should be the same. + assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); + assert(MMO->getSize() == getSize() && "Size mismatch!"); + + if (MMO->getBaseAlignment() >= getBaseAlignment()) { + // Update the alignment value. + Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3); + // Also update the base and offset, because the new alignment may + // not be applicable with the old ones. 
+ V = MMO->getValue(); + Offset = MMO->getOffset(); + } +} + +/// getAlignment - Return the minimum known alignment in bytes of the +/// actual memory reference. +uint64_t MachineMemOperand::getAlignment() const { + return MinAlign(getBaseAlignment(), getOffset()); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { + assert((MMO.isLoad() || MMO.isStore()) && + "SV has to be a load, store or both."); + + if (MMO.isVolatile()) + OS << "Volatile "; + + if (MMO.isLoad()) + OS << "LD"; + if (MMO.isStore()) + OS << "ST"; + OS << MMO.getSize(); + + // Print the address information. + OS << "["; + if (!MMO.getValue()) + OS << "<unknown>"; + else + WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false); + + // If the alignment of the memory reference itself differs from the alignment + // of the base pointer, print the base alignment explicitly, next to the base + // pointer. + if (MMO.getBaseAlignment() != MMO.getAlignment()) + OS << "(align=" << MMO.getBaseAlignment() << ")"; + + if (MMO.getOffset() != 0) + OS << "+" << MMO.getOffset(); + OS << "]"; + + // Print the alignment of the reference. + if (MMO.getBaseAlignment() != MMO.getAlignment() || + MMO.getBaseAlignment() != MMO.getSize()) + OS << "(align=" << MMO.getAlignment() << ")"; + + return OS; +} + //===----------------------------------------------------------------------===// // MachineInstr Implementation //===----------------------------------------------------------------------===// @@ -309,7 +367,8 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { /// MachineInstr ctor - This constructor creates a dummy MachineInstr with /// TID NULL and no operands. 
MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { + : TID(0), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); } @@ -328,7 +387,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// TargetInstrDesc or the numOperands if it is not zero. (for /// instructions with variable number of operands). MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), Parent(0), + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -346,7 +405,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) /// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(dl) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) NumImplicitOps++; @@ -365,7 +425,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// basic block. 
/// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Parent(0), + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) @@ -385,7 +445,8 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -403,8 +464,9 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), Parent(0), - debugLoc(MI.getDebugLoc()) { + : TID(&MI.getDesc()), NumImplicitOps(0), + MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); // Add operands @@ -412,11 +474,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) addOperand(MI.getOperand(i)); NumImplicitOps = MI.NumImplicitOps; - // Add memory operands. - for (std::list<MachineMemOperand>::const_iterator i = MI.memoperands_begin(), - j = MI.memoperands_end(); i != j; ++i) - addMemOperand(MF, *i); - // Set parent to null. 
Parent = 0; @@ -425,8 +482,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) MachineInstr::~MachineInstr() { LeakDetector::removeGarbageObject(this); - assert(MemOperands.empty() && - "MachineInstr being deleted with live memoperands!"); #ifndef NDEBUG for (unsigned i = 0, e = Operands.size(); i != e; ++i) { assert(Operands[i].ParentMI == this && "ParentMI mismatch!"); @@ -587,18 +642,24 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { } } -/// addMemOperand - Add a MachineMemOperand to the machine instruction, -/// referencing arbitrary storage. +/// addMemOperand - Add a MachineMemOperand to the machine instruction. +/// This function should be used only occasionally. The setMemRefs function +/// is the primary method for setting up a MachineInstr's MemRefs list. void MachineInstr::addMemOperand(MachineFunction &MF, - const MachineMemOperand &MO) { - MemOperands.push_back(MO); -} + MachineMemOperand *MO) { + mmo_iterator OldMemRefs = MemRefs; + mmo_iterator OldMemRefsEnd = MemRefsEnd; -/// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands. -void MachineInstr::clearMemOperands(MachineFunction &MF) { - MemOperands.clear(); -} + size_t NewNum = (MemRefsEnd - MemRefs) + 1; + mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); + mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum; + + std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs); + NewMemRefs[NewNum - 1] = MO; + MemRefs = NewMemRefs; + MemRefsEnd = NewMemRefsEnd; +} /// removeFromParent - This method unlinks 'this' from the containing basic /// block, and returns it, but does not delete it. @@ -657,7 +718,7 @@ bool MachineInstr::isDebugLabel() const { } /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of -/// the specific register or -1 if it is not found. It further tightening +/// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. 
int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const { @@ -731,7 +792,9 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { unsigned DefPart = 0; for (unsigned i = 1, e = getNumOperands(); i < e; ) { const MachineOperand &FMO = getOperand(i); - assert(FMO.isImm()); + // After the normal asm operands there may be additional imp-def regs. + if (!FMO.isImm()) + return false; // Skip over this def. unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm()); unsigned PrevDef = i + 1; @@ -782,16 +845,22 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { const MachineOperand &MO = getOperand(UseOpIdx); if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) return false; - int FlagIdx = UseOpIdx - 1; - if (FlagIdx < 1) - return false; - while (!getOperand(FlagIdx).isImm()) { - if (--FlagIdx == 0) + + // Find the flag operand corresponding to UseOpIdx + unsigned FlagIdx, NumOps=0; + for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { + const MachineOperand &UFMO = getOperand(FlagIdx); + // After the normal asm operands there may be additional imp-def regs. + if (!UFMO.isImm()) return false; + NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm()); + assert(NumOps < getNumOperands() && "Invalid inline asm flag"); + if (UseOpIdx < FlagIdx+NumOps+1) + break; } - const MachineOperand &UFMO = getOperand(FlagIdx); - if (FlagIdx + InlineAsm::getNumOperandRegisters(UFMO.getImm()) < UseOpIdx) + if (FlagIdx >= UseOpIdx) return false; + const MachineOperand &UFMO = getOperand(FlagIdx); unsigned DefNo; if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { if (!DefOpIdx) @@ -864,7 +933,8 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) { /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. 
bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, - bool &SawStore) const { + bool &SawStore, + AliasAnalysis *AA) const { // Ignore stuff that we obviously can't move. if (TID->mayStore() || TID->isCall()) { SawStore = true; @@ -878,9 +948,9 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // destination. The check for isInvariantLoad gives the targe the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (TID->mayLoad() && !TII->isInvariantLoad(this)) + if (TID->mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and - // end of block, or if the laod is volatile, we can't move it. + // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); return true; @@ -889,11 +959,11 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, /// isSafeToReMat - Return true if it's safe to rematerialize the specified /// instruction which defined the specified register instead of copying it. bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - unsigned DstReg) const { + unsigned DstReg, + AliasAnalysis *AA) const { bool SawStore = false; - if (!getDesc().isRematerializable() || - !TII->isTriviallyReMaterializable(this) || - !isSafeToMove(TII, SawStore)) + if (!TII->isTriviallyReMaterializable(this, AA) || + !isSafeToMove(TII, SawStore, AA)) return false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -930,21 +1000,55 @@ bool MachineInstr::hasVolatileMemoryRef() const { return true; // Check the memory reference information for volatile references. 
- for (std::list<MachineMemOperand>::const_iterator I = memoperands_begin(), - E = memoperands_end(); I != E; ++I) - if (I->isVolatile()) + for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I) + if ((*I)->isVolatile()) return true; return false; } -void MachineInstr::dump() const { - cerr << " " << *this; +/// isInvariantLoad - Return true if this instruction is loading from a +/// location whose value is invariant across the function. For example, +/// loading a value from the constant pool or from from the argument area +/// of a function if it does not change. This should only return true of +/// *all* loads the instruction does are invariant (if it does multiple loads). +bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { + // If the instruction doesn't load at all, it isn't an invariant load. + if (!TID->mayLoad()) + return false; + + // If the instruction has lost its memoperands, conservatively assume that + // it may not be an invariant load. + if (memoperands_empty()) + return false; + + const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo(); + + for (mmo_iterator I = memoperands_begin(), + E = memoperands_end(); I != E; ++I) { + if ((*I)->isVolatile()) return false; + if ((*I)->isStore()) return false; + + if (const Value *V = (*I)->getValue()) { + // A load from a constant PseudoSourceValue is invariant. + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) + if (PSV->isConstant(MFI)) + continue; + // If we have an AliasAnalysis, ask it whether the memory is constant. + if (AA && AA->pointsToConstantMemory(V)) + continue; + } + + // Otherwise assume conservatively. + return false; + } + + // Everything checks out. 
+ return true; } -void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const { - raw_os_ostream RawOS(OS); - print(RawOS, TM); +void MachineInstr::dump() const { + errs() << " " << *this; } void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { @@ -967,46 +1071,23 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (!memoperands_empty()) { OS << ", Mem:"; - for (std::list<MachineMemOperand>::const_iterator i = memoperands_begin(), - e = memoperands_end(); i != e; ++i) { - const MachineMemOperand &MRO = *i; - const Value *V = MRO.getValue(); - - assert((MRO.isLoad() || MRO.isStore()) && - "SV has to be a load, store or both."); - - if (MRO.isVolatile()) - OS << "Volatile "; - - if (MRO.isLoad()) - OS << "LD"; - if (MRO.isStore()) - OS << "ST"; - - OS << "(" << MRO.getSize() << "," << MRO.getAlignment() << ") ["; - - if (!V) - OS << "<unknown>"; - else if (!V->getName().empty()) - OS << V->getName(); - else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - PSV->print(OS); - } else - OS << V; - - OS << " + " << MRO.getOffset() << "]"; + for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); + i != e; ++i) { + OS << **i; + if (next(i) != e) + OS << " "; } } if (!debugLoc.isUnknown()) { const MachineFunction *MF = getParent()->getParent(); DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); - DICompileUnit CU(DLT.CompileUnit); - std::string Dir, Fn; - OS << " [dbg: " - << CU.getDirectory(Dir) << '/' << CU.getFilename(Fn) << "," - << DLT.Line << "," - << DLT.Col << "]"; + DICompileUnit CU(DLT.Scope); + if (!CU.isNull()) + OS << " [dbg: " + << CU.getDirectory() << '/' << CU.getFilename() << "," + << DLT.Line << "," + << DLT.Col << "]"; } OS << "\n"; @@ -1021,7 +1102,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, SmallVector<unsigned,4> DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); - if (!MO.isReg() || 
!MO.isUse()) + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -1032,6 +1113,9 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, if (MO.isKill()) // The register is already marked kill. return true; + if (isPhysReg && isRegTiedToDefOperand(i)) + // Two-address uses of physregs must not be marked kill. + return true; MO.setIsKill(); Found = true; } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index aaa4de4..f92ddb2 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -28,11 +28,12 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -43,8 +44,11 @@ namespace { class VISIBILITY_HIDDEN MachineLICM : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + BitVector AllocatableSet; // Various analyses that we use... + AliasAnalysis *AA; // Alias analysis info. MachineLoopInfo *LI; // Current MachineLoopInfo MachineDominatorTree *DT; // Machine dominator tree for the cur loop MachineRegisterInfo *RegInfo; // Machine register information @@ -70,6 +74,7 @@ namespace { AU.setPreservesCFG(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<AliasAnalysis>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -126,20 +131,19 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { /// loop. 
/// bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - const Function *F = MF.getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) - return false; - - DOUT << "******** Machine LICM ********\n"; + DEBUG(errs() << "******** Machine LICM ********\n"); Changed = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); RegInfo = &MF.getRegInfo(); + AllocatableSet = TRI->getAllocatableSet(MF); // Get our Loop information... LI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { @@ -210,7 +214,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. - if (!TII->isInvariantLoad(&I)) + if (!I.isInvariantLoad(AA)) // FIXME: we should be able to sink loads with no other side effects if // there is nothing that can change memory from here until the end of // block. This is a trivial form of alias analysis. 
@@ -218,28 +222,28 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } DEBUG({ - DOUT << "--- Checking if we can hoist " << I; + errs() << "--- Checking if we can hoist " << I; if (I.getDesc().getImplicitUses()) { - DOUT << " * Instruction has implicit uses:\n"; + errs() << " * Instruction has implicit uses:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpUses = I.getDesc().getImplicitUses(); *ImpUses; ++ImpUses) - DOUT << " -> " << TRI->getName(*ImpUses) << "\n"; + errs() << " -> " << TRI->getName(*ImpUses) << "\n"; } if (I.getDesc().getImplicitDefs()) { - DOUT << " * Instruction has implicit defines:\n"; + errs() << " * Instruction has implicit defines:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs(); *ImpDefs; ++ImpDefs) - DOUT << " -> " << TRI->getName(*ImpDefs) << "\n"; + errs() << " -> " << TRI->getName(*ImpDefs) << "\n"; } }); if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) { - DOUT << "Cannot hoist with implicit defines or uses\n"; + DEBUG(errs() << "Cannot hoist with implicit defines or uses\n"); return false; } @@ -254,8 +258,30 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { if (Reg == 0) continue; // Don't hoist an instruction that uses or defines a physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!RegInfo->def_empty(Reg)) + return false; + if (AllocatableSet.test(Reg)) + return false; + // Check for a def among the register's aliases too. 
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!RegInfo->def_empty(AliasReg)) + return false; + if (AllocatableSet.test(AliasReg)) + return false; + } + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. + return false; + } + } if (!MO.isUse()) continue; @@ -291,13 +317,10 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF) return false; - const TargetInstrDesc &TID = MI.getDesc(); - // FIXME: For now, only hoist re-materilizable instructions. LICM will // increase register pressure. We want to make sure it doesn't increase // spilling. - if (!TID.mayLoad() && (!TID.isRematerializable() || - !TII->isTriviallyReMaterializable(&MI))) + if (!TII->isTriviallyReMaterializable(&MI, AA)) return false; // If result(s) of this instruction is used by PHIs, then don't hoist it. @@ -355,14 +378,14 @@ void MachineLICM::Hoist(MachineInstr &MI) { // Now move the instructions to the predecessor, inserting it before any // terminator instructions. DEBUG({ - DOUT << "Hoisting " << MI; + errs() << "Hoisting " << MI; if (CurPreheader->getBasicBlock()) - DOUT << " to MachineBasicBlock " - << CurPreheader->getBasicBlock()->getName(); + errs() << " to MachineBasicBlock " + << CurPreheader->getBasicBlock()->getName(); if (MI.getParent()->getBasicBlock()) - DOUT << " from MachineBasicBlock " - << MI.getParent()->getBasicBlock()->getName(); - DOUT << "\n"; + errs() << " from MachineBasicBlock " + << MI.getParent()->getBasicBlock()->getName(); + errs() << "\n"; }); // Look for opportunity to CSE the hoisted instruction. 
@@ -374,8 +397,7 @@ void MachineLICM::Hoist(MachineInstr &MI) { if (CI != CSEMap.end()) { const MachineInstr *Dup = LookForDuplicate(&MI, CI->second, RegInfo); if (Dup) { - DOUT << "CSEing " << MI; - DOUT << " with " << *Dup; + DEBUG(errs() << "CSEing " << MI << " with " << *Dup); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef()) diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index ff56f4d..2da8e37 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -19,8 +19,12 @@ #include "llvm/CodeGen/Passes.h" using namespace llvm; -TEMPLATE_INSTANTIATION(class LoopBase<MachineBasicBlock>); -TEMPLATE_INSTANTIATION(class LoopInfoBase<MachineBasicBlock>); +#define MLB class LoopBase<MachineBasicBlock, MachineLoop> +TEMPLATE_INSTANTIATION(MLB); +#undef MLB +#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop> +TEMPLATE_INSTANTIATION(MLIB); +#undef MLIB char MachineLoopInfo::ID = 0; static RegisterPass<MachineLoopInfo> @@ -37,4 +41,5 @@ bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 1d8109e..b62803f 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -23,7 +23,7 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; using namespace llvm::dwarf; @@ -32,23 +32,23 @@ static RegisterPass<MachineModuleInfo> X("machinemoduleinfo", "Module Information"); char MachineModuleInfo::ID = 0; +// Out of line virtual method. 
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {} + //===----------------------------------------------------------------------===// - + MachineModuleInfo::MachineModuleInfo() : ImmutablePass(&ID) -, LabelIDList() -, FrameMoves() -, LandingPads() -, Personalities() +, ObjFileMMI(0) , CallsEHReturn(0) , CallsUnwindInit(0) -, DbgInfoAvailable(false) -{ - // Always emit "no personality" info +, DbgInfoAvailable(false) { + // Always emit some info, by default "no personality" info. Personalities.push_back(NULL); } -MachineModuleInfo::~MachineModuleInfo() { +MachineModuleInfo::~MachineModuleInfo() { + delete ObjFileMMI; } /// doInitialization - Initialize the state for a new module. @@ -63,18 +63,12 @@ bool MachineModuleInfo::doFinalization() { return false; } -/// BeginFunction - Begin gathering function meta information. -/// -void MachineModuleInfo::BeginFunction(MachineFunction *MF) { - // Coming soon. -} - /// EndFunction - Discard function meta information. /// void MachineModuleInfo::EndFunction() { // Clean up frame info. FrameMoves.clear(); - + // Clean up exception info. LandingPads.clear(); TypeInfos.clear(); @@ -82,12 +76,16 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = 0; CallsUnwindInit = 0; +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + VariableDbgInfo.clear(); +#endif } /// AnalyzeModule - Scan the module for global debug information. /// void MachineModuleInfo::AnalyzeModule(Module &M) { - // Insert functions in the llvm.used array into UsedFunctions. + // Insert functions in the llvm.used array (but not llvm.compiler.used) into + // UsedFunctions. 
GlobalVariable *GV = M.getGlobalVariable("llvm.used"); if (!GV || !GV->hasInitializer()) return; @@ -95,12 +93,10 @@ void MachineModuleInfo::AnalyzeModule(Module &M) { ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); if (InitList == 0) return; - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InitList->getOperand(i))) - if (CE->getOpcode() == Instruction::BitCast) - if (Function *F = dyn_cast<Function>(CE->getOperand(0))) - UsedFunctions.insert(F); - } + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (Function *F = + dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts())) + UsedFunctions.insert(F); } //===-EH-------------------------------------------------------------------===// @@ -115,7 +111,7 @@ LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo if (LP.LandingPadBlock == LandingPad) return LP; } - + LandingPads.push_back(LandingPadInfo(LandingPad)); return LandingPads[N]; } @@ -134,7 +130,7 @@ void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { unsigned LandingPadLabel = NextLabelID(); LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.LandingPadLabel = LandingPadLabel; + LP.LandingPadLabel = LandingPadLabel; return LandingPadLabel; } @@ -148,8 +144,13 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, for (unsigned i = 0; i < Personalities.size(); ++i) if (Personalities[i] == Personality) return; - - Personalities.push_back(Personality); + + // If this is the first personality we're adding go + // ahead and add it at the beginning. + if (Personalities[0] == NULL) + Personalities[0] = Personality; + else + Personalities.push_back(Personality); } /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. 
@@ -224,7 +225,7 @@ void MachineModuleInfo::TidyLandingPads() { } } -/// getTypeIDFor - Return the type id for the specified typeinfo. This is +/// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) { for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) @@ -273,24 +274,24 @@ Function *MachineModuleInfo::getPersonality() const { } /// getPersonalityIndex - Return unique index for current personality -/// function. NULL personality function should always get zero index. +/// function. NULL/first personality function should always get zero index. unsigned MachineModuleInfo::getPersonalityIndex() const { const Function* Personality = NULL; - + // Scan landing pads. If there is at least one non-NULL personality - use it. for (unsigned i = 0; i != LandingPads.size(); ++i) if (LandingPads[i].Personality) { Personality = LandingPads[i].Personality; break; } - + for (unsigned i = 0; i < Personalities.size(); ++i) { if (Personalities[i] == Personality) return i; } - // This should never happen - assert(0 && "Personality function should be set!"); + // This will happen if the current personality function is + // in the zero index. return 0; } @@ -306,6 +307,7 @@ struct DebugLabelFolder : public MachineFunctionPass { DebugLabelFolder() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -321,12 +323,12 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { // Get machine module info. MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); if (!MMI) return false; - + // Track if change is made. bool MadeChange = false; // No prior label to begin. unsigned PriorLabel = 0; - + // Iterate through basic blocks. 
for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) { @@ -336,7 +338,7 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){ // The label ID # is always operand #0, an immediate. unsigned NextLabel = I->getOperand(0).getImm(); - + // If there was an immediate prior label. if (PriorLabel) { // Remap the current label to prior label. @@ -354,15 +356,14 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { // No consecutive labels. PriorLabel = 0; } - + ++I; } } - + return MadeChange; } FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); } } - diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp new file mode 100644 index 0000000..7a62929 --- /dev/null +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -0,0 +1,45 @@ +//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements object-file format specific implementations of +// MachineModuleInfoImpl. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCSymbol.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineModuleInfoMachO +//===----------------------------------------------------------------------===// + +// Out of line virtual method. 
+void MachineModuleInfoMachO::Anchor() {} + + +static int SortSymbolPair(const void *LHS, const void *RHS) { + const MCSymbol *LHSS = + ((const std::pair<const MCSymbol*, const MCSymbol*>*)LHS)->first; + const MCSymbol *RHSS = + ((const std::pair<const MCSymbol*, const MCSymbol*>*)RHS)->first; + return LHSS->getName().compare(RHSS->getName()); +} + +/// GetSortedStubs - Return the entries from a DenseMap in a deterministic +/// sorted orer. +MachineModuleInfoMachO::SymbolListTy +MachineModuleInfoMachO::GetSortedStubs(const DenseMap<const MCSymbol*, + const MCSymbol*> &Map) { + MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end()); + if (!List.empty()) + qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); + return List; +} + diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 544d83a..b31973e 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -110,11 +110,9 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() && "Invalid vreg!"); - for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { - // Since we are in SSA form, we can stop at the first definition. - if (I.getOperand().isDef()) - return &*I; - } + // Since we are in SSA form, we can use the first definition. + if (!def_empty(Reg)) + return &*def_begin(Reg); return 0; } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 0e18fa7..0f3b33f 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -7,7 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This pass +// This pass moves instructions into successor blocks, when possible, so that +// they aren't executed on paths where their results aren't needed. 
+// +// This pass is not intended to be a replacement or a complete alternative +// for an LLVM-IR-level sinking pass. It is only designed to sink simple +// constructs that are not exposed before lowering and instruction selection. // //===----------------------------------------------------------------------===// @@ -15,12 +20,14 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumSunk, "Number of machine instructions sunk"); @@ -29,9 +36,12 @@ namespace { class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; MachineFunction *CurMF; // Current MachineFunction MachineRegisterInfo *RegInfo; // Machine register information - MachineDominatorTree *DT; // Machine dominator tree for the current Loop + MachineDominatorTree *DT; // Machine dominator tree + AliasAnalysis *AA; + BitVector AllocatableSet; // Which physregs are allocatable? 
public: static char ID; // Pass identification @@ -40,7 +50,9 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); } @@ -63,10 +75,8 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); - for (MachineRegisterInfo::reg_iterator I = RegInfo->reg_begin(Reg), - E = RegInfo->reg_end(); I != E; ++I) { - if (I.getOperand().isDef()) continue; // ignore def. - + for (MachineRegisterInfo::use_iterator I = RegInfo->use_begin(Reg), + E = RegInfo->use_end(); I != E; ++I) { // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); @@ -85,13 +95,16 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { - DOUT << "******** Machine Sinking ********\n"; + DEBUG(errs() << "******** Machine Sinking ********\n"); CurMF = &MF; TM = &CurMF->getTarget(); TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); RegInfo = &CurMF->getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); + AllocatableSet = TRI->getAllocatableSet(*CurMF); bool EverMadeChange = false; @@ -142,7 +155,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check if it's safe to move the instruction. 
- if (!MI->isSafeToMove(TII, SawStore)) + if (!MI->isSafeToMove(TII, SawStore, AA)) return false; // FIXME: This should include support for sinking instructions within the @@ -151,7 +164,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // also sink them down before their first use in the block. This xform has to // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y - // and z only the shrink the live range of x. + // and z and only shrink the live range of x. // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. @@ -169,10 +182,26 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (Reg == 0) continue; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // If this is a physical register use, we can't move it. If it is a def, - // we can move it, but only if the def is dead. - if (MO.isUse() || !MO.isDead()) + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!RegInfo->def_empty(Reg)) + return false; + if (AllocatableSet.test(Reg)) + return false; + // Check for a def among the register's aliases too. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!RegInfo->def_empty(AliasReg)) + return false; + if (AllocatableSet.test(AliasReg)) + return false; + } + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. return false; + } } else { // Virtual register uses are always safe to sink. 
if (MO.isUse()) continue; @@ -232,15 +261,15 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (MI->getParent() == SuccToSinkTo) return false; - DEBUG(cerr << "Sink instr " << *MI); - DEBUG(cerr << "to block " << *SuccToSinkTo); + DEBUG(errs() << "Sink instr " << *MI); + DEBUG(errs() << "to block " << *SuccToSinkTo); // If the block has multiple predecessors, this would introduce computation on // a path that it doesn't already exist. We could split the critical edge, // but for now we just punt. // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { - DEBUG(cerr << " *** PUNTING: Critical edge found\n"); + DEBUG(errs() << " *** PUNTING: Critical edge found\n"); return false; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index be1396c..18a3ead 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,21 +23,23 @@ // the verifier errors. //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Function.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include <fstream> - +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -53,6 +55,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + 
MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF); @@ -61,7 +64,7 @@ namespace { const bool allowPhysDoubleDefs; const char *const OutFileName; - std::ostream *OS; + raw_ostream *OS; const MachineFunction *MF; const TargetMachine *TM; const TargetRegisterInfo *TRI; @@ -75,7 +78,8 @@ namespace { BitVector regsReserved; RegSet regsLive; - RegVector regsDefined, regsImpDefined, regsDead, regsKilled; + RegVector regsDefined, regsDead, regsKilled; + RegSet regsLiveInButUnused; // Add Reg and any sub-registers to RV void addRegWithSubRegs(RegVector &RV, unsigned Reg) { @@ -85,14 +89,6 @@ namespace { RV.push_back(*R); } - // Does RS contain any super-registers of Reg? - bool anySuperRegisters(const RegSet &RS, unsigned Reg) { - for (const unsigned *R = TRI->getSuperRegisters(Reg); *R; R++) - if (RS.count(*R)) - return true; - return false; - } - struct BBInfo { // Is this MBB reachable from the MF entry point? bool reachable; @@ -148,7 +144,7 @@ namespace { DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap; bool isReserved(unsigned Reg) { - return Reg < regsReserved.size() && regsReserved[Reg]; + return Reg < regsReserved.size() && regsReserved.test(Reg); } void visitMachineFunctionBefore(); @@ -176,21 +172,24 @@ static RegisterPass<MachineVerifier> MachineVer("machineverifier", "Verify generated machine code"); static const PassInfo *const MachineVerifyID = &MachineVer; -FunctionPass * -llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) -{ +FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) { return new MachineVerifier(allowPhysDoubleDefs); } -bool -MachineVerifier::runOnMachineFunction(MachineFunction &MF) -{ - std::ofstream OutFile; +bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { + raw_ostream *OutFile = 0; if (OutFileName) { - OutFile.open(OutFileName, std::ios::out | std::ios::app); - OS = &OutFile; + std::string ErrorInfo; + OutFile = new raw_fd_ostream(OutFileName, 
ErrorInfo, + raw_fd_ostream::F_Append); + if (!ErrorInfo.empty()) { + errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; + exit(1); + } + + OS = OutFile; } else { - OS = cerr.stream(); + OS = &errs(); } foundErrors = 0; @@ -215,51 +214,48 @@ MachineVerifier::runOnMachineFunction(MachineFunction &MF) } visitMachineFunctionAfter(); - if (OutFileName) - OutFile.close(); + if (OutFile) + delete OutFile; + else if (foundErrors) + llvm_report_error("Found "+Twine(foundErrors)+" machine code errors."); - if (foundErrors) { - cerr << "\nStopping with " << foundErrors << " machine code errors.\n"; - exit(1); - } + // Clean up. + regsLive.clear(); + regsDefined.clear(); + regsDead.clear(); + regsKilled.clear(); + regsLiveInButUnused.clear(); + MBBInfoMap.clear(); return false; // no changes } -void -MachineVerifier::report(const char *msg, const MachineFunction *MF) -{ +void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); - *OS << "\n"; + *OS << '\n'; if (!foundErrors++) - MF->print(OS); + MF->print(*OS); *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getName() << "\n"; + << "- function: " << MF->getFunction()->getNameStr() << "\n"; } -void -MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) -{ +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getBasicBlock()->getName() + *OS << "- basic block: " << MBB->getBasicBlock()->getNameStr() << " " << (void*)MBB << " (#" << MBB->getNumber() << ")\n"; } -void -MachineVerifier::report(const char *msg, const MachineInstr *MI) -{ +void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); *OS << "- instruction: "; - MI->print(OS, TM); + MI->print(*OS, TM); } -void -MachineVerifier::report(const char *msg, - const MachineOperand *MO, unsigned MONum) -{ 
+void MachineVerifier::report(const char *msg, + const MachineOperand *MO, unsigned MONum) { assert(MO); report(msg, MO->getParent()); *OS << "- operand " << MONum << ": "; @@ -267,9 +263,7 @@ MachineVerifier::report(const char *msg, *OS << "\n"; } -void -MachineVerifier::markReachable(const MachineBasicBlock *MBB) -{ +void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { MInfo.reachable = true; @@ -279,16 +273,158 @@ MachineVerifier::markReachable(const MachineBasicBlock *MBB) } } -void -MachineVerifier::visitMachineFunctionBefore() -{ +void MachineVerifier::visitMachineFunctionBefore() { regsReserved = TRI->getReservedRegs(*MF); + + // A sub-register of a reserved register is also reserved + for (int Reg = regsReserved.find_first(); Reg>=0; + Reg = regsReserved.find_next(Reg)) { + for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { + // FIXME: This should probably be: + // assert(regsReserved.test(*Sub) && "Non-reserved sub-register"); + regsReserved.set(*Sub); + } + } markReachable(&MF->front()); } -void -MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) -{ +void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + // Start with minimal CFG sanity checks. + MachineFunction::const_iterator MBBI = MBB; + ++MBBI; + if (MBBI != MF->end()) { + // Block is not last in function. + if (!MBB->isSuccessor(MBBI)) { + // Block does not fall through. 
+ if (MBB->empty()) { + report("MBB doesn't fall through but is empty!", MBB); + } + } + if (TII->BlockHasNoFallThrough(*MBB)) { + if (MBB->empty()) { + report("TargetInstrInfo says the block has no fall through, but the " + "block is empty!", MBB); + } else if (!MBB->back().getDesc().isBarrier()) { + report("TargetInstrInfo says the block has no fall through, but the " + "block does not end in a barrier!", MBB); + } + } + } else { + // Block is last in function. + if (MBB->empty()) { + report("MBB is last in function but is empty!", MBB); + } + } + + // Call AnalyzeBranch. If it succeeds, there several more conditions to check. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB), + TBB, FBB, Cond)) { + // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's + // check whether its answers match up with reality. + if (!TBB && !FBB) { + // Block falls through to its successor. + MachineFunction::const_iterator MBBI = MBB; + ++MBBI; + if (MBBI == MF->end()) { + // It's possible that the block legitimately ends with a noreturn + // call or an unreachable, in which case it won't actually fall + // out the bottom of the function. + } else if (MBB->succ_empty()) { + // It's possible that the block legitimately ends with a noreturn + // call or an unreachable, in which case it won't actuall fall + // out of the block. 
+ } else if (MBB->succ_size() != 1) { + report("MBB exits via unconditional fall-through but doesn't have " + "exactly one CFG successor!", MBB); + } else if (MBB->succ_begin()[0] != MBBI) { + report("MBB exits via unconditional fall-through but its successor " + "differs from its CFG successor!", MBB); + } + if (!MBB->empty() && MBB->back().getDesc().isBarrier()) { + report("MBB exits via unconditional fall-through but ends with a " + "barrier instruction!", MBB); + } + if (!Cond.empty()) { + report("MBB exits via unconditional fall-through but has a condition!", + MBB); + } + } else if (TBB && !FBB && Cond.empty()) { + // Block unconditionally branches somewhere. + if (MBB->succ_size() != 1) { + report("MBB exits via unconditional branch but doesn't have " + "exactly one CFG successor!", MBB); + } else if (MBB->succ_begin()[0] != TBB) { + report("MBB exits via unconditional branch but the CFG " + "successor doesn't match the actual successor!", MBB); + } + if (MBB->empty()) { + report("MBB exits via unconditional branch but doesn't contain " + "any instructions!", MBB); + } else if (!MBB->back().getDesc().isBarrier()) { + report("MBB exits via unconditional branch but doesn't end with a " + "barrier instruction!", MBB); + } else if (!MBB->back().getDesc().isTerminator()) { + report("MBB exits via unconditional branch but the branch isn't a " + "terminator instruction!", MBB); + } + } else if (TBB && !FBB && !Cond.empty()) { + // Block conditionally branches somewhere, otherwise falls through. 
+ MachineFunction::const_iterator MBBI = MBB; + ++MBBI; + if (MBBI == MF->end()) { + report("MBB conditionally falls through out of function!", MBB); + } if (MBB->succ_size() != 2) { + report("MBB exits via conditional branch/fall-through but doesn't have " + "exactly two CFG successors!", MBB); + } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == MBBI) || + (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == MBBI)) { + report("MBB exits via conditional branch/fall-through but the CFG " + "successors don't match the actual successors!", MBB); + } + if (MBB->empty()) { + report("MBB exits via conditional branch/fall-through but doesn't " + "contain any instructions!", MBB); + } else if (MBB->back().getDesc().isBarrier()) { + report("MBB exits via conditional branch/fall-through but ends with a " + "barrier instruction!", MBB); + } else if (!MBB->back().getDesc().isTerminator()) { + report("MBB exits via conditional branch/fall-through but the branch " + "isn't a terminator instruction!", MBB); + } + } else if (TBB && FBB) { + // Block conditionally branches somewhere, otherwise branches + // somewhere else. 
+ if (MBB->succ_size() != 2) { + report("MBB exits via conditional branch/branch but doesn't have " + "exactly two CFG successors!", MBB); + } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == FBB) || + (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == FBB)) { + report("MBB exits via conditional branch/branch but the CFG " + "successors don't match the actual successors!", MBB); + } + if (MBB->empty()) { + report("MBB exits via conditional branch/branch but doesn't " + "contain any instructions!", MBB); + } else if (!MBB->back().getDesc().isBarrier()) { + report("MBB exits via conditional branch/branch but doesn't end with a " + "barrier instruction!", MBB); + } else if (!MBB->back().getDesc().isTerminator()) { + report("MBB exits via conditional branch/branch but the branch " + "isn't a terminator instruction!", MBB); + } + if (Cond.empty()) { + report("MBB exits via conditinal branch/branch but there's no " + "condition!", MBB); + } + } else { + report("AnalyzeBranch returned invalid data!", MBB); + } + } + regsLive.clear(); for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) { @@ -300,32 +436,41 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++) regsLive.insert(*R); } + regsLiveInButUnused = regsLive; + + const MachineFrameInfo *MFI = MF->getFrameInfo(); + assert(MFI && "Function has no frame info"); + BitVector PR = MFI->getPristineRegs(MBB); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { + regsLive.insert(I); + for (const unsigned *R = TRI->getSubRegisters(I); *R; R++) + regsLive.insert(*R); + } + regsKilled.clear(); regsDefined.clear(); - regsImpDefined.clear(); } -void -MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) -{ +void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const TargetInstrDesc &TI = MI->getDesc(); - if (MI->getNumExplicitOperands() < 
TI.getNumOperands()) { + if (MI->getNumOperands() < TI.getNumOperands()) { report("Too few operands", MI); *OS << TI.getNumOperands() << " operands expected, but " << MI->getNumExplicitOperands() << " given.\n"; } - if (!TI.isVariadic()) { - if (MI->getNumExplicitOperands() > TI.getNumOperands()) { - report("Too many operands", MI); - *OS << TI.getNumOperands() << " operands expected, but " - << MI->getNumExplicitOperands() << " given.\n"; - } + + // Check the MachineMemOperands for basic consistency. + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) { + if ((*I)->isLoad() && !TI.mayLoad()) + report("Missing mayLoad flag", MI); + if ((*I)->isStore() && !TI.mayStore()) + report("Missing mayStore flag", MI); } } void -MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) -{ +MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const TargetInstrDesc &TI = MI->getDesc(); @@ -337,6 +482,16 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) report("Explicit definition marked as use", MO, MONum); else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); + } else if (MONum < TI.getNumOperands()) { + if (MO->isReg()) { + if (MO->isDef()) + report("Explicit operand marked as def", MO, MONum); + if (MO->isImplicit()) + report("Explicit operand marked as implicit", MO, MONum); + } + } else { + if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic()) + report("Extra explicit operand on non-variadic instruction", MO, MONum); } switch (MO->getType()) { @@ -346,18 +501,26 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) return; // Check Live Variables. - if (MO->isUse()) { + if (MO->isUndef()) { + // An <undef> doesn't refer to any register, so just skip it. 
+ } else if (MO->isUse()) { + regsLiveInButUnused.erase(Reg); + if (MO->isKill()) { addRegWithSubRegs(regsKilled, Reg); + // Tied operands on two-address instuctions MUST NOT have a <kill> flag. + if (MI->isRegTiedToDefOperand(MONum)) + report("Illegal kill flag on two-address instruction operand", + MO, MONum); } else { - // TwoAddress instr modyfying a reg is treated as kill+def. + // TwoAddress instr modifying a reg is treated as kill+def. unsigned defIdx; if (MI->isRegTiedToDefOperand(MONum, &defIdx) && MI->getOperand(defIdx).getReg() == Reg) addRegWithSubRegs(regsKilled, Reg); } - // Explicit use of a dead register. - if (!MO->isImplicit() && !regsLive.count(Reg)) { + // Use of a dead register. + if (!regsLive.count(Reg)) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Reserved registers may be used even when 'dead'. if (!isReserved(Reg)) @@ -374,15 +537,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } } } else { + assert(MO->isDef()); // Register defined. // TODO: verify that earlyclobber ops are not used. - if (MO->isImplicit()) - addRegWithSubRegs(regsImpDefined, Reg); - else - addRegWithSubRegs(regsDefined, Reg); - if (MO->isDead()) addRegWithSubRegs(regsDead, Reg); + else + addRegWithSubRegs(regsDefined, Reg); } // Check register classes. 
@@ -401,8 +562,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } sr = s; } - if (TOI.RegClass) { - const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { if (!DRC->contains(sr)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(sr) << " is not a " @@ -419,8 +579,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } RC = *(RC->subregclasses_begin()+SubIdx); } - if (TOI.RegClass) { - const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { if (RC != DRC && !RC->hasSuperClass(DRC)) { report("Illegal virtual register for instruction", MO, MONum); *OS << "Expected a " << DRC->getName() << " register, but got a " @@ -431,34 +590,35 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } break; } - // Can PHI instrs refer to MBBs not in the CFG? X86 and ARM do. - // case MachineOperand::MO_MachineBasicBlock: - // if (MI->getOpcode() == TargetInstrInfo::PHI) { - // if (!MO->getMBB()->isSuccessor(MI->getParent())) - // report("PHI operand is not in the CFG", MO, MONum); - // } - // break; + + case MachineOperand::MO_MachineBasicBlock: + if (MI->getOpcode() == TargetInstrInfo::PHI) { + if (!MO->getMBB()->isSuccessor(MI->getParent())) + report("PHI operand is not in the CFG", MO, MONum); + } + break; + default: break; } } -void -MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) -{ +void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); set_subtract(regsLive, regsKilled); regsKilled.clear(); - for (RegVector::const_iterator I = regsDefined.begin(), - E = regsDefined.end(); I != E; ++I) { + // Verify that both <def> and <def,dead> operands refer to dead registers. 
+ RegVector defs(regsDefined); + defs.append(regsDead.begin(), regsDead.end()); + + for (RegVector::const_iterator I = defs.begin(), E = defs.end(); + I != E; ++I) { if (regsLive.count(*I)) { if (TargetRegisterInfo::isPhysicalRegister(*I)) { - // We allow double defines to physical registers with live - // super-registers. if (!allowPhysDoubleDefs && !isReserved(*I) && - !anySuperRegisters(regsLive, *I)) { + !regsLiveInButUnused.count(*I)) { report("Redefining a live physical register", MI); *OS << "Register " << TRI->getName(*I) << " was defined but already live.\n"; @@ -478,14 +638,12 @@ MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) } } - set_union(regsLive, regsDefined); regsDefined.clear(); - set_union(regsLive, regsImpDefined); regsImpDefined.clear(); set_subtract(regsLive, regsDead); regsDead.clear(); + set_union(regsLive, regsDefined); regsDefined.clear(); } void -MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) -{ +MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { MBBInfoMap[MBB].regsLiveOut = regsLive; regsLive.clear(); } @@ -493,9 +651,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) // Calculate the largest possible vregsPassed sets. These are the registers that // can pass through an MBB live, but may not be live every time. It is assumed // that all vregsPassed sets are empty before the call. -void -MachineVerifier::calcMaxRegsPassed() -{ +void MachineVerifier::calcMaxRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. DenseSet<const MachineBasicBlock*> todo; @@ -533,9 +689,7 @@ MachineVerifier::calcMaxRegsPassed() // Calculate the minimum vregsPassed set. These are the registers that always // pass live through an MBB. The calculation assumes that calcMaxRegsPassed has // been called earlier. 
-void -MachineVerifier::calcMinRegsPassed() -{ +void MachineVerifier::calcMinRegsPassed() { DenseSet<const MachineBasicBlock*> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) @@ -570,9 +724,7 @@ MachineVerifier::calcMinRegsPassed() // Check PHI instructions at the beginning of MBB. It is assumed that // calcMinRegsPassed has been run so BBInfo::isLiveOut is valid. -void -MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) -{ +void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { DenseSet<const MachineBasicBlock*> seen; @@ -601,9 +753,7 @@ MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) } } -void -MachineVerifier::visitMachineFunctionAfter() -{ +void MachineVerifier::visitMachineFunctionAfter() { calcMaxRegsPassed(); // With the maximal set of vregsPassed we can verify dead-in registers. diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp new file mode 100644 index 0000000..cf05275 --- /dev/null +++ b/lib/CodeGen/ObjectCodeEmitter.cpp @@ -0,0 +1,141 @@ +//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/BinaryObject.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" + +//===----------------------------------------------------------------------===// +// ObjectCodeEmitter Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { + +ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {} +ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {} +ObjectCodeEmitter::~ObjectCodeEmitter() {} + +/// setBinaryObject - set the BinaryObject we are writting to +void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; } + +/// emitByte - This callback is invoked when a byte needs to be +/// written to the data stream, without buffer overflow testing. +void ObjectCodeEmitter::emitByte(uint8_t B) { + BO->emitByte(B); +} + +/// emitWordLE - This callback is invoked when a 32-bit word needs to be +/// written to the data stream in little-endian format. +void ObjectCodeEmitter::emitWordLE(uint32_t W) { + BO->emitWordLE(W); +} + +/// emitWordBE - This callback is invoked when a 32-bit word needs to be +/// written to the data stream in big-endian format. +void ObjectCodeEmitter::emitWordBE(uint32_t W) { + BO->emitWordBE(W); +} + +/// emitDWordLE - This callback is invoked when a 64-bit word needs to be +/// written to the data stream in little-endian format. +void ObjectCodeEmitter::emitDWordLE(uint64_t W) { + BO->emitDWordLE(W); +} + +/// emitDWordBE - This callback is invoked when a 64-bit word needs to be +/// written to the data stream in big-endian format. +void ObjectCodeEmitter::emitDWordBE(uint64_t W) { + BO->emitDWordBE(W); +} + +/// emitAlignment - Align 'BO' to the necessary alignment boundary. 
+void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */, + uint8_t fill /* 0 */) { + BO->emitAlignment(Alignment, fill); +} + +/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) { + BO->emitULEB128Bytes(Value); +} + +/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) { + BO->emitSLEB128Bytes(Value); +} + +/// emitString - This callback is invoked when a String needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitString(const std::string &String) { + BO->emitString(String); +} + +/// getCurrentPCValue - This returns the address that the next emitted byte +/// will be output to. +uintptr_t ObjectCodeEmitter::getCurrentPCValue() const { + return BO->getCurrentPCOffset(); +} + +/// getCurrentPCOffset - Return the offset from the start of the emitted +/// buffer that we are currently writing to. +uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const { + return BO->getCurrentPCOffset(); +} + +/// addRelocation - Whenever a relocatable address is needed, it should be +/// noted with this interface. +void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) { + BO->addRelocation(relocation); +} + +/// StartMachineBasicBlock - This should be called by the target when a new +/// basic block is about to be emitted. This way the MCE knows where the +/// start of the block is, and can implement getMachineBasicBlockAddress. 
+void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) { + if (MBBLocations.size() <= (unsigned)MBB->getNumber()) + MBBLocations.resize((MBB->getNumber()+1)*2); + MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); +} + +/// getMachineBasicBlockAddress - Return the address of the specified +/// MachineBasicBlock, only usable after the label for the MBB has been +/// emitted. +uintptr_t +ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; +} + +/// getJumpTableEntryAddress - Return the address of the jump table with index +/// 'Index' in the function that last called initJumpTableInfo. +uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const { + assert(JTLocations.size() > Index && "JT not emitted!"); + return JTLocations[Index]; +} + +/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. +uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const { + assert(CPLocations.size() > Index && "CP not emitted!"); + return CPLocations[Index]; +} + +/// getConstantPoolEntrySection - Return the section of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. 
+uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const { + assert(CPSections.size() > Index && "CP not emitted!"); + return CPSections[Index]; +} + +} // end namespace llvm + diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h new file mode 100644 index 0000000..904061c --- /dev/null +++ b/lib/CodeGen/PBQP/AnnotatedGraph.h @@ -0,0 +1,184 @@ +//===-- AnnotatedGraph.h - Annotated PBQP Graph ----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Annotated PBQP Graph class. This class is used internally by the PBQP solver +// to cache information to speed up reduction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H +#define LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H + +#include "GraphBase.h" + +namespace PBQP { + + +template <typename NodeData, typename EdgeData> class AnnotatedEdge; + +template <typename NodeData, typename EdgeData> +class AnnotatedNode : public NodeBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> > { +private: + + NodeData nodeData; + +public: + + AnnotatedNode(const Vector &costs, const NodeData &nodeData) : + NodeBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> >(costs), + nodeData(nodeData) {} + + NodeData& getNodeData() { return nodeData; } + const NodeData& getNodeData() const { return nodeData; } + +}; + +template <typename NodeData, typename EdgeData> +class AnnotatedEdge : public EdgeBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> > { +private: + + typedef typename GraphBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> >::NodeIterator + NodeIterator; + + EdgeData edgeData; + 
+public: + + + AnnotatedEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr, + const Matrix &costs, const EdgeData &edgeData) : + EdgeBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> >(node1Itr, node2Itr, costs), + edgeData(edgeData) {} + + EdgeData& getEdgeData() { return edgeData; } + const EdgeData& getEdgeData() const { return edgeData; } + +}; + +template <typename NodeData, typename EdgeData> +class AnnotatedGraph : public GraphBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> > { +private: + + typedef GraphBase<AnnotatedNode<NodeData, EdgeData>, + AnnotatedEdge<NodeData, EdgeData> > PGraph; + + typedef AnnotatedNode<NodeData, EdgeData> NodeEntry; + typedef AnnotatedEdge<NodeData, EdgeData> EdgeEntry; + + + void copyFrom(const AnnotatedGraph &other) { + if (!other.areNodeIDsValid()) { + other.assignNodeIDs(); + } + std::vector<NodeIterator> newNodeItrs(other.getNumNodes()); + + for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd(); + nItr != nEnd; ++nItr) { + newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr)); + } + + for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd(); + eItr != eEnd; ++eItr) { + + unsigned node1ID = other.getNodeID(other.getEdgeNode1(eItr)), + node2ID = other.getNodeID(other.getEdgeNode2(eItr)); + + addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], + other.getEdgeCosts(eItr), other.getEdgeData(eItr)); + } + + } + +public: + + typedef typename PGraph::NodeIterator NodeIterator; + typedef typename PGraph::ConstNodeIterator ConstNodeIterator; + typedef typename PGraph::EdgeIterator EdgeIterator; + typedef typename PGraph::ConstEdgeIterator ConstEdgeIterator; + + AnnotatedGraph() {} + + AnnotatedGraph(const AnnotatedGraph &other) { + copyFrom(other); + } + + AnnotatedGraph& operator=(const AnnotatedGraph &other) { + PGraph::clear(); + copyFrom(other); + return *this; + } + + NodeIterator addNode(const Vector &costs, 
const NodeData &data) { + return PGraph::addConstructedNode(NodeEntry(costs, data)); + } + + EdgeIterator addEdge(const NodeIterator &node1Itr, + const NodeIterator &node2Itr, + const Matrix &costs, const EdgeData &data) { + return PGraph::addConstructedEdge(EdgeEntry(node1Itr, node2Itr, + costs, data)); + } + + NodeData& getNodeData(const NodeIterator &nodeItr) { + return getNodeEntry(nodeItr).getNodeData(); + } + + const NodeData& getNodeData(const NodeIterator &nodeItr) const { + return getNodeEntry(nodeItr).getNodeData(); + } + + EdgeData& getEdgeData(const EdgeIterator &edgeItr) { + return getEdgeEntry(edgeItr).getEdgeData(); + } + + const EdgeEntry& getEdgeData(const EdgeIterator &edgeItr) const { + return getEdgeEntry(edgeItr).getEdgeData(); + } + + SimpleGraph toSimpleGraph() const { + SimpleGraph g; + + if (!PGraph::areNodeIDsValid()) { + PGraph::assignNodeIDs(); + } + std::vector<SimpleGraph::NodeIterator> newNodeItrs(PGraph::getNumNodes()); + + for (ConstNodeIterator nItr = PGraph::nodesBegin(), + nEnd = PGraph::nodesEnd(); + nItr != nEnd; ++nItr) { + + newNodeItrs[getNodeID(nItr)] = g.addNode(getNodeCosts(nItr)); + } + + for (ConstEdgeIterator + eItr = PGraph::edgesBegin(), eEnd = PGraph::edgesEnd(); + eItr != eEnd; ++eItr) { + + unsigned node1ID = getNodeID(getEdgeNode1(eItr)), + node2ID = getNodeID(getEdgeNode2(eItr)); + + g.addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], + getEdgeCosts(eItr)); + } + + return g; + } + +}; + + +} + +#endif // LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h new file mode 100644 index 0000000..b2f2e6f --- /dev/null +++ b/lib/CodeGen/PBQP/ExhaustiveSolver.h @@ -0,0 +1,110 @@ +//===-- ExhaustiveSolver.h - Brute Force PBQP Solver -----------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Uses a trivial brute force algorithm to solve a PBQP problem. +// PBQP is NP-HARD - This solver should only be used for debugging small +// problems. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H +#define LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H + +#include "Solver.h" + +namespace PBQP { + +/// A brute force PBQP solver. This solver takes exponential time. It should +/// only be used for debugging purposes. +class ExhaustiveSolverImpl { +private: + + const SimpleGraph &g; + + PBQPNum getSolutionCost(const Solution &solution) const { + PBQPNum cost = 0.0; + + for (SimpleGraph::ConstNodeIterator + nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + unsigned nodeId = g.getNodeID(nodeItr); + + cost += g.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; + } + + for (SimpleGraph::ConstEdgeIterator + edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + SimpleGraph::ConstNodeIterator n1 = g.getEdgeNode1Itr(edgeItr), + n2 = g.getEdgeNode2Itr(edgeItr); + unsigned sol1 = solution.getSelection(g.getNodeID(n1)), + sol2 = solution.getSelection(g.getNodeID(n2)); + + cost += g.getEdgeCosts(edgeItr)[sol1][sol2]; + } + + return cost; + } + +public: + + ExhaustiveSolverImpl(const SimpleGraph &g) : g(g) {} + + Solution solve() const { + Solution current(g.getNumNodes(), true), optimal(current); + + PBQPNum bestCost = std::numeric_limits<PBQPNum>::infinity(); + bool finished = false; + + while (!finished) { + PBQPNum currentCost = getSolutionCost(current); + + if (currentCost < bestCost) { + optimal = current; + bestCost = currentCost; + } + + // assume we're done. 
+ finished = true; + + for (unsigned i = 0; i < g.getNumNodes(); ++i) { + if (current.getSelection(i) == + (g.getNodeCosts(g.getNodeItr(i)).getLength() - 1)) { + current.setSelection(i, 0); + } + else { + current.setSelection(i, current.getSelection(i) + 1); + finished = false; + break; + } + } + + } + + optimal.setSolutionCost(bestCost); + + return optimal; + } + +}; + +class ExhaustiveSolver : public Solver { +public: + ~ExhaustiveSolver() {} + Solution solve(const SimpleGraph &g) const { + ExhaustiveSolverImpl solver(g); + return solver.solve(); + } +}; + +} + +#endif // LLVM_CODGEN_PBQP_EXHAUSTIVESOLVER_HPP diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h new file mode 100644 index 0000000..cc3e017 --- /dev/null +++ b/lib/CodeGen/PBQP/GraphBase.h @@ -0,0 +1,582 @@ +//===-- GraphBase.h - Abstract Base PBQP Graph -----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Base class for PBQP Graphs. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_PBQP_GRAPHBASE_H +#define LLVM_CODEGEN_PBQP_GRAPHBASE_H + +#include "PBQPMath.h" + +#include <list> +#include <vector> + +namespace PBQP { + +// UGLY, but I'm not sure there's a good way around this: We need to be able to +// look up a Node's "adjacent edge list" structure type before the Node type is +// fully constructed. We can enable this by pushing the choice of data type +// out into this traits class. +template <typename Graph> +class NodeBaseTraits { + public: + typedef std::list<typename Graph::EdgeIterator> AdjEdgeList; + typedef typename AdjEdgeList::iterator AdjEdgeIterator; + typedef typename AdjEdgeList::const_iterator ConstAdjEdgeIterator; +}; + +/// \brief Base for concrete graph classes. 
Provides a basic set of graph +/// operations which are useful for PBQP solvers. +template <typename NodeEntry, typename EdgeEntry> +class GraphBase { +private: + + typedef GraphBase<NodeEntry, EdgeEntry> ThisGraphT; + + typedef std::list<NodeEntry> NodeList; + typedef std::list<EdgeEntry> EdgeList; + + NodeList nodeList; + unsigned nodeListSize; + + EdgeList edgeList; + unsigned edgeListSize; + + GraphBase(const ThisGraphT &other) { abort(); } + void operator=(const ThisGraphT &other) { abort(); } + +public: + + /// \brief Iterates over the nodes of a graph. + typedef typename NodeList::iterator NodeIterator; + /// \brief Iterates over the nodes of a const graph. + typedef typename NodeList::const_iterator ConstNodeIterator; + /// \brief Iterates over the edges of a graph. + typedef typename EdgeList::iterator EdgeIterator; + /// \brief Iterates over the edges of a const graph. + typedef typename EdgeList::const_iterator ConstEdgeIterator; + + /// \brief Iterates over the edges attached to a node. + typedef typename NodeBaseTraits<ThisGraphT>::AdjEdgeIterator + AdjEdgeIterator; + + /// \brief Iterates over the edges attached to a node in a const graph. 
+ typedef typename NodeBaseTraits<ThisGraphT>::ConstAdjEdgeIterator + ConstAdjEdgeIterator; + +private: + + typedef std::vector<NodeIterator> IDToNodeMap; + + IDToNodeMap idToNodeMap; + bool nodeIDsValid; + + void invalidateNodeIDs() { + if (nodeIDsValid) { + idToNodeMap.clear(); + nodeIDsValid = false; + } + } + + template <typename ItrT> + bool iteratorInRange(ItrT itr, const ItrT &begin, const ItrT &end) { + for (ItrT t = begin; t != end; ++t) { + if (itr == t) + return true; + } + + return false; + } + +protected: + + GraphBase() : nodeListSize(0), edgeListSize(0), nodeIDsValid(false) {} + + NodeEntry& getNodeEntry(const NodeIterator &nodeItr) { return *nodeItr; } + const NodeEntry& getNodeEntry(const ConstNodeIterator &nodeItr) const { + return *nodeItr; + } + + EdgeEntry& getEdgeEntry(const EdgeIterator &edgeItr) { return *edgeItr; } + const EdgeEntry& getEdgeEntry(const ConstEdgeIterator &edgeItr) const { + return *edgeItr; + } + + NodeIterator addConstructedNode(const NodeEntry &nodeEntry) { + ++nodeListSize; + + invalidateNodeIDs(); + + NodeIterator newNodeItr = nodeList.insert(nodeList.end(), nodeEntry); + + return newNodeItr; + } + + EdgeIterator addConstructedEdge(const EdgeEntry &edgeEntry) { + + assert((findEdge(edgeEntry.getNode1Itr(), edgeEntry.getNode2Itr()) + == edgeList.end()) && "Attempt to add duplicate edge."); + + ++edgeListSize; + + // Add the edge to the graph. + EdgeIterator edgeItr = edgeList.insert(edgeList.end(), edgeEntry); + + // Get a reference to the version in the graph. + EdgeEntry &newEdgeEntry = getEdgeEntry(edgeItr); + + // Node entries: + NodeEntry &node1Entry = getNodeEntry(newEdgeEntry.getNode1Itr()), + &node2Entry = getNodeEntry(newEdgeEntry.getNode2Itr()); + + // Sanity check on matrix dimensions. 
+ assert((node1Entry.getCosts().getLength() == + newEdgeEntry.getCosts().getRows()) && + (node2Entry.getCosts().getLength() == + newEdgeEntry.getCosts().getCols()) && + "Matrix dimensions do not match cost vector dimensions."); + + // Create links between nodes and edges. + newEdgeEntry.setNode1ThisEdgeItr( + node1Entry.addAdjEdge(edgeItr)); + newEdgeEntry.setNode2ThisEdgeItr( + node2Entry.addAdjEdge(edgeItr)); + + return edgeItr; + } + +public: + + /// \brief Returns the number of nodes in this graph. + unsigned getNumNodes() const { return nodeListSize; } + + /// \brief Returns the number of edges in this graph. + unsigned getNumEdges() const { return edgeListSize; } + + /// \brief Return the cost vector for the given node. + Vector& getNodeCosts(const NodeIterator &nodeItr) { + return getNodeEntry(nodeItr).getCosts(); + } + + /// \brief Return the cost vector for the give node. + const Vector& getNodeCosts(const ConstNodeIterator &nodeItr) const { + return getNodeEntry(nodeItr).getCosts(); + } + + /// \brief Return the degree of the given node. + unsigned getNodeDegree(const NodeIterator &nodeItr) const { + return getNodeEntry(nodeItr).getDegree(); + } + + /// \brief Assigns sequential IDs to the nodes, starting at 0, which + /// remain valid until the next addition or removal of a node. + void assignNodeIDs() { + unsigned curID = 0; + idToNodeMap.resize(getNumNodes()); + for (NodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd(); + nodeItr != nodeEnd; ++nodeItr, ++curID) { + getNodeEntry(nodeItr).setID(curID); + idToNodeMap[curID] = nodeItr; + } + nodeIDsValid = true; + } + + /// \brief Assigns sequential IDs to the nodes using the ordering of the + /// given vector. 
+ void assignNodeIDs(const std::vector<NodeIterator> &nodeOrdering) { + assert((getNumNodes() == nodeOrdering.size()) && + "Wrong number of nodes in node ordering."); + idToNodeMap = nodeOrdering; + for (unsigned nodeID = 0; nodeID < idToNodeMap.size(); ++nodeID) { + getNodeEntry(idToNodeMap[nodeID]).setID(nodeID); + } + nodeIDsValid = true; + } + + /// \brief Returns true if valid node IDs are assigned, false otherwise. + bool areNodeIDsValid() const { return nodeIDsValid; } + + /// \brief Return the numeric ID of the given node. + /// + /// Calls to this method will result in an assertion failure if there have + /// been any node additions or removals since the last call to + /// assignNodeIDs(). + unsigned getNodeID(const ConstNodeIterator &nodeItr) const { + assert(nodeIDsValid && "Attempt to retrieve invalid ID."); + return getNodeEntry(nodeItr).getID(); + } + + /// \brief Returns the iterator associated with the given node ID. + NodeIterator getNodeItr(unsigned nodeID) { + assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID."); + return idToNodeMap[nodeID]; + } + + /// \brief Returns the iterator associated with the given node ID. + ConstNodeIterator getNodeItr(unsigned nodeID) const { + assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID."); + return idToNodeMap[nodeID]; + } + + /// \brief Removes the given node (and all attached edges) from the graph. + void removeNode(const NodeIterator &nodeItr) { + assert(iteratorInRange(nodeItr, nodeList.begin(), nodeList.end()) && + "Iterator does not belong to this graph!"); + + invalidateNodeIDs(); + + NodeEntry &nodeEntry = getNodeEntry(nodeItr); + + // We need to copy this out because it will be destroyed as the edges are + // removed. 
+ typedef std::vector<EdgeIterator> AdjEdgeList; + typedef typename AdjEdgeList::iterator AdjEdgeListItr; + + AdjEdgeList adjEdges; + adjEdges.reserve(nodeEntry.getDegree()); + std::copy(nodeEntry.adjEdgesBegin(), nodeEntry.adjEdgesEnd(), + std::back_inserter(adjEdges)); + + // Iterate over the copied out edges and remove them from the graph. + for (AdjEdgeListItr itr = adjEdges.begin(), end = adjEdges.end(); + itr != end; ++itr) { + removeEdge(*itr); + } + + // Erase the node from the nodelist. + nodeList.erase(nodeItr); + --nodeListSize; + } + + NodeIterator nodesBegin() { return nodeList.begin(); } + ConstNodeIterator nodesBegin() const { return nodeList.begin(); } + NodeIterator nodesEnd() { return nodeList.end(); } + ConstNodeIterator nodesEnd() const { return nodeList.end(); } + + AdjEdgeIterator adjEdgesBegin(const NodeIterator &nodeItr) { + return getNodeEntry(nodeItr).adjEdgesBegin(); + } + + ConstAdjEdgeIterator adjEdgesBegin(const ConstNodeIterator &nodeItr) const { + return getNodeEntry(nodeItr).adjEdgesBegin(); + } + + AdjEdgeIterator adjEdgesEnd(const NodeIterator &nodeItr) { + return getNodeEntry(nodeItr).adjEdgesEnd(); + } + + ConstAdjEdgeIterator adjEdgesEnd(const ConstNodeIterator &nodeItr) const { + getNodeEntry(nodeItr).adjEdgesEnd(); + } + + EdgeIterator findEdge(const NodeIterator &node1Itr, + const NodeIterator &node2Itr) { + + for (AdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr), + adjEdgeEnd = adjEdgesEnd(node1Itr); + adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) || + (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) { + return *adjEdgeItr; + } + } + + return edgeList.end(); + } + + ConstEdgeIterator findEdge(const ConstNodeIterator &node1Itr, + const ConstNodeIterator &node2Itr) const { + + for (ConstAdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr), + adjEdgeEnd = adjEdgesEnd(node1Itr); + adjEdgeItr != adjEdgesEnd; ++adjEdgeItr) { + if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) || + 
(getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) { + return *adjEdgeItr; + } + } + + return edgeList.end(); + } + + Matrix& getEdgeCosts(const EdgeIterator &edgeItr) { + return getEdgeEntry(edgeItr).getCosts(); + } + + const Matrix& getEdgeCosts(const ConstEdgeIterator &edgeItr) const { + return getEdgeEntry(edgeItr).getCosts(); + } + + NodeIterator getEdgeNode1Itr(const EdgeIterator &edgeItr) { + return getEdgeEntry(edgeItr).getNode1Itr(); + } + + ConstNodeIterator getEdgeNode1Itr(const ConstEdgeIterator &edgeItr) const { + return getEdgeEntry(edgeItr).getNode1Itr(); + } + + NodeIterator getEdgeNode2Itr(const EdgeIterator &edgeItr) { + return getEdgeEntry(edgeItr).getNode2Itr(); + } + + ConstNodeIterator getEdgeNode2Itr(const ConstEdgeIterator &edgeItr) const { + return getEdgeEntry(edgeItr).getNode2Itr(); + } + + NodeIterator getEdgeOtherNode(const EdgeIterator &edgeItr, + const NodeIterator &nodeItr) { + + EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); + if (nodeItr == edgeEntry.getNode1Itr()) { + return edgeEntry.getNode2Itr(); + } + //else + return edgeEntry.getNode1Itr(); + } + + ConstNodeIterator getEdgeOtherNode(const ConstEdgeIterator &edgeItr, + const ConstNodeIterator &nodeItr) const { + + const EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); + if (nodeItr == edgeEntry.getNode1Itr()) { + return edgeEntry.getNode2Itr(); + } + //else + return edgeEntry.getNode1Itr(); + } + + void removeEdge(const EdgeIterator &edgeItr) { + assert(iteratorInRange(edgeItr, edgeList.begin(), edgeList.end()) && + "Iterator does not belong to this graph!"); + + --edgeListSize; + + // Get the edge entry. + EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); + + // Get the nodes entry. + NodeEntry &node1Entry(getNodeEntry(edgeEntry.getNode1Itr())), + &node2Entry(getNodeEntry(edgeEntry.getNode2Itr())); + + // Disconnect the edge from the nodes. 
+ node1Entry.removeAdjEdge(edgeEntry.getNode1ThisEdgeItr()); + node2Entry.removeAdjEdge(edgeEntry.getNode2ThisEdgeItr()); + + // Remove the edge from the graph. + edgeList.erase(edgeItr); + } + + EdgeIterator edgesBegin() { return edgeList.begin(); } + ConstEdgeIterator edgesBegin() const { return edgeList.begin(); } + EdgeIterator edgesEnd() { return edgeList.end(); } + ConstEdgeIterator edgesEnd() const { return edgeList.end(); } + + void clear() { + nodeList.clear(); + nodeListSize = 0; + edgeList.clear(); + edgeListSize = 0; + idToNodeMap.clear(); + } + + template <typename OStream> + void printDot(OStream &os) const { + + assert(areNodeIDsValid() && + "Cannot print a .dot of a graph unless IDs have been assigned."); + + os << "graph {\n"; + + for (ConstNodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + os << " node" << getNodeID(nodeItr) << " [ label=\"" + << getNodeID(nodeItr) << ": " << getNodeCosts(nodeItr) << "\" ]\n"; + } + + os << " edge [ len=" << getNumNodes() << " ]\n"; + + for (ConstEdgeIterator edgeItr = edgesBegin(), edgeEnd = edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + os << " node" << getNodeID(getEdgeNode1Itr(edgeItr)) + << " -- node" << getNodeID(getEdgeNode2Itr(edgeItr)) + << " [ label=\""; + + const Matrix &edgeCosts = getEdgeCosts(edgeItr); + + for (unsigned i = 0; i < edgeCosts.getRows(); ++i) { + os << edgeCosts.getRowAsVector(i) << "\\n"; + } + + os << "\" ]\n"; + } + + os << "}\n"; + } + + template <typename OStream> + void printDot(OStream &os) { + if (!areNodeIDsValid()) { + assignNodeIDs(); + } + + const_cast<const ThisGraphT*>(this)->printDot(os); + } + + template <typename OStream> + void dumpTo(OStream &os) const { + typedef ConstNodeIterator ConstNodeID; + + assert(areNodeIDsValid() && + "Cannot dump a graph unless IDs have been assigned."); + + for (ConstNodeIterator nItr = nodesBegin(), nEnd = nodesEnd(); + nItr != nEnd; ++nItr) { + os << getNodeID(nItr) << "\n"; + } + + 
unsigned edgeNumber = 1; + for (ConstEdgeIterator eItr = edgesBegin(), eEnd = edgesEnd(); + eItr != eEnd; ++eItr) { + + os << edgeNumber++ << ": { " + << getNodeID(getEdgeNode1Itr(eItr)) << ", " + << getNodeID(getEdgeNode2Itr(eItr)) << " }\n"; + } + + } + + template <typename OStream> + void dumpTo(OStream &os) { + if (!areNodeIDsValid()) { + assignNodeIDs(); + } + + const_cast<const ThisGraphT*>(this)->dumpTo(os); + } + +}; + +/// \brief Provides a base from which to derive nodes for GraphBase. +template <typename NodeImpl, typename EdgeImpl> +class NodeBase { +private: + + typedef GraphBase<NodeImpl, EdgeImpl> GraphBaseT; + typedef NodeBaseTraits<GraphBaseT> ThisNodeBaseTraits; + +public: + typedef typename GraphBaseT::EdgeIterator EdgeIterator; + +private: + typedef typename ThisNodeBaseTraits::AdjEdgeList AdjEdgeList; + + unsigned degree, id; + Vector costs; + AdjEdgeList adjEdges; + + void operator=(const NodeBase& other) { + assert(false && "Can't assign NodeEntrys."); + } + +public: + + typedef typename ThisNodeBaseTraits::AdjEdgeIterator AdjEdgeIterator; + typedef typename ThisNodeBaseTraits::ConstAdjEdgeIterator + ConstAdjEdgeIterator; + + NodeBase(const Vector &costs) : degree(0), costs(costs) { + assert((costs.getLength() > 0) && "Can't have zero-length cost vector."); + } + + Vector& getCosts() { return costs; } + const Vector& getCosts() const { return costs; } + + unsigned getDegree() const { return degree; } + + void setID(unsigned id) { this->id = id; } + unsigned getID() const { return id; } + + AdjEdgeIterator addAdjEdge(const EdgeIterator &edgeItr) { + ++degree; + return adjEdges.insert(adjEdges.end(), edgeItr); + } + + void removeAdjEdge(const AdjEdgeIterator &adjEdgeItr) { + --degree; + adjEdges.erase(adjEdgeItr); + } + + AdjEdgeIterator adjEdgesBegin() { return adjEdges.begin(); } + ConstAdjEdgeIterator adjEdgesBegin() const { return adjEdges.begin(); } + AdjEdgeIterator adjEdgesEnd() { return adjEdges.end(); } + ConstAdjEdgeIterator 
adjEdgesEnd() const { return adjEdges.end(); } + +}; + +template <typename NodeImpl, typename EdgeImpl> +class EdgeBase { +public: + typedef typename GraphBase<NodeImpl, EdgeImpl>::NodeIterator NodeIterator; + typedef typename GraphBase<NodeImpl, EdgeImpl>::EdgeIterator EdgeIterator; + + typedef typename NodeImpl::AdjEdgeIterator NodeAdjEdgeIterator; + +private: + + NodeIterator node1Itr, node2Itr; + NodeAdjEdgeIterator node1ThisEdgeItr, node2ThisEdgeItr; + Matrix costs; + + void operator=(const EdgeBase &other) { + assert(false && "Can't assign EdgeEntrys."); + } + +public: + + EdgeBase(const NodeIterator &node1Itr, const NodeIterator &node2Itr, + const Matrix &costs) : + node1Itr(node1Itr), node2Itr(node2Itr), costs(costs) { + + assert((costs.getRows() > 0) && (costs.getCols() > 0) && + "Can't have zero-dimensioned cost matrices"); + } + + Matrix& getCosts() { return costs; } + const Matrix& getCosts() const { return costs; } + + const NodeIterator& getNode1Itr() const { return node1Itr; } + const NodeIterator& getNode2Itr() const { return node2Itr; } + + void setNode1ThisEdgeItr(const NodeAdjEdgeIterator &node1ThisEdgeItr) { + this->node1ThisEdgeItr = node1ThisEdgeItr; + } + + const NodeAdjEdgeIterator& getNode1ThisEdgeItr() const { + return node1ThisEdgeItr; + } + + void setNode2ThisEdgeItr(const NodeAdjEdgeIterator &node2ThisEdgeItr) { + this->node2ThisEdgeItr = node2ThisEdgeItr; + } + + const NodeAdjEdgeIterator& getNode2ThisEdgeItr() const { + return node2ThisEdgeItr; + } + +}; + + +} + +#endif // LLVM_CODEGEN_PBQP_GRAPHBASE_HPP diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h new file mode 100644 index 0000000..e786246 --- /dev/null +++ b/lib/CodeGen/PBQP/HeuristicSolver.h @@ -0,0 +1,789 @@ +//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Heuristic PBQP solver. This solver is able to perform optimal reductions for +// nodes of degree 0, 1 or 2. For nodes of degree >2 a plugable heuristic is +// used to to select a node for reduction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H +#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H + +#include "Solver.h" +#include "AnnotatedGraph.h" +#include "llvm/Support/raw_ostream.h" +#include <limits> + +namespace PBQP { + +/// \brief Important types for the HeuristicSolverImpl. +/// +/// Declared seperately to allow access to heuristic classes before the solver +/// is fully constructed. +template <typename HeuristicNodeData, typename HeuristicEdgeData> +class HSITypes { +public: + + class NodeData; + class EdgeData; + + typedef AnnotatedGraph<NodeData, EdgeData> SolverGraph; + typedef typename SolverGraph::NodeIterator GraphNodeIterator; + typedef typename SolverGraph::EdgeIterator GraphEdgeIterator; + typedef typename SolverGraph::AdjEdgeIterator GraphAdjEdgeIterator; + + typedef std::list<GraphNodeIterator> NodeList; + typedef typename NodeList::iterator NodeListIterator; + + typedef std::vector<GraphNodeIterator> NodeStack; + typedef typename NodeStack::iterator NodeStackIterator; + + class NodeData { + friend class EdgeData; + + private: + + typedef std::list<GraphEdgeIterator> LinksList; + + unsigned numLinks; + LinksList links, solvedLinks; + NodeListIterator bucketItr; + HeuristicNodeData heuristicData; + + public: + + typedef typename LinksList::iterator AdjLinkIterator; + + private: + + AdjLinkIterator addLink(const GraphEdgeIterator &edgeItr) { + ++numLinks; + return links.insert(links.end(), edgeItr); + } + + void delLink(const AdjLinkIterator &adjLinkItr) { + --numLinks; + links.erase(adjLinkItr); + } + + public: + + NodeData() : numLinks(0) {} + + 
unsigned getLinkDegree() const { return numLinks; } + + HeuristicNodeData& getHeuristicData() { return heuristicData; } + const HeuristicNodeData& getHeuristicData() const { + return heuristicData; + } + + void setBucketItr(const NodeListIterator &bucketItr) { + this->bucketItr = bucketItr; + } + + const NodeListIterator& getBucketItr() const { + return bucketItr; + } + + AdjLinkIterator adjLinksBegin() { + return links.begin(); + } + + AdjLinkIterator adjLinksEnd() { + return links.end(); + } + + void addSolvedLink(const GraphEdgeIterator &solvedLinkItr) { + solvedLinks.push_back(solvedLinkItr); + } + + AdjLinkIterator solvedLinksBegin() { + return solvedLinks.begin(); + } + + AdjLinkIterator solvedLinksEnd() { + return solvedLinks.end(); + } + + }; + + class EdgeData { + private: + + SolverGraph &g; + GraphNodeIterator node1Itr, node2Itr; + HeuristicEdgeData heuristicData; + typename NodeData::AdjLinkIterator node1ThisEdgeItr, node2ThisEdgeItr; + + public: + + EdgeData(SolverGraph &g) : g(g) {} + + HeuristicEdgeData& getHeuristicData() { return heuristicData; } + const HeuristicEdgeData& getHeuristicData() const { + return heuristicData; + } + + void setup(const GraphEdgeIterator &thisEdgeItr) { + node1Itr = g.getEdgeNode1Itr(thisEdgeItr); + node2Itr = g.getEdgeNode2Itr(thisEdgeItr); + + node1ThisEdgeItr = g.getNodeData(node1Itr).addLink(thisEdgeItr); + node2ThisEdgeItr = g.getNodeData(node2Itr).addLink(thisEdgeItr); + } + + void unlink() { + g.getNodeData(node1Itr).delLink(node1ThisEdgeItr); + g.getNodeData(node2Itr).delLink(node2ThisEdgeItr); + } + + }; + +}; + +template <typename Heuristic> +class HeuristicSolverImpl { +public: + // Typedefs to make life easier: + typedef HSITypes<typename Heuristic::NodeData, + typename Heuristic::EdgeData> HSIT; + typedef typename HSIT::SolverGraph SolverGraph; + typedef typename HSIT::NodeData NodeData; + typedef typename HSIT::EdgeData EdgeData; + typedef typename HSIT::GraphNodeIterator GraphNodeIterator; + typedef 
typename HSIT::GraphEdgeIterator GraphEdgeIterator; + typedef typename HSIT::GraphAdjEdgeIterator GraphAdjEdgeIterator; + + typedef typename HSIT::NodeList NodeList; + typedef typename HSIT::NodeListIterator NodeListIterator; + + typedef std::vector<GraphNodeIterator> NodeStack; + typedef typename NodeStack::iterator NodeStackIterator; + + /// \brief Constructor, which performs all the actual solver work. + HeuristicSolverImpl(const SimpleGraph &orig) : + solution(orig.getNumNodes(), true) + { + copyGraph(orig); + simplify(); + setup(); + computeSolution(); + computeSolutionCost(orig); + } + + /// \brief Returns the graph for this solver. + SolverGraph& getGraph() { return g; } + + /// \brief Return the solution found by this solver. + const Solution& getSolution() const { return solution; } + +private: + + /// \brief Add the given node to the appropriate bucket for its link + /// degree. + void addToBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g.getNodeData(nodeItr); + + switch (nodeData.getLinkDegree()) { + case 0: nodeData.setBucketItr( + r0Bucket.insert(r0Bucket.end(), nodeItr)); + break; + case 1: nodeData.setBucketItr( + r1Bucket.insert(r1Bucket.end(), nodeItr)); + break; + case 2: nodeData.setBucketItr( + r2Bucket.insert(r2Bucket.end(), nodeItr)); + break; + default: heuristic.addToRNBucket(nodeItr); + break; + } + } + + /// \brief Remove the given node from the appropriate bucket for its link + /// degree. + void removeFromBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g.getNodeData(nodeItr); + + switch (nodeData.getLinkDegree()) { + case 0: r0Bucket.erase(nodeData.getBucketItr()); break; + case 1: r1Bucket.erase(nodeData.getBucketItr()); break; + case 2: r2Bucket.erase(nodeData.getBucketItr()); break; + default: heuristic.removeFromRNBucket(nodeItr); break; + } + } + +public: + + /// \brief Add a link. 
+ void addLink(const GraphEdgeIterator &edgeItr) { + g.getEdgeData(edgeItr).setup(edgeItr); + + if ((g.getNodeData(g.getEdgeNode1Itr(edgeItr)).getLinkDegree() > 2) || + (g.getNodeData(g.getEdgeNode2Itr(edgeItr)).getLinkDegree() > 2)) { + heuristic.handleAddLink(edgeItr); + } + } + + /// \brief Remove link, update info for node. + /// + /// Only updates information for the given node, since usually the other + /// is about to be removed. + void removeLink(const GraphEdgeIterator &edgeItr, + const GraphNodeIterator &nodeItr) { + + if (g.getNodeData(nodeItr).getLinkDegree() > 2) { + heuristic.handleRemoveLink(edgeItr, nodeItr); + } + g.getEdgeData(edgeItr).unlink(); + } + + /// \brief Remove link, update info for both nodes. Useful for R2 only. + void removeLinkR2(const GraphEdgeIterator &edgeItr) { + GraphNodeIterator node1Itr = g.getEdgeNode1Itr(edgeItr); + + if (g.getNodeData(node1Itr).getLinkDegree() > 2) { + heuristic.handleRemoveLink(edgeItr, node1Itr); + } + removeLink(edgeItr, g.getEdgeNode2Itr(edgeItr)); + } + + /// \brief Removes all links connected to the given node. + void unlinkNode(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g.getNodeData(nodeItr); + + typedef std::vector<GraphEdgeIterator> TempEdgeList; + + TempEdgeList edgesToUnlink; + edgesToUnlink.reserve(nodeData.getLinkDegree()); + + // Copy adj edges into a temp vector. We want to destroy them during + // the unlink, and we can't do that while we're iterating over them. + std::copy(nodeData.adjLinksBegin(), nodeData.adjLinksEnd(), + std::back_inserter(edgesToUnlink)); + + for (typename TempEdgeList::iterator + edgeItr = edgesToUnlink.begin(), edgeEnd = edgesToUnlink.end(); + edgeItr != edgeEnd; ++edgeItr) { + + GraphNodeIterator otherNode = g.getEdgeOtherNode(*edgeItr, nodeItr); + + removeFromBucket(otherNode); + removeLink(*edgeItr, otherNode); + addToBucket(otherNode); + } + } + + /// \brief Push the given node onto the stack to be solved with + /// backpropagation. 
+ void pushStack(const GraphNodeIterator &nodeItr) { + stack.push_back(nodeItr); + } + + /// \brief Set the solution of the given node. + void setSolution(const GraphNodeIterator &nodeItr, unsigned solIndex) { + solution.setSelection(g.getNodeID(nodeItr), solIndex); + + for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), + adjEdgeEnd = g.adjEdgesEnd(nodeItr); + adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + GraphEdgeIterator edgeItr(*adjEdgeItr); + GraphNodeIterator adjNodeItr(g.getEdgeOtherNode(edgeItr, nodeItr)); + g.getNodeData(adjNodeItr).addSolvedLink(edgeItr); + } + } + +private: + + SolverGraph g; + Heuristic heuristic; + Solution solution; + + NodeList r0Bucket, + r1Bucket, + r2Bucket; + + NodeStack stack; + + // Copy the SimpleGraph into an annotated graph which we can use for reduction. + void copyGraph(const SimpleGraph &orig) { + + assert((g.getNumEdges() == 0) && (g.getNumNodes() == 0) && + "Graph should be empty prior to solver setup."); + + assert(orig.areNodeIDsValid() && + "Cannot copy from a graph with invalid node IDs."); + + std::vector<GraphNodeIterator> newNodeItrs; + + for (unsigned nodeID = 0; nodeID < orig.getNumNodes(); ++nodeID) { + newNodeItrs.push_back( + g.addNode(orig.getNodeCosts(orig.getNodeItr(nodeID)), NodeData())); + } + + for (SimpleGraph::ConstEdgeIterator + origEdgeItr = orig.edgesBegin(), origEdgeEnd = orig.edgesEnd(); + origEdgeItr != origEdgeEnd; ++origEdgeItr) { + + unsigned id1 = orig.getNodeID(orig.getEdgeNode1Itr(origEdgeItr)), + id2 = orig.getNodeID(orig.getEdgeNode2Itr(origEdgeItr)); + + g.addEdge(newNodeItrs[id1], newNodeItrs[id2], + orig.getEdgeCosts(origEdgeItr), EdgeData(g)); + } + + // Assign IDs to the new nodes using the ordering from the old graph, + // this will lead to nodes in the new graph getting the same ID as the + // corresponding node in the old graph. + g.assignNodeIDs(newNodeItrs); + } + + // Simplify the annotated graph by eliminating independent edges and trivial + // nodes. 
+ void simplify() { + disconnectTrivialNodes(); + eliminateIndependentEdges(); + } + + // Eliminate trivial nodes. + void disconnectTrivialNodes() { + for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + if (g.getNodeCosts(nodeItr).getLength() == 1) { + + std::vector<GraphEdgeIterator> edgesToRemove; + + for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), + adjEdgeEnd = g.adjEdgesEnd(nodeItr); + adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + + GraphEdgeIterator edgeItr = *adjEdgeItr; + + if (g.getEdgeNode1Itr(edgeItr) == nodeItr) { + GraphNodeIterator otherNodeItr = g.getEdgeNode2Itr(edgeItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(edgeItr).getRowAsVector(0); + } + else { + GraphNodeIterator otherNodeItr = g.getEdgeNode1Itr(edgeItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(edgeItr).getColAsVector(0); + } + + edgesToRemove.push_back(edgeItr); + } + + while (!edgesToRemove.empty()) { + g.removeEdge(edgesToRemove.back()); + edgesToRemove.pop_back(); + } + } + } + } + + void eliminateIndependentEdges() { + std::vector<GraphEdgeIterator> edgesToProcess; + + for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + edgesToProcess.push_back(edgeItr); + } + + while (!edgesToProcess.empty()) { + tryToEliminateEdge(edgesToProcess.back()); + edgesToProcess.pop_back(); + } + } + + void tryToEliminateEdge(const GraphEdgeIterator &edgeItr) { + if (tryNormaliseEdgeMatrix(edgeItr)) { + g.removeEdge(edgeItr); + } + } + + bool tryNormaliseEdgeMatrix(const GraphEdgeIterator &edgeItr) { + + Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + Vector &uCosts = g.getNodeCosts(g.getEdgeNode1Itr(edgeItr)), + &vCosts = g.getNodeCosts(g.getEdgeNode2Itr(edgeItr)); + + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + PBQPNum rowMin = edgeCosts.getRowMin(r); + uCosts[r] += rowMin; + if (rowMin != std::numeric_limits<PBQPNum>::infinity()) { + 
edgeCosts.subFromRow(r, rowMin); + } + else { + edgeCosts.setRow(r, 0); + } + } + + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + PBQPNum colMin = edgeCosts.getColMin(c); + vCosts[c] += colMin; + if (colMin != std::numeric_limits<PBQPNum>::infinity()) { + edgeCosts.subFromCol(c, colMin); + } + else { + edgeCosts.setCol(c, 0); + } + } + + return edgeCosts.isZero(); + } + + void setup() { + setupLinks(); + heuristic.initialise(*this); + setupBuckets(); + } + + void setupLinks() { + for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + g.getEdgeData(edgeItr).setup(edgeItr); + } + } + + void setupBuckets() { + for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + addToBucket(nodeItr); + } + } + + void computeSolution() { + assert(g.areNodeIDsValid() && + "Nodes cannot be added/removed during reduction."); + + reduce(); + computeTrivialSolutions(); + backpropagate(); + } + + void printNode(const GraphNodeIterator &nodeItr) { + llvm::errs() << "Node " << g.getNodeID(nodeItr) << " (" << &*nodeItr << "):\n" + << " costs = " << g.getNodeCosts(nodeItr) << "\n" + << " link degree = " << g.getNodeData(nodeItr).getLinkDegree() << "\n" + << " links = [ "; + + for (typename HSIT::NodeData::AdjLinkIterator + aeItr = g.getNodeData(nodeItr).adjLinksBegin(), + aeEnd = g.getNodeData(nodeItr).adjLinksEnd(); + aeItr != aeEnd; ++aeItr) { + llvm::errs() << "(" << g.getNodeID(g.getEdgeNode1Itr(*aeItr)) + << ", " << g.getNodeID(g.getEdgeNode2Itr(*aeItr)) + << ") "; + } + llvm::errs() << "]\n"; + } + + void dumpState() { + llvm::errs() << "\n"; + + for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + printNode(nodeItr); + } + + NodeList* buckets[] = { &r0Bucket, &r1Bucket, &r2Bucket }; + + for (unsigned b = 0; b < 3; ++b) { + NodeList &bucket = *buckets[b]; + + llvm::errs() << "Bucket " << b << ": [ "; + + for 
(NodeListIterator nItr = bucket.begin(), nEnd = bucket.end(); + nItr != nEnd; ++nItr) { + llvm::errs() << g.getNodeID(*nItr) << " "; + } + + llvm::errs() << "]\n"; + } + + llvm::errs() << "Stack: [ "; + for (NodeStackIterator nsItr = stack.begin(), nsEnd = stack.end(); + nsItr != nsEnd; ++nsItr) { + llvm::errs() << g.getNodeID(*nsItr) << " "; + } + llvm::errs() << "]\n"; + } + + void reduce() { + bool reductionFinished = r1Bucket.empty() && r2Bucket.empty() && + heuristic.rNBucketEmpty(); + + while (!reductionFinished) { + + if (!r1Bucket.empty()) { + processR1(); + } + else if (!r2Bucket.empty()) { + processR2(); + } + else if (!heuristic.rNBucketEmpty()) { + solution.setProvedOptimal(false); + solution.incRNReductions(); + heuristic.processRN(); + } + else reductionFinished = true; + } + + }; + + void processR1() { + + // Remove the first node in the R0 bucket: + GraphNodeIterator xNodeItr = r1Bucket.front(); + r1Bucket.pop_front(); + + solution.incR1Reductions(); + + //llvm::errs() << "Applying R1 to " << g.getNodeID(xNodeItr) << "\n"; + + assert((g.getNodeData(xNodeItr).getLinkDegree() == 1) && + "Node in R1 bucket has degree != 1"); + + GraphEdgeIterator edgeItr = *g.getNodeData(xNodeItr).adjLinksBegin(); + + const Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + + const Vector &xCosts = g.getNodeCosts(xNodeItr); + unsigned xLen = xCosts.getLength(); + + // Duplicate a little code to avoid transposing matrices: + if (xNodeItr == g.getEdgeNode1Itr(edgeItr)) { + GraphNodeIterator yNodeItr = g.getEdgeNode2Itr(edgeItr); + Vector &yCosts = g.getNodeCosts(yNodeItr); + unsigned yLen = yCosts.getLength(); + + for (unsigned j = 0; j < yLen; ++j) { + PBQPNum min = edgeCosts[0][j] + xCosts[0]; + for (unsigned i = 1; i < xLen; ++i) { + PBQPNum c = edgeCosts[i][j] + xCosts[i]; + if (c < min) + min = c; + } + yCosts[j] += min; + } + } + else { + GraphNodeIterator yNodeItr = g.getEdgeNode1Itr(edgeItr); + Vector &yCosts = g.getNodeCosts(yNodeItr); + unsigned yLen = 
yCosts.getLength(); + + for (unsigned i = 0; i < yLen; ++i) { + PBQPNum min = edgeCosts[i][0] + xCosts[0]; + + for (unsigned j = 1; j < xLen; ++j) { + PBQPNum c = edgeCosts[i][j] + xCosts[j]; + if (c < min) + min = c; + } + yCosts[i] += min; + } + } + + unlinkNode(xNodeItr); + pushStack(xNodeItr); + } + + void processR2() { + + GraphNodeIterator xNodeItr = r2Bucket.front(); + r2Bucket.pop_front(); + + solution.incR2Reductions(); + + // Unlink is unsafe here. At some point it may optimistically more a node + // to a lower-degree list when its degree will later rise, or vice versa, + // violating the assumption that node degrees monotonically decrease + // during the reduction phase. Instead we'll bucket shuffle manually. + pushStack(xNodeItr); + + assert((g.getNodeData(xNodeItr).getLinkDegree() == 2) && + "Node in R2 bucket has degree != 2"); + + const Vector &xCosts = g.getNodeCosts(xNodeItr); + + typename NodeData::AdjLinkIterator tempItr = + g.getNodeData(xNodeItr).adjLinksBegin(); + + GraphEdgeIterator yxEdgeItr = *tempItr, + zxEdgeItr = *(++tempItr); + + GraphNodeIterator yNodeItr = g.getEdgeOtherNode(yxEdgeItr, xNodeItr), + zNodeItr = g.getEdgeOtherNode(zxEdgeItr, xNodeItr); + + removeFromBucket(yNodeItr); + removeFromBucket(zNodeItr); + + removeLink(yxEdgeItr, yNodeItr); + removeLink(zxEdgeItr, zNodeItr); + + // Graph some of the costs: + bool flipEdge1 = (g.getEdgeNode1Itr(yxEdgeItr) == xNodeItr), + flipEdge2 = (g.getEdgeNode1Itr(zxEdgeItr) == xNodeItr); + + const Matrix *yxCosts = flipEdge1 ? + new Matrix(g.getEdgeCosts(yxEdgeItr).transpose()) : + &g.getEdgeCosts(yxEdgeItr), + *zxCosts = flipEdge2 ? 
+ new Matrix(g.getEdgeCosts(zxEdgeItr).transpose()) : + &g.getEdgeCosts(zxEdgeItr); + + unsigned xLen = xCosts.getLength(), + yLen = yxCosts->getRows(), + zLen = zxCosts->getRows(); + + // Compute delta: + Matrix delta(yLen, zLen); + + for (unsigned i = 0; i < yLen; ++i) { + for (unsigned j = 0; j < zLen; ++j) { + PBQPNum min = (*yxCosts)[i][0] + (*zxCosts)[j][0] + xCosts[0]; + for (unsigned k = 1; k < xLen; ++k) { + PBQPNum c = (*yxCosts)[i][k] + (*zxCosts)[j][k] + xCosts[k]; + if (c < min) { + min = c; + } + } + delta[i][j] = min; + } + } + + if (flipEdge1) + delete yxCosts; + + if (flipEdge2) + delete zxCosts; + + // Deal with the potentially induced yz edge. + GraphEdgeIterator yzEdgeItr = g.findEdge(yNodeItr, zNodeItr); + if (yzEdgeItr == g.edgesEnd()) { + yzEdgeItr = g.addEdge(yNodeItr, zNodeItr, delta, EdgeData(g)); + } + else { + // There was an edge, but we're going to screw with it. Delete the old + // link, update the costs. We'll re-link it later. + removeLinkR2(yzEdgeItr); + g.getEdgeCosts(yzEdgeItr) += + (yNodeItr == g.getEdgeNode1Itr(yzEdgeItr)) ? + delta : delta.transpose(); + } + + bool nullCostEdge = tryNormaliseEdgeMatrix(yzEdgeItr); + + // Nulled the edge, remove it entirely. + if (nullCostEdge) { + g.removeEdge(yzEdgeItr); + } + else { + // Edge remains - re-link it. + addLink(yzEdgeItr); + } + + addToBucket(yNodeItr); + addToBucket(zNodeItr); + } + + void computeTrivialSolutions() { + + for (NodeListIterator r0Itr = r0Bucket.begin(), r0End = r0Bucket.end(); + r0Itr != r0End; ++r0Itr) { + GraphNodeIterator nodeItr = *r0Itr; + + solution.incR0Reductions(); + setSolution(nodeItr, g.getNodeCosts(nodeItr).minIndex()); + } + + } + + void backpropagate() { + while (!stack.empty()) { + computeSolution(stack.back()); + stack.pop_back(); + } + } + + void computeSolution(const GraphNodeIterator &nodeItr) { + + NodeData &nodeData = g.getNodeData(nodeItr); + + Vector v(g.getNodeCosts(nodeItr)); + + // Solve based on existing links. 
+ for (typename NodeData::AdjLinkIterator + solvedLinkItr = nodeData.solvedLinksBegin(), + solvedLinkEnd = nodeData.solvedLinksEnd(); + solvedLinkItr != solvedLinkEnd; ++solvedLinkItr) { + + GraphEdgeIterator solvedEdgeItr(*solvedLinkItr); + Matrix &edgeCosts = g.getEdgeCosts(solvedEdgeItr); + + if (nodeItr == g.getEdgeNode1Itr(solvedEdgeItr)) { + GraphNodeIterator adjNode(g.getEdgeNode2Itr(solvedEdgeItr)); + unsigned adjSolution = + solution.getSelection(g.getNodeID(adjNode)); + v += edgeCosts.getColAsVector(adjSolution); + } + else { + GraphNodeIterator adjNode(g.getEdgeNode1Itr(solvedEdgeItr)); + unsigned adjSolution = + solution.getSelection(g.getNodeID(adjNode)); + v += edgeCosts.getRowAsVector(adjSolution); + } + + } + + setSolution(nodeItr, v.minIndex()); + } + + void computeSolutionCost(const SimpleGraph &orig) { + PBQPNum cost = 0.0; + + for (SimpleGraph::ConstNodeIterator + nodeItr = orig.nodesBegin(), nodeEnd = orig.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + unsigned nodeId = orig.getNodeID(nodeItr); + + cost += orig.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; + } + + for (SimpleGraph::ConstEdgeIterator + edgeItr = orig.edgesBegin(), edgeEnd = orig.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + SimpleGraph::ConstNodeIterator n1 = orig.getEdgeNode1Itr(edgeItr), + n2 = orig.getEdgeNode2Itr(edgeItr); + unsigned sol1 = solution.getSelection(orig.getNodeID(n1)), + sol2 = solution.getSelection(orig.getNodeID(n2)); + + cost += orig.getEdgeCosts(edgeItr)[sol1][sol2]; + } + + solution.setSolutionCost(cost); + } + +}; + +template <typename Heuristic> +class HeuristicSolver : public Solver { +public: + Solution solve(const SimpleGraph &g) const { + HeuristicSolverImpl<Heuristic> solverImpl(g); + return solverImpl.getSolution(); + } +}; + +} + +#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h new file mode 100644 index 0000000..3ac9e70 --- /dev/null +++ 
b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -0,0 +1,383 @@ +//===-- Briggs.h --- Briggs Heuristic for PBQP -----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the Briggs test for "allocability" of nodes in a +// PBQP graph representing a register allocation problem. Nodes which can be +// proven allocable (by a safe and relatively accurate test) are removed from +// the PBQP graph first. If no provably allocable node is present in the graph +// then the node with the minimal spill-cost to degree ratio is removed. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H +#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H + +#include "../HeuristicSolver.h" + +#include <set> + +namespace PBQP { +namespace Heuristics { + +class Briggs { + public: + + class NodeData; + class EdgeData; + + private: + + typedef HeuristicSolverImpl<Briggs> Solver; + typedef HSITypes<NodeData, EdgeData> HSIT; + typedef HSIT::SolverGraph SolverGraph; + typedef HSIT::GraphNodeIterator GraphNodeIterator; + typedef HSIT::GraphEdgeIterator GraphEdgeIterator; + + class LinkDegreeComparator { + public: + LinkDegreeComparator() : g(0) {} + LinkDegreeComparator(SolverGraph *g) : g(g) {} + + bool operator()(const GraphNodeIterator &node1Itr, + const GraphNodeIterator &node2Itr) const { + assert((g != 0) && "Graph object not set, cannot access node data."); + unsigned n1Degree = g->getNodeData(node1Itr).getLinkDegree(), + n2Degree = g->getNodeData(node2Itr).getLinkDegree(); + if (n1Degree > n2Degree) { + return true; + } + else if (n1Degree < n2Degree) { + return false; + } + // else they're "equal" by degree, differentiate based on ID. 
+ return g->getNodeID(node1Itr) < g->getNodeID(node2Itr); + } + + private: + SolverGraph *g; + }; + + class SpillPriorityComparator { + public: + SpillPriorityComparator() : g(0) {} + SpillPriorityComparator(SolverGraph *g) : g(g) {} + + bool operator()(const GraphNodeIterator &node1Itr, + const GraphNodeIterator &node2Itr) const { + assert((g != 0) && "Graph object not set, cannot access node data."); + PBQPNum cost1 = + g->getNodeCosts(node1Itr)[0] / + g->getNodeData(node1Itr).getLinkDegree(), + cost2 = + g->getNodeCosts(node2Itr)[0] / + g->getNodeData(node2Itr).getLinkDegree(); + + if (cost1 < cost2) { + return true; + } + else if (cost1 > cost2) { + return false; + } + // else they'er "equal" again, differentiate based on address again. + return g->getNodeID(node1Itr) < g->getNodeID(node2Itr); + } + + private: + SolverGraph *g; + }; + + typedef std::set<GraphNodeIterator, LinkDegreeComparator> + RNAllocableNodeList; + typedef RNAllocableNodeList::iterator RNAllocableNodeListIterator; + + typedef std::set<GraphNodeIterator, SpillPriorityComparator> + RNUnallocableNodeList; + typedef RNUnallocableNodeList::iterator RNUnallocableNodeListIterator; + + public: + + class NodeData { + private: + RNAllocableNodeListIterator rNAllocableNodeListItr; + RNUnallocableNodeListIterator rNUnallocableNodeListItr; + unsigned numRegOptions, numDenied, numSafe; + std::vector<unsigned> unsafeDegrees; + bool allocable; + + void addRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr, + const GraphEdgeIterator &edgeItr, bool add) { + + //assume we're adding... 
+ unsigned udTarget = 0, dir = 1; + + if (!add) { + udTarget = 1; + dir = ~0; + } + + EdgeData &linkEdgeData = g.getEdgeData(edgeItr).getHeuristicData(); + + EdgeData::ConstUnsafeIterator edgeUnsafeBegin, edgeUnsafeEnd; + + if (nodeItr == g.getEdgeNode1Itr(edgeItr)) { + numDenied += (dir * linkEdgeData.getWorstDegree()); + edgeUnsafeBegin = linkEdgeData.unsafeBegin(); + edgeUnsafeEnd = linkEdgeData.unsafeEnd(); + } + else { + numDenied += (dir * linkEdgeData.getReverseWorstDegree()); + edgeUnsafeBegin = linkEdgeData.reverseUnsafeBegin(); + edgeUnsafeEnd = linkEdgeData.reverseUnsafeEnd(); + } + + assert((unsafeDegrees.size() == + static_cast<unsigned>( + std::distance(edgeUnsafeBegin, edgeUnsafeEnd))) + && "Unsafe array size mismatch."); + + std::vector<unsigned>::iterator unsafeDegreesItr = + unsafeDegrees.begin(); + + for (EdgeData::ConstUnsafeIterator edgeUnsafeItr = edgeUnsafeBegin; + edgeUnsafeItr != edgeUnsafeEnd; + ++edgeUnsafeItr, ++unsafeDegreesItr) { + + if ((*edgeUnsafeItr == 1) && (*unsafeDegreesItr == udTarget)) { + numSafe -= dir; + } + *unsafeDegreesItr += (dir * (*edgeUnsafeItr)); + } + + allocable = (numDenied < numRegOptions) || (numSafe > 0); + } + + public: + + void setup(SolverGraph &g, const GraphNodeIterator &nodeItr) { + + numRegOptions = g.getNodeCosts(nodeItr).getLength() - 1; + + numSafe = numRegOptions; // Optimistic, correct below. + numDenied = 0; // Also optimistic. 
+ unsafeDegrees.resize(numRegOptions, 0); + + HSIT::NodeData &nodeData = g.getNodeData(nodeItr); + + for (HSIT::NodeData::AdjLinkIterator + adjLinkItr = nodeData.adjLinksBegin(), + adjLinkEnd = nodeData.adjLinksEnd(); + adjLinkItr != adjLinkEnd; ++adjLinkItr) { + + addRemoveLink(g, nodeItr, *adjLinkItr, true); + } + } + + bool isAllocable() const { return allocable; } + + void handleAddLink(SolverGraph &g, const GraphNodeIterator &nodeItr, + const GraphEdgeIterator &adjEdge) { + addRemoveLink(g, nodeItr, adjEdge, true); + } + + void handleRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr, + const GraphEdgeIterator &adjEdge) { + addRemoveLink(g, nodeItr, adjEdge, false); + } + + void setRNAllocableNodeListItr( + const RNAllocableNodeListIterator &rNAllocableNodeListItr) { + + this->rNAllocableNodeListItr = rNAllocableNodeListItr; + } + + RNAllocableNodeListIterator getRNAllocableNodeListItr() const { + return rNAllocableNodeListItr; + } + + void setRNUnallocableNodeListItr( + const RNUnallocableNodeListIterator &rNUnallocableNodeListItr) { + + this->rNUnallocableNodeListItr = rNUnallocableNodeListItr; + } + + RNUnallocableNodeListIterator getRNUnallocableNodeListItr() const { + return rNUnallocableNodeListItr; + } + + + }; + + class EdgeData { + private: + + typedef std::vector<unsigned> UnsafeArray; + + unsigned worstDegree, + reverseWorstDegree; + UnsafeArray unsafe, reverseUnsafe; + + public: + + EdgeData() : worstDegree(0), reverseWorstDegree(0) {} + + typedef UnsafeArray::const_iterator ConstUnsafeIterator; + + void setup(SolverGraph &g, const GraphEdgeIterator &edgeItr) { + const Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + unsigned numRegs = edgeCosts.getRows() - 1, + numReverseRegs = edgeCosts.getCols() - 1; + + unsafe.resize(numRegs, 0); + reverseUnsafe.resize(numReverseRegs, 0); + + std::vector<unsigned> rowInfCounts(numRegs, 0), + colInfCounts(numReverseRegs, 0); + + for (unsigned i = 0; i < numRegs; ++i) { + for (unsigned j = 0; j < 
numReverseRegs; ++j) { + if (edgeCosts[i + 1][j + 1] == + std::numeric_limits<PBQPNum>::infinity()) { + unsafe[i] = 1; + reverseUnsafe[j] = 1; + ++rowInfCounts[i]; + ++colInfCounts[j]; + + if (colInfCounts[j] > worstDegree) { + worstDegree = colInfCounts[j]; + } + + if (rowInfCounts[i] > reverseWorstDegree) { + reverseWorstDegree = rowInfCounts[i]; + } + } + } + } + } + + unsigned getWorstDegree() const { return worstDegree; } + unsigned getReverseWorstDegree() const { return reverseWorstDegree; } + ConstUnsafeIterator unsafeBegin() const { return unsafe.begin(); } + ConstUnsafeIterator unsafeEnd() const { return unsafe.end(); } + ConstUnsafeIterator reverseUnsafeBegin() const { + return reverseUnsafe.begin(); + } + ConstUnsafeIterator reverseUnsafeEnd() const { + return reverseUnsafe.end(); + } + }; + + void initialise(Solver &solver) { + this->s = &solver; + g = &s->getGraph(); + rNAllocableBucket = RNAllocableNodeList(LinkDegreeComparator(g)); + rNUnallocableBucket = + RNUnallocableNodeList(SpillPriorityComparator(g)); + + for (GraphEdgeIterator + edgeItr = g->edgesBegin(), edgeEnd = g->edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); + } + + for (GraphNodeIterator + nodeItr = g->nodesBegin(), nodeEnd = g->nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + g->getNodeData(nodeItr).getHeuristicData().setup(*g, nodeItr); + } + } + + void addToRNBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); + + if (nodeData.isAllocable()) { + nodeData.setRNAllocableNodeListItr( + rNAllocableBucket.insert(rNAllocableBucket.begin(), nodeItr)); + } + else { + nodeData.setRNUnallocableNodeListItr( + rNUnallocableBucket.insert(rNUnallocableBucket.begin(), nodeItr)); + } + } + + void removeFromRNBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); + + if (nodeData.isAllocable()) { + 
rNAllocableBucket.erase(nodeData.getRNAllocableNodeListItr()); + } + else { + rNUnallocableBucket.erase(nodeData.getRNUnallocableNodeListItr()); + } + } + + void handleAddLink(const GraphEdgeIterator &edgeItr) { + // We assume that if we got here this edge is attached to at least + // one high degree node. + g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); + + GraphNodeIterator n1Itr = g->getEdgeNode1Itr(edgeItr), + n2Itr = g->getEdgeNode2Itr(edgeItr); + + HSIT::NodeData &n1Data = g->getNodeData(n1Itr), + &n2Data = g->getNodeData(n2Itr); + + if (n1Data.getLinkDegree() > 2) { + n1Data.getHeuristicData().handleAddLink(*g, n1Itr, edgeItr); + } + if (n2Data.getLinkDegree() > 2) { + n2Data.getHeuristicData().handleAddLink(*g, n2Itr, edgeItr); + } + } + + void handleRemoveLink(const GraphEdgeIterator &edgeItr, + const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); + nodeData.handleRemoveLink(*g, nodeItr, edgeItr); + } + + void processRN() { + + if (!rNAllocableBucket.empty()) { + GraphNodeIterator selectedNodeItr = *rNAllocableBucket.begin(); + //std::cerr << "RN safely pushing " << g->getNodeID(selectedNodeItr) << "\n"; + rNAllocableBucket.erase(rNAllocableBucket.begin()); + s->pushStack(selectedNodeItr); + s->unlinkNode(selectedNodeItr); + } + else { + GraphNodeIterator selectedNodeItr = *rNUnallocableBucket.begin(); + //std::cerr << "RN optimistically pushing " << g->getNodeID(selectedNodeItr) << "\n"; + rNUnallocableBucket.erase(rNUnallocableBucket.begin()); + s->pushStack(selectedNodeItr); + s->unlinkNode(selectedNodeItr); + } + + } + + bool rNBucketEmpty() const { + return (rNAllocableBucket.empty() && rNUnallocableBucket.empty()); + } + +private: + + Solver *s; + SolverGraph *g; + RNAllocableNodeList rNAllocableBucket; + RNUnallocableNodeList rNUnallocableBucket; +}; + + + +} +} + + +#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/PBQPMath.h 
new file mode 100644 index 0000000..11f4b4b --- /dev/null +++ b/lib/CodeGen/PBQP/PBQPMath.h @@ -0,0 +1,288 @@ +//===-- PBQPMath.h - PBQP Vector and Matrix classes ------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_PBQPMATH_H +#define LLVM_CODEGEN_PBQP_PBQPMATH_H + +#include <cassert> +#include <algorithm> +#include <functional> + +namespace PBQP { + +typedef double PBQPNum; + +/// \brief PBQP Vector class. +class Vector { + public: + + /// \brief Construct a PBQP vector of the given size. + explicit Vector(unsigned length) : + length(length), data(new PBQPNum[length]) { + } + + /// \brief Construct a PBQP vector with initializer. + Vector(unsigned length, PBQPNum initVal) : + length(length), data(new PBQPNum[length]) { + std::fill(data, data + length, initVal); + } + + /// \brief Copy construct a PBQP vector. + Vector(const Vector &v) : + length(v.length), data(new PBQPNum[length]) { + std::copy(v.data, v.data + length, data); + } + + /// \brief Destroy this vector, return its memory. + ~Vector() { delete[] data; } + + /// \brief Assignment operator. + Vector& operator=(const Vector &v) { + delete[] data; + length = v.length; + data = new PBQPNum[length]; + std::copy(v.data, v.data + length, data); + return *this; + } + + /// \brief Return the length of the vector + unsigned getLength() const { + return length; + } + + /// \brief Element access. + PBQPNum& operator[](unsigned index) { + assert(index < length && "Vector element access out of bounds."); + return data[index]; + } + + /// \brief Const element access. + const PBQPNum& operator[](unsigned index) const { + assert(index < length && "Vector element access out of bounds."); + return data[index]; + } + + /// \brief Add another vector to this one. 
+ Vector& operator+=(const Vector &v) { + assert(length == v.length && "Vector length mismatch."); + std::transform(data, data + length, v.data, data, std::plus<PBQPNum>()); + return *this; + } + + /// \brief Subtract another vector from this one. + Vector& operator-=(const Vector &v) { + assert(length == v.length && "Vector length mismatch."); + std::transform(data, data + length, v.data, data, std::minus<PBQPNum>()); + return *this; + } + + /// \brief Returns the index of the minimum value in this vector + unsigned minIndex() const { + return std::min_element(data, data + length) - data; + } + + private: + unsigned length; + PBQPNum *data; +}; + +/// \brief Output a textual representation of the given vector on the given +/// output stream. +template <typename OStream> +OStream& operator<<(OStream &os, const Vector &v) { + assert((v.getLength() != 0) && "Zero-length vector badness."); + + os << "[ " << v[0]; + for (unsigned i = 1; i < v.getLength(); ++i) { + os << ", " << v[i]; + } + os << " ]"; + + return os; +} + + +/// \brief PBQP Matrix class +class Matrix { + public: + + /// \brief Construct a PBQP Matrix with the given dimensions. + Matrix(unsigned rows, unsigned cols) : + rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { + } + + /// \brief Construct a PBQP Matrix with the given dimensions and initial + /// value. + Matrix(unsigned rows, unsigned cols, PBQPNum initVal) : + rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { + std::fill(data, data + (rows * cols), initVal); + } + + /// \brief Copy construct a PBQP matrix. + Matrix(const Matrix &m) : + rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) { + std::copy(m.data, m.data + (rows * cols), data); + } + + /// \brief Destroy this matrix, return its memory. + ~Matrix() { delete[] data; } + + /// \brief Assignment operator. 
+ Matrix& operator=(const Matrix &m) { + delete[] data; + rows = m.rows; cols = m.cols; + data = new PBQPNum[rows * cols]; + std::copy(m.data, m.data + (rows * cols), data); + return *this; + } + + /// \brief Return the number of rows in this matrix. + unsigned getRows() const { return rows; } + + /// \brief Return the number of cols in this matrix. + unsigned getCols() const { return cols; } + + /// \brief Matrix element access. + PBQPNum* operator[](unsigned r) { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + /// \brief Matrix element access. + const PBQPNum* operator[](unsigned r) const { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + /// \brief Returns the given row as a vector. + Vector getRowAsVector(unsigned r) const { + Vector v(cols); + for (unsigned c = 0; c < cols; ++c) + v[c] = (*this)[r][c]; + return v; + } + + /// \brief Returns the given column as a vector. + Vector getColAsVector(unsigned c) const { + Vector v(rows); + for (unsigned r = 0; r < rows; ++r) + v[r] = (*this)[r][c]; + return v; + } + + /// \brief Reset the matrix to the given value. + Matrix& reset(PBQPNum val = 0) { + std::fill(data, data + (rows * cols), val); + return *this; + } + + /// \brief Set a single row of this matrix to the given value. + Matrix& setRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds."); + std::fill(data + (r * cols), data + ((r + 1) * cols), val); + return *this; + } + + /// \brief Set a single column of this matrix to the given value. + Matrix& setCol(unsigned c, PBQPNum val) { + assert(c < cols && "Column out of bounds."); + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] = val; + return *this; + } + + /// \brief Matrix transpose. + Matrix transpose() const { + Matrix m(cols, rows); + for (unsigned r = 0; r < rows; ++r) + for (unsigned c = 0; c < cols; ++c) + m[c][r] = (*this)[r][c]; + return m; + } + + /// \brief Returns the diagonal of the matrix as a vector. 
+ /// + /// Matrix must be square. + Vector diagonalize() const { + assert(rows == cols && "Attempt to diagonalize non-square matrix."); + + Vector v(rows); + for (unsigned r = 0; r < rows; ++r) + v[r] = (*this)[r][r]; + return v; + } + + /// \brief Add the given matrix to this one. + Matrix& operator+=(const Matrix &m) { + assert(rows == m.rows && cols == m.cols && + "Matrix dimensions mismatch."); + std::transform(data, data + (rows * cols), m.data, data, + std::plus<PBQPNum>()); + return *this; + } + + /// \brief Returns the minimum of the given row + PBQPNum getRowMin(unsigned r) const { + assert(r < rows && "Row out of bounds"); + return *std::min_element(data + (r * cols), data + ((r + 1) * cols)); + } + + /// \brief Returns the minimum of the given column + PBQPNum getColMin(unsigned c) const { + PBQPNum minElem = (*this)[0][c]; + for (unsigned r = 1; r < rows; ++r) + if ((*this)[r][c] < minElem) minElem = (*this)[r][c]; + return minElem; + } + + /// \brief Subtracts the given scalar from the elements of the given row. + Matrix& subFromRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds"); + std::transform(data + (r * cols), data + ((r + 1) * cols), + data + (r * cols), + std::bind2nd(std::minus<PBQPNum>(), val)); + return *this; + } + + /// \brief Subtracts the given scalar from the elements of the given column. + Matrix& subFromCol(unsigned c, PBQPNum val) { + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] -= val; + return *this; + } + + /// \brief Returns true if this is a zero matrix. + bool isZero() const { + return find_if(data, data + (rows * cols), + std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) == + data + (rows * cols); + } + + private: + unsigned rows, cols; + PBQPNum *data; +}; + +/// \brief Output a textual representation of the given matrix on the given +/// output stream. 
+template <typename OStream> +OStream& operator<<(OStream &os, const Matrix &m) { + + assert((m.getRows() != 0) && "Zero-row matrix badness."); + + for (unsigned i = 0; i < m.getRows(); ++i) { + os << m.getRowAsVector(i); + } + + return os; +} + +} + +#endif // LLVM_CODEGEN_PBQP_PBQPMATH_HPP diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h new file mode 100644 index 0000000..1ca9cae --- /dev/null +++ b/lib/CodeGen/PBQP/SimpleGraph.h @@ -0,0 +1,100 @@ +//===-- SimpleGraph.h - Simple PBQP Graph ----------------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simple PBQP graph class representing a PBQP problem. Graphs of this type +// can be passed to a PBQPSolver instance to solve the PBQP problem. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H +#define LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H + +#include "GraphBase.h" + +namespace PBQP { + +class SimpleEdge; + +class SimpleNode : public NodeBase<SimpleNode, SimpleEdge> { +public: + SimpleNode(const Vector &costs) : + NodeBase<SimpleNode, SimpleEdge>(costs) {} +}; + +class SimpleEdge : public EdgeBase<SimpleNode, SimpleEdge> { +public: + SimpleEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr, + const Matrix &costs) : + EdgeBase<SimpleNode, SimpleEdge>(node1Itr, node2Itr, costs) {} +}; + +class SimpleGraph : public GraphBase<SimpleNode, SimpleEdge> { +private: + + typedef GraphBase<SimpleNode, SimpleEdge> PGraph; + + void copyFrom(const SimpleGraph &other) { + assert(other.areNodeIDsValid() && + "Cannot copy from another graph unless IDs have been assigned."); + + std::vector<NodeIterator> newNodeItrs(other.getNumNodes()); + + for (ConstNodeIterator nItr = other.nodesBegin(), 
nEnd = other.nodesEnd(); + nItr != nEnd; ++nItr) { + newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr)); + } + + for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd(); + eItr != eEnd; ++eItr) { + + unsigned node1ID = other.getNodeID(other.getEdgeNode1Itr(eItr)), + node2ID = other.getNodeID(other.getEdgeNode2Itr(eItr)); + + addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], + other.getEdgeCosts(eItr)); + } + } + + void copyFrom(SimpleGraph &other) { + if (!other.areNodeIDsValid()) { + other.assignNodeIDs(); + } + copyFrom(const_cast<const SimpleGraph&>(other)); + } + +public: + + SimpleGraph() {} + + + SimpleGraph(const SimpleGraph &other) : PGraph() { + copyFrom(other); + } + + SimpleGraph& operator=(const SimpleGraph &other) { + clear(); + copyFrom(other); + return *this; + } + + NodeIterator addNode(const Vector &costs) { + return PGraph::addConstructedNode(SimpleNode(costs)); + } + + EdgeIterator addEdge(const NodeIterator &node1Itr, + const NodeIterator &node2Itr, + const Matrix &costs) { + return PGraph::addConstructedEdge(SimpleEdge(node1Itr, node2Itr, costs)); + } + +}; + +} + +#endif // LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h new file mode 100644 index 0000000..c91e2fa --- /dev/null +++ b/lib/CodeGen/PBQP/Solution.h @@ -0,0 +1,88 @@ +//===-- Solution.h ------- PBQP Solution -----------------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Annotated PBQP Graph class. This class is used internally by the PBQP solver +// to cache information to speed up reduction. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H +#define LLVM_CODEGEN_PBQP_SOLUTION_H + +#include "PBQPMath.h" + +namespace PBQP { + +class Solution { + + friend class SolverImplementation; + +private: + + std::vector<unsigned> selections; + PBQPNum solutionCost; + bool provedOptimal; + unsigned r0Reductions, r1Reductions, + r2Reductions, rNReductions; + +public: + + Solution() : + solutionCost(0.0), provedOptimal(false), + r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {} + + Solution(unsigned length, bool assumeOptimal) : + selections(length), solutionCost(0.0), provedOptimal(assumeOptimal), + r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {} + + void setProvedOptimal(bool provedOptimal) { + this->provedOptimal = provedOptimal; + } + + void setSelection(unsigned nodeID, unsigned selection) { + selections[nodeID] = selection; + } + + void setSolutionCost(PBQPNum solutionCost) { + this->solutionCost = solutionCost; + } + + void incR0Reductions() { ++r0Reductions; } + void incR1Reductions() { ++r1Reductions; } + void incR2Reductions() { ++r2Reductions; } + void incRNReductions() { ++rNReductions; } + + unsigned numNodes() const { return selections.size(); } + + unsigned getSelection(unsigned nodeID) const { + return selections[nodeID]; + } + + PBQPNum getCost() const { return solutionCost; } + + bool isProvedOptimal() const { return provedOptimal; } + + unsigned getR0Reductions() const { return r0Reductions; } + unsigned getR1Reductions() const { return r1Reductions; } + unsigned getR2Reductions() const { return r2Reductions; } + unsigned getRNReductions() const { return rNReductions; } + + bool operator==(const Solution &other) const { + return (selections == other.selections); + } + + bool operator!=(const Solution &other) const { + return !(*this == other); + } + +}; + +} + +#endif // LLVM_CODEGEN_PBQP_SOLUTION_H diff --git 
a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h new file mode 100644 index 0000000..a9c5f83 --- /dev/null +++ b/lib/CodeGen/PBQP/Solver.h @@ -0,0 +1,31 @@ +//===-- Solver.h ------- PBQP solver interface -----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_PBQP_SOLVER_H +#define LLVM_CODEGEN_PBQP_SOLVER_H + +#include "SimpleGraph.h" +#include "Solution.h" + +namespace PBQP { + +/// \brief Interface for solver classes. +class Solver { +public: + + virtual ~Solver() = 0; + virtual Solution solve(const SimpleGraph &orig) const = 0; +}; + +Solver::~Solver() {} + +} + +#endif // LLVM_CODEGEN_PBQP_SOLVER_H diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index c5c76fc..8071b0a 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "phielim" +#include "PHIElimination.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" #include "llvm/CodeGen/LiveVariables.h" @@ -22,7 +23,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" @@ -34,78 +34,25 @@ using namespace llvm; STATISTIC(NumAtomic, "Number of atomic phis lowered"); -namespace { - class VISIBILITY_HIDDEN PNE : public MachineFunctionPass { - MachineRegisterInfo *MRI; // Machine register information - - public: - static char ID; // Pass identification, replacement for typeid - PNE() : MachineFunctionPass(&ID) {} - - virtual bool 
runOnMachineFunction(MachineFunction &Fn); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<LiveVariables>(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - private: - /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions - /// in predecessor basic blocks. - /// - bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); - void LowerAtomicPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); - - /// analyzePHINodes - Gather information about the PHI nodes in - /// here. In particular, we want to map the number of uses of a virtual - /// register which is used in a PHI node. We map that to the BB the - /// vreg is coming from. This is used later to determine when the vreg - /// is killed in the BB. - /// - void analyzePHINodes(const MachineFunction& Fn); - - // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from - // SrcReg. This needs to be after any def or uses of SrcReg, but before - // any subsequent point where control flow might jump out of the basic - // block. - MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, - unsigned SrcReg); - - // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and - // also after any exception handling labels: in landing pads execution - // starts at the label, so any copies placed before it won't be executed! - MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - // Rather than assuming that EH labels come before other kinds of labels, - // just skip all labels. 
- while (I != MBB.end() && - (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel())) - ++I; - return I; - } - - typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair; - typedef std::map<BBVRegPair, unsigned> VRegPHIUse; - - VRegPHIUse VRegPHIUseCount; - - // Defs of PHI sources which are implicit_def. - SmallPtrSet<MachineInstr*, 4> ImpDefs; - }; -} - -char PNE::ID = 0; -static RegisterPass<PNE> +char PHIElimination::ID = 0; +static RegisterPass<PHIElimination> X("phi-node-elimination", "Eliminate PHI nodes for register allocation"); const PassInfo *const llvm::PHIEliminationID = &X; -bool PNE::runOnMachineFunction(MachineFunction &Fn) { +void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<LiveVariables>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { MRI = &Fn.getRegInfo(); + PHIDefs.clear(); + PHIKills.clear(); analyzePHINodes(Fn); bool Changed = false; @@ -132,7 +79,8 @@ bool PNE::runOnMachineFunction(MachineFunction &Fn) { /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in /// predecessor basic blocks. /// -bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) { +bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF, + MachineBasicBlock &MBB) { if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) return false; // Quick exit for basic blocks without PHIs. @@ -162,8 +110,9 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg. // This needs to be after any def or uses of SrcReg, but before any subsequent // point where control flow might jump out of the basic block. 
-MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB, - unsigned SrcReg) { +MachineBasicBlock::iterator +llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, + unsigned SrcReg) { // Handle the trivial case trivially. if (MBB.empty()) return MBB.begin(); @@ -206,9 +155,10 @@ MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB, /// under the assuption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. -/// -void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt) { +/// +void llvm::PHIElimination::LowerAtomicPHINode( + MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt) { // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); @@ -235,6 +185,10 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC); } + // Record PHI def. + assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?"); + PHIDefs[DestReg] = &MBB; + // Update live variable information if there is any. LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>(); if (LV) { @@ -276,6 +230,13 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); + // Get the MachineBasicBlock equivalent of the BasicBlock that is the source + // path the PHI. + MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); + + // Record the kill. + PHIKills[SrcReg].insert(&opBlock); + // If source is defined by an implicit def, there is no need to insert a // copy. 
MachineInstr *DefMI = MRI->getVRegDef(SrcReg); @@ -284,10 +245,6 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, continue; } - // Get the MachineBasicBlock equivalent of the BasicBlock that is the source - // path the PHI. - MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); - // Check to make sure we haven't already emitted the copy for this block. // This can happen because PHI nodes may have multiple entries for the same // basic block. @@ -420,7 +377,7 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB, /// used in a PHI node. We map that to the BB the vreg is coming from. This is /// used later to determine when the vreg is killed in the BB. /// -void PNE::analyzePHINodes(const MachineFunction& Fn) { +void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) { for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h new file mode 100644 index 0000000..3d02dfd --- /dev/null +++ b/lib/CodeGen/PHIElimination.h @@ -0,0 +1,125 @@ +//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP +#define LLVM_CODEGEN_PHIELIMINATION_HPP + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" + +#include <map> + +namespace llvm { + + /// Lower PHI instructions to copies. 
+ class PHIElimination : public MachineFunctionPass { + MachineRegisterInfo *MRI; // Machine register information + private: + + typedef SmallSet<MachineBasicBlock*, 4> PHIKillList; + typedef DenseMap<unsigned, PHIKillList> PHIKillMap; + typedef DenseMap<unsigned, MachineBasicBlock*> PHIDefMap; + + public: + + typedef PHIKillList::iterator phi_kill_iterator; + typedef PHIKillList::const_iterator const_phi_kill_iterator; + + static char ID; // Pass identification, replacement for typeid + PHIElimination() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + /// Return true if the given vreg was defined by a PHI intsr prior to + /// lowering. + bool hasPHIDef(unsigned vreg) const { + return PHIDefs.count(vreg); + } + + /// Returns the block in which the PHI instruction which defined the + /// given vreg used to reside. + MachineBasicBlock* getPHIDefBlock(unsigned vreg) { + PHIDefMap::iterator phiDefItr = PHIDefs.find(vreg); + assert(phiDefItr != PHIDefs.end() && "vreg has no phi-def."); + return phiDefItr->second; + } + + /// Returns true if the given vreg was killed by a PHI instr. + bool hasPHIKills(unsigned vreg) const { + return PHIKills.count(vreg); + } + + /// Returns an iterator over the BasicBlocks which contained PHI + /// kills of this register prior to lowering. + phi_kill_iterator phiKillsBegin(unsigned vreg) { + PHIKillMap::iterator phiKillItr = PHIKills.find(vreg); + assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills."); + return phiKillItr->second.begin(); + } + phi_kill_iterator phiKillsEnd(unsigned vreg) { + PHIKillMap::iterator phiKillItr = PHIKills.find(vreg); + assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills."); + return phiKillItr->second.end(); + } + + private: + /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions + /// in predecessor basic blocks. 
+ /// + bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); + void LowerAtomicPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt); + + /// analyzePHINodes - Gather information about the PHI nodes in + /// here. In particular, we want to map the number of uses of a virtual + /// register which is used in a PHI node. We map that to the BB the + /// vreg is coming from. This is used later to determine when the vreg + /// is killed in the BB. + /// + void analyzePHINodes(const MachineFunction& Fn); + + // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from + // SrcReg. This needs to be after any def or uses of SrcReg, but before + // any subsequent point where control flow might jump out of the basic + // block. + MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, + unsigned SrcReg); + + // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and + // also after any exception handling labels: in landing pads execution + // starts at the label, so any copies placed before it won't be executed! + MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + // Rather than assuming that EH labels come before other kinds of labels, + // just skip all labels. + while (I != MBB.end() && + (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel())) + ++I; + return I; + } + + typedef std::pair<const MachineBasicBlock*, unsigned> BBVRegPair; + typedef std::map<BBVRegPair, unsigned> VRegPHIUse; + + VRegPHIUse VRegPHIUseCount; + PHIDefMap PHIDefs; + PHIKillMap PHIKills; + + // Defs of PHI sources which are implicit_def. 
+ SmallPtrSet<MachineInstr*, 4> ImpDefs; + }; + +} + +#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */ diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index de774685..e52158c 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -19,45 +19,73 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "post-RA-sched" +#include "ExactHazardRecognizer.h" +#include "SimpleHazardRecognizer.h" #include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include <map> +#include <set> using namespace llvm; STATISTIC(NumNoops, "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); +// Post-RA scheduling is enabled with +// TargetSubtarget.enablePostRAScheduler(). This flag can be used to +// override the target. 
+static cl::opt<bool> +EnablePostRAScheduler("post-RA-scheduler", + cl::desc("Enable scheduling after register allocation"), + cl::init(false), cl::Hidden); static cl::opt<bool> EnableAntiDepBreaking("break-anti-dependencies", cl::desc("Break post-RA scheduling anti-dependencies"), cl::init(true), cl::Hidden); - static cl::opt<bool> EnablePostRAHazardAvoidance("avoid-hazards", - cl::desc("Enable simple hazard-avoidance"), + cl::desc("Enable exact hazard avoidance"), cl::init(true), cl::Hidden); +// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod +static cl::opt<int> +DebugDiv("postra-sched-debugdiv", + cl::desc("Debug control MBBs that are scheduled"), + cl::init(0), cl::Hidden); +static cl::opt<int> +DebugMod("postra-sched-debugmod", + cl::desc("Debug control MBBs that are scheduled"), + cl::init(0), cl::Hidden); + namespace { class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass { + AliasAnalysis *AA; + public: static char ID; PostRAScheduler() : MachineFunctionPass(&ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -95,6 +123,9 @@ namespace { /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + /// Classes - For live regs that are only used in one register class in a /// live range, the register class. If the register is not live, the /// corresponding value is null. If the register is live but used in @@ -106,22 +137,27 @@ namespace { /// RegRegs - Map registers to all their references within a live range. std::multimap<unsigned, MachineOperand *> RegRefs; - /// The index of the most recent kill (proceding bottom-up), or ~0u if - /// the register is not live. 
+ /// KillIndices - The index of the most recent kill (proceding bottom-up), + /// or ~0u if the register is not live. unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; - /// The index of the most recent complete def (proceding bottom up), or ~0u - /// if the register is live. + /// DefIndices - The index of the most recent complete def (proceding bottom + /// up), or ~0u if the register is live. unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; + /// KeepRegs - A set of registers which are live and cannot be changed to + /// break anti-dependencies. + SmallSet<unsigned, 4> KeepRegs; + public: SchedulePostRATDList(MachineFunction &MF, const MachineLoopInfo &MLI, const MachineDominatorTree &MDT, - ScheduleHazardRecognizer *HR) + ScheduleHazardRecognizer *HR, + AliasAnalysis *aa) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AllocatableSet(TRI->getAllocatableSet(MF)), - HazardRec(HR) {} + HazardRec(HR), AA(aa) {} ~SchedulePostRATDList() { delete HazardRec; @@ -135,6 +171,11 @@ namespace { /// Schedule - Schedule the instruction range using list scheduling. /// void Schedule(); + + /// FixupKills - Fix register kill flags that have been made + /// invalid due to scheduling + /// + void FixupKills(MachineBasicBlock *MBB); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. @@ -153,62 +194,15 @@ namespace { void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); bool BreakAntiDependencies(); - }; - - /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses - /// a coarse classification and attempts to avoid that instructions of - /// a given class aren't grouped too densely together. - class SimpleHazardRecognizer : public ScheduleHazardRecognizer { - /// Class - A simple classification for SUnits. - enum Class { - Other, Load, Store - }; - - /// Window - The Class values of the most recently issued - /// instructions. 
- Class Window[8]; - - /// getClass - Classify the given SUnit. - Class getClass(const SUnit *SU) { - const MachineInstr *MI = SU->getInstr(); - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayLoad()) - return Load; - if (TID.mayStore()) - return Store; - return Other; - } - - /// Step - Rotate the existing entries in Window and insert the - /// given class value in position as the most recent. - void Step(Class C) { - std::copy(Window+1, array_endof(Window), Window); - Window[array_lengthof(Window)-1] = C; - } - - public: - SimpleHazardRecognizer() : Window() {} - - virtual HazardType getHazardType(SUnit *SU) { - Class C = getClass(SU); - if (C == Other) - return NoHazard; - unsigned Score = 0; - for (unsigned i = 0; i != array_lengthof(Window); ++i) - if (Window[i] == C) - Score += i + 1; - if (Score > array_lengthof(Window) * 2) - return Hazard; - return NoHazard; - } - - virtual void EmitInstruction(SUnit *SU) { - Step(getClass(SU)); - } - - virtual void AdvanceCycle() { - Step(Other); - } + unsigned findSuitableFreeRegister(unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *); + void StartBlockForKills(MachineBasicBlock *BB); + + // ToggleKillFlag - Toggle a register operand kill flag. Other + // adjustments may be made to the instruction if necessary. Return + // true if the operand has been deleted, false if not. + bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); }; } @@ -235,19 +229,44 @@ static bool isSchedulingBoundary(const MachineInstr *MI, } bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - DOUT << "PostRAScheduler\n"; + AA = &getAnalysis<AliasAnalysis>(); + + // Check for explicit enable/disable of post-ra scheduling. 
+ if (EnablePostRAScheduler.getPosition() > 0) { + if (!EnablePostRAScheduler) + return true; + } else { + // Check that post-RA scheduling is enabled for this function + const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); + if (!ST.enablePostRAScheduler()) + return true; + } + + DEBUG(errs() << "PostRAScheduler\n"); const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData(); ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? - new SimpleHazardRecognizer : - new ScheduleHazardRecognizer(); + (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : + (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { +#ifndef NDEBUG + // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int bbcnt = 0; + if (bbcnt++ % DebugDiv != DebugMod) + continue; + errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() << + ":MBB ID#" << MBB->getNumber() << " ***\n"; + } +#endif + // Initialize register live-range state for scheduling in this block. 
Scheduler.StartBlock(MBB); @@ -259,7 +278,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineInstr *MI = prior(I); if (isSchedulingBoundary(MI, Fn)) { Scheduler.Run(MBB, I, Current, CurrentCount); - Scheduler.EmitSchedule(); + Scheduler.EmitSchedule(0); Current = MI; CurrentCount = Count - 1; Scheduler.Observe(MI, CurrentCount); @@ -271,10 +290,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); - Scheduler.EmitSchedule(); + Scheduler.EmitSchedule(0); // Clean up register live-range state. Scheduler.FinishBlock(); + + // Update register kills + Scheduler.FixupKills(MBB); } return true; @@ -287,6 +309,9 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { // Call the superclass. ScheduleDAGInstrs::StartBlock(BB); + // Reset the hazard recognizer. + HazardRec->Reset(); + // Clear out the register class data. std::fill(Classes, array_endof(Classes), static_cast<const TargetRegisterClass *>(0)); @@ -295,8 +320,13 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { std::fill(KillIndices, array_endof(KillIndices), ~0u); std::fill(DefIndices, array_endof(DefIndices), BB->size()); + // Clear "do not change" set. + KeepRegs.clear(); + + bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + // Determine the live-out physregs for this block. - if (!BB->empty() && BB->back().getDesc().isReturn()) + if (IsReturnBlock) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { @@ -312,7 +342,7 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - else + } else { // In a non-return block, examine the live-in regs of all successors. 
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) @@ -330,18 +360,16 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } + } - // Consider callee-saved registers as live-out, since we're running after - // prologue/epilogue insertion so there's no way to add additional - // saved registers. - // - // TODO: If the callee saves and restores these, then we can potentially - // use them between the save and the restore. To do that, we could scan - // the exit blocks to see which of these registers are defined. - // Alternatively, callee-saved registers that aren't saved and restored - // could be marked live-in in every block. + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(BB); for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; + if (!IsReturnBlock && !Pristine.test(Reg)) continue; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); KillIndices[Reg] = BB->size(); DefIndices[Reg] = ~0u; @@ -358,10 +386,10 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { /// Schedule - Schedule the instruction range using list scheduling. /// void SchedulePostRATDList::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); // Build the scheduling graph. 
- BuildSchedGraph(); + BuildSchedGraph(AA); if (EnableAntiDepBreaking) { if (BreakAntiDependencies()) { @@ -374,10 +402,13 @@ void SchedulePostRATDList::Schedule() { SUnits.clear(); EntrySU = SUnit(); ExitSU = SUnit(); - BuildSchedGraph(); + BuildSchedGraph(AA); } } + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + AvailableQueue.initNodes(SUnits); ListScheduleTopDown(); @@ -448,8 +479,10 @@ void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - const TargetRegisterClass *NewRC = - getInstrOperandRegClass(TRI, MI->getDesc(), i); + const TargetRegisterClass *NewRC = 0; + + if (i < MI->getDesc().getNumOperands()) + NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -473,6 +506,16 @@ void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) { // If we're still willing to consider this register, note the reference. if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) RegRefs.insert(std::make_pair(Reg, &MO)); + + // It's not safe to change register allocation for source operands of + // that have special allocation requirements. + if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) { + if (KeepRegs.insert(Reg)) { + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + KeepRegs.insert(*Subreg); + } + } } } @@ -492,9 +535,10 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, DefIndices[Reg] = Count; KillIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + KeepRegs.erase(Reg); Classes[Reg] = 0; RegRefs.erase(Reg); // Repeat, for all subregs. 
@@ -503,6 +547,7 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, unsigned SubregReg = *Subreg; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; + KeepRegs.erase(SubregReg); Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } @@ -520,8 +565,9 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, if (Reg == 0) continue; if (!MO.isUse()) continue; - const TargetRegisterClass *NewRC = - getInstrOperandRegClass(TRI, MI->getDesc(), i); + const TargetRegisterClass *NewRC = 0; + if (i < MI->getDesc().getNumOperands()) + NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -551,6 +597,36 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, } } +unsigned +SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC) { + for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), + RE = RC->allocation_order_end(MF); R != RE; ++R) { + unsigned NewReg = *R; + // Don't replace a register with itself. + if (NewReg == AntiDepReg) continue; + // Don't replace a register with one that was recently used to repair + // an anti-dependence with this AntiDepReg, because that would + // re-introduce that anti-dependence. + if (NewReg == LastNewReg) continue; + // If NewReg is dead and NewReg's most recent def is not before + // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. 
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && + "Kill and Def maps aren't consistent for NewReg!"); + if (KillIndices[NewReg] != ~0u || + Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) || + KillIndices[AntiDepReg] > DefIndices[NewReg]) + continue; + return NewReg; + } + + // No registers are free and available! + return 0; +} + /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers. /// @@ -567,8 +643,18 @@ bool SchedulePostRATDList::BreakAntiDependencies() { Max = SU; } - DOUT << "Critical path has total latency " - << (Max->getDepth() + Max->Latency) << "\n"; +#ifndef NDEBUG + { + DEBUG(errs() << "Critical path has total latency " + << (Max->getDepth() + Max->Latency) << "\n"); + DEBUG(errs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] == ~0u) + DEBUG(errs() << " " << TRI->getName(Reg)); + } + DEBUG(errs() << '\n'); + } +#endif // Track progress along the critical path through the SUnit graph as we walk // the instructions. @@ -598,7 +684,7 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // isn't A which is free. This re-introduces anti-dependencies // at all but one of the original anti-dependencies that we were // trying to break. To avoid this, keep track of the most recent - // register that each register was replaced with, avoid avoid + // register that each register was replaced with, avoid // using it to repair an anti-dependence on the same register. // This lets us produce this: // A = ... @@ -627,13 +713,6 @@ bool SchedulePostRATDList::BreakAntiDependencies() { I != E; --Count) { MachineInstr *MI = --I; - // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as - // dependence-breaking. 
In the case of an INSERT_SUBREG, the IMPLICIT_DEF - // is left behind appearing to clobber the super-register, while the - // subregister needs to remain live. So we just ignore them. - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - continue; - // Check if this instruction has a dependence on the critical path that // is an anti-dependence that we may be able to break. If it is, set // AntiDepReg to the non-zero register associated with the anti-dependence. @@ -656,8 +735,12 @@ bool SchedulePostRATDList::BreakAntiDependencies() { if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - // Don't break anti-dependencies on non-allocatable registers. if (!AllocatableSet.test(AntiDepReg)) + // Don't break anti-dependencies on non-allocatable registers. + AntiDepReg = 0; + else if (KeepRegs.count(AntiDepReg)) + // Don't break anti-dependencies if an use down below requires + // this exact register. AntiDepReg = 0; else { // If the SUnit has other dependencies on the SUnit that it @@ -689,16 +772,22 @@ bool SchedulePostRATDList::BreakAntiDependencies() { PrescanInstruction(MI); - // If this instruction has a use of AntiDepReg, breaking it - // is invalid. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (MO.isUse() && AntiDepReg == Reg) { - AntiDepReg = 0; - break; + if (MI->getDesc().hasExtraDefRegAllocReq()) + // If this instruction's defs have special allocation requirement, don't + // break this anti-dependency. + AntiDepReg = 0; + else if (AntiDepReg) { + // If this instruction has a use of AntiDepReg, breaking it + // is invalid. 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (MO.isUse() && AntiDepReg == Reg) { + AntiDepReg = 0; + break; + } } } @@ -715,60 +804,43 @@ bool SchedulePostRATDList::BreakAntiDependencies() { // TODO: Instead of picking the first free register, consider which might // be the best. if (AntiDepReg != 0) { - for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), - RE = RC->allocation_order_end(MF); R != RE; ++R) { - unsigned NewReg = *R; - // Don't replace a register with itself. - if (NewReg == AntiDepReg) continue; - // Don't replace a register with one that was recently used to repair - // an anti-dependence with this AntiDepReg, because that would - // re-introduce that anti-dependence. - if (NewReg == LastNewReg[AntiDepReg]) continue; - // If NewReg is dead and NewReg's most recent def is not before - // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. - assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && - "Kill and Def maps aren't consistent for AntiDepReg!"); - assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && - "Kill and Def maps aren't consistent for NewReg!"); - if (KillIndices[NewReg] == ~0u && - Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) && - KillIndices[AntiDepReg] <= DefIndices[NewReg]) { - DOUT << "Breaking anti-dependence edge on " - << TRI->getName(AntiDepReg) - << " with " << RegRefs.count(AntiDepReg) << " references" - << " using " << TRI->getName(NewReg) << "!\n"; - - // Update the references to the old register to refer to the new - // register. 
- std::pair<std::multimap<unsigned, MachineOperand *>::iterator, - std::multimap<unsigned, MachineOperand *>::iterator> - Range = RegRefs.equal_range(AntiDepReg); - for (std::multimap<unsigned, MachineOperand *>::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) - Q->second->setReg(NewReg); - - // We just went back in time and modified history; the - // liveness information for the anti-depenence reg is now - // inconsistent. Set the state as if it were dead. - Classes[NewReg] = Classes[AntiDepReg]; - DefIndices[NewReg] = DefIndices[AntiDepReg]; - KillIndices[NewReg] = KillIndices[AntiDepReg]; - assert(((KillIndices[NewReg] == ~0u) != - (DefIndices[NewReg] == ~0u)) && - "Kill and Def maps aren't consistent for NewReg!"); - - Classes[AntiDepReg] = 0; - DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; - KillIndices[AntiDepReg] = ~0u; - assert(((KillIndices[AntiDepReg] == ~0u) != - (DefIndices[AntiDepReg] == ~0u)) && - "Kill and Def maps aren't consistent for AntiDepReg!"); - - RegRefs.erase(AntiDepReg); - Changed = true; - LastNewReg[AntiDepReg] = NewReg; - break; - } + if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg, + LastNewReg[AntiDepReg], + RC)) { + DEBUG(errs() << "Breaking anti-dependence edge on " + << TRI->getName(AntiDepReg) + << " with " << RegRefs.count(AntiDepReg) << " references" + << " using " << TRI->getName(NewReg) << "!\n"); + + // Update the references to the old register to refer to the new + // register. + std::pair<std::multimap<unsigned, MachineOperand *>::iterator, + std::multimap<unsigned, MachineOperand *>::iterator> + Range = RegRefs.equal_range(AntiDepReg); + for (std::multimap<unsigned, MachineOperand *>::iterator + Q = Range.first, QE = Range.second; Q != QE; ++Q) + Q->second->setReg(NewReg); + + // We just went back in time and modified history; the + // liveness information for the anti-depenence reg is now + // inconsistent. Set the state as if it were dead. 
+ Classes[NewReg] = Classes[AntiDepReg]; + DefIndices[NewReg] = DefIndices[AntiDepReg]; + KillIndices[NewReg] = KillIndices[AntiDepReg]; + assert(((KillIndices[NewReg] == ~0u) != + (DefIndices[NewReg] == ~0u)) && + "Kill and Def maps aren't consistent for NewReg!"); + + Classes[AntiDepReg] = 0; + DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; + KillIndices[AntiDepReg] = ~0u; + assert(((KillIndices[AntiDepReg] == ~0u) != + (DefIndices[AntiDepReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + + RegRefs.erase(AntiDepReg); + Changed = true; + LastNewReg[AntiDepReg] = NewReg; } } @@ -778,6 +850,177 @@ bool SchedulePostRATDList::BreakAntiDependencies() { return Changed; } +/// StartBlockForKills - Initialize register live-range state for updating kills +/// +void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { + // Initialize the indices to indicate that no registers are live. + std::fill(KillIndices, array_endof(KillIndices), ~0u); + + // Determine the live-out physregs for this block. + if (!BB->empty() && BB->back().getDesc().isReturn()) { + // In a return block, examine the function live-out regs. + for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), + E = MRI.liveout_end(); I != E; ++I) { + unsigned Reg = *I; + KillIndices[Reg] = BB->size(); + // Repeat, for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = BB->size(); + } + } + } + else { + // In a non-return block, examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + KillIndices[Reg] = BB->size(); + // Repeat, for all subregs. 
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = BB->size(); + } + } + } + } +} + +bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, + MachineOperand &MO) { + // Setting kill flag... + if (!MO.isKill()) { + MO.setIsKill(true); + return false; + } + + // If MO itself is live, clear the kill flag... + if (KillIndices[MO.getReg()] != ~0u) { + MO.setIsKill(false); + return false; + } + + // If any subreg of MO is live, then create an imp-def for that + // subreg and keep MO marked as killed. + MO.setIsKill(false); + bool AllDead = true; + const unsigned SuperReg = MO.getReg(); + for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg); + *Subreg; ++Subreg) { + if (KillIndices[*Subreg] != ~0u) { + MI->addOperand(MachineOperand::CreateReg(*Subreg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/, + false /*IsDead*/)); + AllDead = false; + } + } + + if(AllDead) + MO.setIsKill(true); + return false; +} + +/// FixupKills - Fix the register kill flags, they may have been made +/// incorrect by instruction reordering. +/// +void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { + DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n'); + + std::set<unsigned> killedRegs; + BitVector ReservedRegs = TRI->getReservedRegs(MF); + + StartBlockForKills(MBB); + + // Examine block from end to start... + unsigned Count = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); + I != E; --Count) { + MachineInstr *MI = --I; + + // Update liveness. Registers that are defed but not used in this + // instruction are now dead. Mark register and all subregs as they + // are completely defined. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. 
+ if (MI->isRegTiedToUseOperand(i)) continue; + + KillIndices[Reg] = ~0u; + + // Repeat for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = ~0u; + } + } + + // Examine all used registers and set/clear kill flag. When a + // register is used multiple times we only set the kill flag on + // the first use. + killedRegs.clear(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + + bool kill = false; + if (killedRegs.find(Reg) == killedRegs.end()) { + kill = true; + // A register is not killed if any subregs are live... + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + if (KillIndices[*Subreg] != ~0u) { + kill = false; + break; + } + } + + // If subreg is not live, then register is killed if it became + // live in this instruction + if (kill) + kill = (KillIndices[Reg] == ~0u); + } + + if (MO.isKill() != kill) { + bool removed = ToggleKillFlag(MI, MO); + if (removed) { + DEBUG(errs() << "Fixed <removed> in "); + } else { + DEBUG(errs() << "Fixed " << MO << " in "); + } + DEBUG(MI->dump()); + } + + killedRegs.insert(Reg); + } + + // Mark any used register (that is not using undef) and subregs as + // now live... 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + + KillIndices[Reg] = Count; + + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + KillIndices[*Subreg] = Count; + } + } + } +} + //===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// @@ -786,17 +1029,17 @@ bool SchedulePostRATDList::BreakAntiDependencies() { /// the PendingQueue if the count reaches zero. Also update its cycle bound. void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); - --SuccSU->NumPredsLeft; - + #ifndef NDEBUG - if (SuccSU->NumPredsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (SuccSU->NumPredsLeft == 0) { + errs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --SuccSU->NumPredsLeft; + // Compute how many cycles it will be before this actually becomes // available. This is the max of the start time of all predecessors plus // their latencies. @@ -819,7 +1062,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. 
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); @@ -848,6 +1091,10 @@ void SchedulePostRATDList::ListScheduleTopDown() { } } + // In any cycle where we can't schedule any instructions, we must + // stall or emit a noop, depending on the target. + bool CycleHasInsts = false; + // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector<SUnit*> NotReady; @@ -866,13 +1113,14 @@ void SchedulePostRATDList::ListScheduleTopDown() { } else if (PendingQueue[i]->getDepth() < MinDepth) MinDepth = PendingQueue[i]->getDepth(); } - - // If there are no instructions available, don't try to issue anything, and - // don't advance the hazard recognizer. - if (AvailableQueue.empty()) { - CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1; - continue; - } + + DEBUG(errs() << "\n*** Examining Available\n"; + LatencyPriorityQueue q = AvailableQueue; + while (!q.empty()) { + SUnit *su = q.pop(); + errs() << "Height " << su->getHeight() << ": "; + su->dump(this); + }); SUnit *FoundSUnit = 0; @@ -903,27 +1151,38 @@ void SchedulePostRATDList::ListScheduleTopDown() { if (FoundSUnit) { ScheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); - - // If this is a pseudo-op node, we don't want to increment the current - // cycle. - if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! - ++CurCycle; - } else if (!HasNoopHazards) { - // Otherwise, we have a pipeline stall, but no other problem, just advance - // the current cycle and try again. 
- DOUT << "*** Advancing cycle, no work to do\n"; - HazardRec->AdvanceCycle(); - ++NumStalls; - ++CurCycle; + CycleHasInsts = true; + + // If we are using the target-specific hazards, then don't + // advance the cycle time just because we schedule a node. If + // the target allows it we can schedule multiple nodes in the + // same cycle. + if (!EnablePostRAHazardAvoidance) { + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } } else { - // Otherwise, we have no instructions to issue and we have instructions - // that will fault if we don't do this right. This is the case for - // processors without pipeline interlocks and other cases. - DOUT << "*** Emitting noop\n"; - HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop - ++NumNoops; + if (CycleHasInsts) { + DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, + // just advance the current cycle and try again. + DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + ++NumStalls; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. 
+ DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + } + ++CurCycle; + CycleHasInsts = false; } } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index ae60c86..8fa07d4 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -67,7 +68,7 @@ namespace { MachineBasicBlock *BarrierMBB; // Barrier - Current barrier index. - unsigned BarrierIdx; + LiveIndex BarrierIdx; // CurrLI - Current live interval being split. LiveInterval *CurrLI; @@ -82,7 +83,7 @@ namespace { DenseMap<unsigned, int> IntervalSSMap; // Def2SpillMap - A map from a def instruction index to spill index. - DenseMap<unsigned, unsigned> Def2SpillMap; + DenseMap<LiveIndex, LiveIndex> Def2SpillMap; public: static char ID; @@ -91,6 +92,7 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); AU.addRequired<LiveStacks>(); @@ -119,33 +121,31 @@ namespace { } /// print - Implement the dump method. 
- virtual void print(std::ostream &O, const Module* M = 0) const { + virtual void print(raw_ostream &O, const Module* M = 0) const { LIs->print(O, M); } - void print(std::ostream *O, const Module* M = 0) const { - if (O) print(*O, M); - } private: MachineBasicBlock::iterator findNextEmptySlot(MachineBasicBlock*, MachineInstr*, - unsigned&); + LiveIndex&); MachineBasicBlock::iterator findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*, - SmallPtrSet<MachineInstr*, 4>&, unsigned&); + SmallPtrSet<MachineInstr*, 4>&, LiveIndex&); MachineBasicBlock::iterator - findRestorePoint(MachineBasicBlock*, MachineInstr*, unsigned, - SmallPtrSet<MachineInstr*, 4>&, unsigned&); + findRestorePoint(MachineBasicBlock*, MachineInstr*, LiveIndex, + SmallPtrSet<MachineInstr*, 4>&, LiveIndex&); int CreateSpillStackSlot(unsigned, const TargetRegisterClass *); - bool IsAvailableInStack(MachineBasicBlock*, unsigned, unsigned, unsigned, - unsigned&, int&) const; + bool IsAvailableInStack(MachineBasicBlock*, unsigned, + LiveIndex, LiveIndex, + LiveIndex&, int&) const; - void UpdateSpillSlotInterval(VNInfo*, unsigned, unsigned); + void UpdateSpillSlotInterval(VNInfo*, LiveIndex, LiveIndex); bool SplitRegLiveInterval(LiveInterval*); @@ -157,7 +157,7 @@ namespace { bool Rematerialize(unsigned vreg, VNInfo* ValNo, MachineInstr* DefMI, MachineBasicBlock::iterator RestorePt, - unsigned RestoreIdx, + LiveIndex RestoreIdx, SmallPtrSet<MachineInstr*, 4>& RefsInMBB); MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC, MachineInstr* DefMI, @@ -209,11 +209,12 @@ const PassInfo *const llvm::PreAllocSplittingID = &X; /// instruction index map. If there isn't one, return end(). 
MachineBasicBlock::iterator PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI, - unsigned &SpotIndex) { + LiveIndex &SpotIndex) { MachineBasicBlock::iterator MII = MI; if (++MII != MBB->end()) { - unsigned Index = LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII)); - if (Index) { + LiveIndex Index = + LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII)); + if (Index != LiveIndex()) { SpotIndex = Index; return MII; } @@ -229,7 +230,7 @@ MachineBasicBlock::iterator PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, MachineInstr *DefMI, SmallPtrSet<MachineInstr*, 4> &RefsInMBB, - unsigned &SpillIndex) { + LiveIndex &SpillIndex) { MachineBasicBlock::iterator Pt = MBB->begin(); MachineBasicBlock::iterator MII = MI; @@ -242,7 +243,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, if (MII == EndPt || RefsInMBB.count(MII)) return Pt; while (MII != EndPt && !RefsInMBB.count(MII)) { - unsigned Index = LIs->getInstructionIndex(MII); + LiveIndex Index = LIs->getInstructionIndex(MII); // We can't insert the spill between the barrier (a call), and its // corresponding call frame setup. @@ -275,9 +276,9 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, /// found. MachineBasicBlock::iterator PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, - unsigned LastIdx, + LiveIndex LastIdx, SmallPtrSet<MachineInstr*, 4> &RefsInMBB, - unsigned &RestoreIndex) { + LiveIndex &RestoreIndex) { // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb // begin index accordingly. MachineBasicBlock::iterator Pt = MBB->end(); @@ -298,10 +299,10 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, // FIXME: Limit the number of instructions to examine to reduce // compile time? 
while (MII != EndPt) { - unsigned Index = LIs->getInstructionIndex(MII); + LiveIndex Index = LIs->getInstructionIndex(MII); if (Index > LastIdx) break; - unsigned Gap = LIs->findGapBeforeInstr(Index); + LiveIndex Gap = LIs->findGapBeforeInstr(Index); // We can't insert a restore between the barrier (a call) and its // corresponding call frame teardown. @@ -310,7 +311,7 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, if (MII == EndPt || RefsInMBB.count(MII)) return Pt; ++MII; } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode()); - } else if (Gap) { + } else if (Gap != LiveIndex()) { Pt = MII; RestoreIndex = Gap; } @@ -343,7 +344,8 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator()); + CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false, + LSs->getVNInfoAllocator()); return SS; } @@ -351,8 +353,9 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, /// slot at the specified index. bool PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, - unsigned Reg, unsigned DefIndex, - unsigned RestoreIndex, unsigned &SpillIndex, + unsigned Reg, LiveIndex DefIndex, + LiveIndex RestoreIndex, + LiveIndex &SpillIndex, int& SS) const { if (!DefMBB) return false; @@ -360,7 +363,8 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg); if (I == IntervalSSMap.end()) return false; - DenseMap<unsigned, unsigned>::iterator II = Def2SpillMap.find(DefIndex); + DenseMap<LiveIndex, LiveIndex>::iterator + II = Def2SpillMap.find(DefIndex); if (II == Def2SpillMap.end()) return false; @@ -380,8 +384,8 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, /// interval being split, and the spill and restore indicies, update the live /// interval of the spill stack slot. 
void -PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex, - unsigned RestoreIndex) { +PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, LiveIndex SpillIndex, + LiveIndex RestoreIndex) { assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB && "Expect restore in the barrier mbb"); @@ -394,8 +398,8 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex, } SmallPtrSet<MachineBasicBlock*, 4> Processed; - unsigned EndIdx = LIs->getMBBEndIdx(MBB); - LiveRange SLR(SpillIndex, EndIdx+1, CurrSValNo); + LiveIndex EndIdx = LIs->getMBBEndIdx(MBB); + LiveRange SLR(SpillIndex, LIs->getNextSlot(EndIdx), CurrSValNo); CurrSLI->addRange(SLR); Processed.insert(MBB); @@ -414,7 +418,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex, WorkList.pop_back(); if (Processed.count(MBB)) continue; - unsigned Idx = LIs->getMBBStartIdx(MBB); + LiveIndex Idx = LIs->getMBBStartIdx(MBB); LR = CurrLI->getLiveRangeContaining(Idx); if (LR && LR->valno == ValNo) { EndIdx = LIs->getMBBEndIdx(MBB); @@ -424,7 +428,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex, CurrSLI->addRange(SLR); } else if (LR->end > EndIdx) { // Live range extends beyond end of mbb, process successors. - LiveRange SLR(Idx, EndIdx+1, CurrSValNo); + LiveRange SLR(Idx, LIs->getNextIndex(EndIdx), CurrSValNo); CurrSLI->addRange(SLR); for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) @@ -487,12 +491,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, } // Once we've found it, extend its VNInfo to our instruction. 
- unsigned DefIndex = LIs->getInstructionIndex(Walker); - DefIndex = LiveIntervals::getDefIndex(DefIndex); - unsigned EndIndex = LIs->getMBBEndIdx(MBB); + LiveIndex DefIndex = LIs->getInstructionIndex(Walker); + DefIndex = LIs->getDefIndex(DefIndex); + LiveIndex EndIndex = LIs->getMBBEndIdx(MBB); RetVNI = NewVNs[Walker]; - LI->addRange(LiveRange(DefIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(DefIndex, LIs->getNextSlot(EndIndex), RetVNI)); } else if (!ContainsDefs && ContainsUses) { SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; @@ -524,12 +528,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, IsTopLevel, IsIntraBlock); } - unsigned UseIndex = LIs->getInstructionIndex(Walker); - UseIndex = LiveIntervals::getUseIndex(UseIndex); - unsigned EndIndex = 0; + LiveIndex UseIndex = LIs->getInstructionIndex(Walker); + UseIndex = LIs->getUseIndex(UseIndex); + LiveIndex EndIndex; if (IsIntraBlock) { EndIndex = LIs->getInstructionIndex(UseI); - EndIndex = LiveIntervals::getUseIndex(EndIndex); + EndIndex = LIs->getUseIndex(EndIndex); } else EndIndex = LIs->getMBBEndIdx(MBB); @@ -538,12 +542,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, NewVNs, LiveOut, Phis, false, true); - LI->addRange(LiveRange(UseIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(UseIndex, LIs->getNextSlot(EndIndex), RetVNI)); // FIXME: Need to set kills properly for inter-block stuff. 
- if (LI->isKill(RetVNI, UseIndex)) LI->removeKill(RetVNI, UseIndex); + if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex); if (IsIntraBlock) - LI->addKill(RetVNI, EndIndex); + RetVNI->addKill(EndIndex); } else if (ContainsDefs && ContainsUses) { SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB]; SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; @@ -584,13 +588,13 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, IsTopLevel, IsIntraBlock); } - unsigned StartIndex = LIs->getInstructionIndex(Walker); - StartIndex = foundDef ? LiveIntervals::getDefIndex(StartIndex) : - LiveIntervals::getUseIndex(StartIndex); - unsigned EndIndex = 0; + LiveIndex StartIndex = LIs->getInstructionIndex(Walker); + StartIndex = foundDef ? LIs->getDefIndex(StartIndex) : + LIs->getUseIndex(StartIndex); + LiveIndex EndIndex; if (IsIntraBlock) { EndIndex = LIs->getInstructionIndex(UseI); - EndIndex = LiveIntervals::getUseIndex(EndIndex); + EndIndex = LIs->getUseIndex(EndIndex); } else EndIndex = LIs->getMBBEndIdx(MBB); @@ -600,12 +604,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, NewVNs, LiveOut, Phis, false, true); - LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI)); - if (foundUse && LI->isKill(RetVNI, StartIndex)) - LI->removeKill(RetVNI, StartIndex); + if (foundUse && RetVNI->isKill(StartIndex)) + RetVNI->removeKill(StartIndex); if (IsIntraBlock) { - LI->addKill(RetVNI, EndIndex); + RetVNI->addKill(EndIndex); } } @@ -636,9 +640,10 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us // assume that we are not intrablock here. 
if (Phis.count(MBB)) return Phis[MBB]; - unsigned StartIndex = LIs->getMBBStartIdx(MBB); + LiveIndex StartIndex = LIs->getMBBStartIdx(MBB); VNInfo *RetVNI = Phis[MBB] = - LI->getNextValue(0, /*FIXME*/ 0, false, LIs->getVNInfoAllocator()); + LI->getNextValue(LiveIndex(), /*FIXME*/ 0, false, + LIs->getVNInfoAllocator()); if (!IsIntraBlock) LiveOut[MBB] = RetVNI; @@ -680,21 +685,21 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { I->second->setHasPHIKill(true); - unsigned KillIndex = LIs->getMBBEndIdx(I->first); - if (!LiveInterval::isKill(I->second, KillIndex)) - LI->addKill(I->second, KillIndex); + LiveIndex KillIndex = LIs->getMBBEndIdx(I->first); + if (!I->second->isKill(KillIndex)) + I->second->addKill(KillIndex); } } - unsigned EndIndex = 0; + LiveIndex EndIndex; if (IsIntraBlock) { EndIndex = LIs->getInstructionIndex(UseI); - EndIndex = LiveIntervals::getUseIndex(EndIndex); + EndIndex = LIs->getUseIndex(EndIndex); } else EndIndex = LIs->getMBBEndIdx(MBB); - LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI)); if (IsIntraBlock) - LI->addKill(RetVNI, EndIndex); + RetVNI->addKill(EndIndex); // Memoize results so we don't have to recompute them. if (!IsIntraBlock) @@ -728,8 +733,8 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { DE = MRI->def_end(); DI != DE; ++DI) { Defs[(*DI).getParent()].insert(&*DI); - unsigned DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = LiveIntervals::getDefIndex(DefIdx); + LiveIndex DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LIs->getDefIndex(DefIdx); assert(DI->getOpcode() != TargetInstrInfo::PHI && "Following NewVN isPHIDef flag incorrect. 
Fix me!"); @@ -739,7 +744,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) if (DstReg == LI->reg) - NewVN->copy = &*DI; + NewVN->setCopy(&*DI); NewVNs[&*DI] = NewVN; } @@ -764,14 +769,32 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { // Add ranges for dead defs for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), DE = MRI->def_end(); DI != DE; ++DI) { - unsigned DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = LiveIntervals::getDefIndex(DefIdx); + LiveIndex DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LIs->getDefIndex(DefIdx); if (LI->liveAt(DefIdx)) continue; VNInfo* DeadVN = NewVNs[&*DI]; - LI->addRange(LiveRange(DefIdx, DefIdx+1, DeadVN)); - LI->addKill(DeadVN, DefIdx); + LI->addRange(LiveRange(DefIdx, LIs->getNextSlot(DefIdx), DeadVN)); + DeadVN->addKill(DefIdx); + } + + // Update kill markers. + for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); + VI != VE; ++VI) { + VNInfo* VNI = *VI; + for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) { + LiveIndex KillIdx = VNI->kills[i]; + if (KillIdx.isPHIIndex()) + continue; + MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx); + if (KillMI) { + MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg); + if (KillMO) + // It could be a dead def. 
+ KillMO->setIsKill(); + } + } } } @@ -801,14 +824,16 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { VNsToCopy.push_back(OldVN); // Locate two-address redefinitions - for (SmallVector<unsigned, 4>::iterator KI = OldVN->kills.begin(), + for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(), KE = OldVN->kills.end(); KI != KE; ++KI) { + assert(!KI->isPHIIndex() && + "VN previously reported having no PHI kills."); MachineInstr* MI = LIs->getInstructionFromIndex(*KI); unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg); if (DefIdx == ~0U) continue; if (MI->isRegTiedToUseOperand(DefIdx)) { VNInfo* NextVN = - CurrLI->findDefinedVNInfo(LiveIntervals::getDefIndex(*KI)); + CurrLI->findDefinedVNInfoForRegInt(LIs->getDefIndex(*KI)); if (NextVN == OldVN) continue; Stack.push_back(NextVN); } @@ -840,10 +865,10 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), E = MRI->reg_end(); I != E; ++I) { MachineOperand& MO = I.getOperand(); - unsigned InstrIdx = LIs->getInstructionIndex(&*I); + LiveIndex InstrIdx = LIs->getInstructionIndex(&*I); - if ((MO.isUse() && NewLI.liveAt(LiveIntervals::getUseIndex(InstrIdx))) || - (MO.isDef() && NewLI.liveAt(LiveIntervals::getDefIndex(InstrIdx)))) + if ((MO.isUse() && NewLI.liveAt(LIs->getUseIndex(InstrIdx))) || + (MO.isDef() && NewLI.liveAt(LIs->getDefIndex(InstrIdx)))) OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo())); } @@ -865,15 +890,15 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { NumRenumbers++; } -bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo, +bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, MachineInstr* DefMI, MachineBasicBlock::iterator RestorePt, - unsigned RestoreIdx, + LiveIndex RestoreIdx, SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { MachineBasicBlock& MBB = *RestorePt->getParent(); MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - unsigned KillIdx = 0; + LiveIndex 
KillIdx; if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx); else @@ -882,13 +907,13 @@ bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo, if (KillPt == DefMI->getParent()->end()) return false; - TII->reMaterialize(MBB, RestorePt, vreg, DefMI); + TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI); LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx); ReconstructLiveInterval(CurrLI); - unsigned RematIdx = LIs->getInstructionIndex(prior(RestorePt)); - RematIdx = LiveIntervals::getDefIndex(RematIdx); - RenumberValno(CurrLI->findDefinedVNInfo(RematIdx)); + LiveIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt)); + RematIdx = LIs->getDefIndex(RematIdx); + RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx)); ++NumSplits; ++NumRemats; @@ -943,7 +968,8 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator()); + CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false, + LSs->getVNInfoAllocator()); } return FMI; @@ -1033,11 +1059,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx)); VNInfo *ValNo = LR->valno; - if (ValNo->isUnused()) { - // Defined by a dead def? How can this be? - assert(0 && "Val# is defined by a dead def?"); - abort(); - } + assert(!ValNo->isUnused() && "Val# is defined by a dead def?"); MachineInstr *DefMI = ValNo->isDefAccurate() ? LIs->getInstructionFromIndex(ValNo->def) : NULL; @@ -1056,7 +1078,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } // Find a point to restore the value after the barrier. 
- unsigned RestoreIndex = 0; + LiveIndex RestoreIndex; MachineBasicBlock::iterator RestorePt = findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex); if (RestorePt == BarrierMBB->end()) @@ -1070,7 +1092,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // Add a spill either before the barrier or after the definition. MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL; const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg); - unsigned SpillIndex = 0; + LiveIndex SpillIndex; MachineInstr *SpillMI = NULL; int SS = -1; if (!ValNo->isDefAccurate()) { @@ -1098,7 +1120,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { return false; // Def is dead. Do nothing. if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier, - BarrierMBB, SS, RefsInMBB))) { + BarrierMBB, SS, RefsInMBB))) { SpillIndex = LIs->getInstructionIndex(SpillMI); } else { // Check if it's possible to insert a spill after the def MI. @@ -1114,11 +1136,9 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { if (SpillPt == DefMBB->end()) return false; // No gap to insert spill. } - // Add spill. The store instruction kills the register if def is before - // the barrier in the barrier block. + // Add spill. SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, - DefMBB == BarrierMBB, SS, RC); + TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC); SpillMI = prior(SpillPt); LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); } @@ -1142,15 +1162,15 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } // Update spill stack slot live interval. 
- UpdateSpillSlotInterval(ValNo, LIs->getUseIndex(SpillIndex)+1, + UpdateSpillSlotInterval(ValNo, LIs->getNextSlot(LIs->getUseIndex(SpillIndex)), LIs->getDefIndex(RestoreIndex)); ReconstructLiveInterval(CurrLI); - + if (!FoldedRestore) { - unsigned RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); - RestoreIdx = LiveIntervals::getDefIndex(RestoreIdx); - RenumberValno(CurrLI->findDefinedVNInfo(RestoreIdx)); + LiveIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); + RestoreIdx = LIs->getDefIndex(RestoreIdx); + RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx)); } ++NumSplits; @@ -1189,8 +1209,6 @@ PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, while (!Intervals.empty()) { if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit) break; - else if (NumSplits == 4) - Change |= Change; LiveInterval *LI = Intervals.back(); Intervals.pop_back(); bool result = SplitRegLiveInterval(LI); @@ -1236,8 +1254,8 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { // reaching definition (VNInfo). 
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg), UE = MRI->use_end(); UI != UE; ++UI) { - unsigned index = LIs->getInstructionIndex(&*UI); - index = LiveIntervals::getUseIndex(index); + LiveIndex index = LIs->getInstructionIndex(&*UI); + index = LIs->getUseIndex(index); const LiveRange* LR = (*LI)->getLiveRangeContaining(index); VNUseCount[LR->valno].insert(&*UI); @@ -1386,7 +1404,7 @@ bool PreAllocSplitting::createsNewJoin(LiveRange* LR, if (LR->valno->hasPHIKill()) return false; - unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB); + LiveIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB); if (LR->end < MBBEnd) return false; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 9e7ad67..8793df7 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -31,7 +31,9 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" #include <climits> @@ -51,22 +53,26 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } /// frame indexes with appropriate references. /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { + const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); // Get MachineModuleInfo so that we can track the construction of the // frame. if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>()) Fn.getFrameInfo()->setMachineModuleInfo(MMI); + // Calculate the MaxCallFrameSize and HasCalls variables for the function's + // frame information. Also eliminates call frame pseudo instructions. 
+ calculateCallsInformation(Fn); + // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); - // Scan the function for modified callee saved registers and insert spill - // code for any callee saved registers that are modified. Also calculate - // the MaxCallFrameSize and HasCalls variables for the function's frame - // information and eliminates call frame pseudo instructions. + // Scan the function for modified callee saved registers and insert spill code + // for any callee saved registers that are modified. calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: @@ -78,7 +84,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { placeCSRSpillsAndRestores(Fn); // Add the code to save and restore the callee saved registers - insertCSRSpillsAndRestores(Fn); + if (!F->hasFnAttr(Attribute::Naked)) + insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. @@ -92,13 +99,20 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters // must be called before this function in order to set the HasCalls // and MaxCallFrameSize variables. - insertPrologEpilogCode(Fn); + if (!F->hasFnAttr(Attribute::Naked)) + insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // replaceFrameIndices(Fn); + // If register scavenging is needed, as we've enabled doing it as a + // post-pass, scavenge the virtual registers that frame index elimiation + // inserted. 
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn); + delete RS; clearAllSets(); return true; @@ -117,35 +131,24 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { } #endif -/// calculateCalleeSavedRegisters - Scan the function for modified callee saved -/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for -/// the function's frame information and eliminates call frame pseudo -/// instructions. -/// -void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { +/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls +/// variables for the function's frame information and eliminate call frame +/// pseudo instructions. +void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); - // Get the callee saved register list... - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + unsigned MaxCallFrameSize = 0; + bool HasCalls = false; // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); - // These are used to keep track the callee-save area. Initialize them. - MinCSFrameIndex = INT_MAX; - MaxCSFrameIndex = 0; - - // Early exit for targets which have no callee saved registers and no call - // frame setup/destroy pseudo instructions. - if ((CSRegs == 0 || CSRegs[0] == 0) && - FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + // Early exit for targets which have no call frame setup/destroy pseudo + // instructions. 
+ if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) return; - unsigned MaxCallFrameSize = 0; - bool HasCalls = false; - std::vector<MachineBasicBlock::iterator> FrameSDOps; for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) @@ -157,31 +160,57 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; HasCalls = true; FrameSDOps.push_back(I); + } else if (I->getOpcode() == TargetInstrInfo::INLINEASM) { + // An InlineAsm might be a call; assume it is to get the stack frame + // aligned correctly for calls. + HasCalls = true; } MachineFrameInfo *FFI = Fn.getFrameInfo(); FFI->setHasCalls(HasCalls); FFI->setMaxCallFrameSize(MaxCallFrameSize); - for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) { - MachineBasicBlock::iterator I = FrameSDOps[i]; - // If call frames are not being included as part of the stack frame, - // and there is no dynamic allocation (therefore referencing frame slots - // off sp), leave the pseudo ops alone. We'll eliminate them later. + for (std::vector<MachineBasicBlock::iterator>::iterator + i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { + MachineBasicBlock::iterator I = *i; + + // If call frames are not being included as part of the stack frame, and + // there is no dynamic allocation (therefore referencing frame slots off + // sp), leave the pseudo ops alone. We'll eliminate them later. if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } +} + + +/// calculateCalleeSavedRegisters - Scan the function for modified callee saved +/// registers. 
+void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + MachineFrameInfo *FFI = Fn.getFrameInfo(); + + // Get the callee saved register list... + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + + // These are used to keep track the callee-save area. Initialize them. + MinCSFrameIndex = INT_MAX; + MaxCSFrameIndex = 0; + + // Early exit for targets which have no callee saved registers. + if (CSRegs == 0 || CSRegs[0] == 0) + return; - // Now figure out which *callee saved* registers are modified by the current + // Figure out which *callee saved* registers are modified by the current // function, thus needing to be saved and restored in the prolog/epilog. - // - const TargetRegisterClass* const *CSRegClasses = + const TargetRegisterClass * const *CSRegClasses = RegInfo->getCalleeSavedRegClasses(&Fn); + std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegUsed(Reg)) { - // If the reg is modified, save it! + // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); } else { for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); @@ -198,39 +227,47 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; - const std::pair<unsigned,int> *FixedSpillSlots = + const TargetFrameInfo::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate // stack slots for them. 
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = CSI[i].getRegClass(); + for (std::vector<CalleeSavedInfo>::iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = I->getRegClass(); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } // Check to see if this physreg must be spilled to a particular stack slot // on this target. - const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots; + const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && - FixedSlot->first != Reg) + FixedSlot->Reg != Reg) ++FixedSlot; - int FrameIdx; - if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) { + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. unsigned Align = RC->getAlignment(); unsigned StackAlign = TFI->getStackAlignment(); - // We may not be able to sastify the desired alignment specification of - // the TargetRegisterClass if the stack alignment is smaller. - // Use the min. + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. Align = std::min(Align, StackAlign); FrameIdx = FFI->CreateStackObject(RC->getSize(), Align); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. 
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second); + FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset); } - CSI[i].setFrameIdx(FrameIdx); + + I->setFrameIdx(FrameIdx); } FFI->setCalleeSavedInfo(CSI); @@ -244,6 +281,8 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MachineFrameInfo *FFI = Fn.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + FFI->setCalleeSavedInfoValid(true); + // Early exit if no callee saved registers are modified! if (CSI.empty()) return; @@ -403,8 +442,7 @@ static inline void AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { - // If stack grows down, we need to add size of find the lowest address of the - // object. + // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += FFI->getObjectSize(FrameIdx); @@ -437,16 +475,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *FFI = Fn.getFrameInfo(); - unsigned MaxAlign = FFI->getMaxAlignment(); + unsigned MaxAlign = 1; // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. - int64_t Offset = TFI.getOffsetOfLocalArea(); + int LocalAreaOffset = TFI.getOffsetOfLocalArea(); if (StackGrowsDown) - Offset = -Offset; - assert(Offset >= 0 + LocalAreaOffset = -LocalAreaOffset; + assert(LocalAreaOffset >= 0 && "Local area offset should be in direction of stack growth"); + int64_t Offset = LocalAreaOffset; // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. 
@@ -538,32 +577,38 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); } - // Round up the size to a multiple of the alignment, but only if there are - // calls or alloca's in the function. This ensures that any calls to - // subroutines have their stack frames suitable aligned. - // Also do this if we need runtime alignment of the stack. In this case - // offsets will be relative to SP not FP; round up the stack size so this - // works. - if (!RegInfo->targetHandlesStackFrameRounding() && - (FFI->hasCalls() || FFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && - FFI->getObjectIndexEnd() != 0))) { + if (!RegInfo->targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (RegInfo->hasReservedCallFrame(Fn)) + if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) Offset += FFI->getMaxCallFrameSize(); - unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1; + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (FFI->hasCalls() || FFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0)) + StackAlign = TFI.getStackAlignment(); + else + StackAlign = TFI.getTransientStackAlignment(); + // If the frame pointer is eliminated, all frame offsets will be relative + // to SP not FP; align to MaxAlign so this works. 
+ StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); } // Update frame info to pretend that this is part of the stack... - FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea()); + FFI->setStackSize(Offset - LocalAreaOffset); // Remember the required stack alignment in case targets need it to perform // dynamic stack alignment. - FFI->setMaxAlignment(MaxAlign); + if (MaxAlign > FFI->getMaxAlignment()) + FFI->setMaxAlignment(MaxAlign); } @@ -604,14 +649,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { int SPAdj = 0; // SP offset due to call frame setup / destroy. - if (RS) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == TargetInstrInfo::DECLARE) { - // Ignore it. - ++I; - continue; - } if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { @@ -654,8 +694,16 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - - TRI.eliminateFrameIndex(MI, SPAdj, RS); + int Value; + unsigned VReg = + TRI.eliminateFrameIndex(MI, SPAdj, &Value, + FrameIndexVirtualScavenging ? NULL : RS); + if (VReg) { + assert (FrameIndexVirtualScavenging && + "Not scavenging, but virtual returned from " + "eliminateFrameIndex()!"); + FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); + } // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -670,10 +718,170 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (DoIncr && I != BB->end()) ++I; // Update register states. 
- if (RS && MI) RS->forward(MI); + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); } } +/// findLastUseReg - find the killing use of the specified register within +/// the instruciton range. Return the operand number of the kill in Operand. +static MachineBasicBlock::iterator +findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, + unsigned Reg, unsigned *Operand) { + // Scan forward to find the last use of this virtual register + for (++I; I != ME; ++I) { + MachineInstr *MI = I; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isReg()) { + unsigned OpReg = MI->getOperand(i).getReg(); + if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg)) + continue; + assert (OpReg == Reg + && "overlapping use of scavenged index register!"); + // If this is the killing use, we're done + if (MI->getOperand(i).isKill()) { + if (Operand) + *Operand = i; + return I; + } + } + } + // If we hit the end of the basic block, there was no kill of + // the virtual register, which is wrong. + assert (0 && "scavenged index register never killed!"); + return ME; +} + +/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// with physical registers. Use the register scavenger to find an +/// appropriate register to use. +void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(BB); + + unsigned CurrentVirtReg = 0; + unsigned CurrentScratchReg = 0; + bool havePrevValue = false; + unsigned PrevScratchReg = 0; + int PrevValue; + MachineInstr *PrevLastUseMI = NULL; + unsigned PrevLastUseOp = 0; + bool trackingCurrentValue = false; + int SPAdj = 0; + int Value = 0; + + // The instruction stream may change in the loop, so check BB->end() + // directly. 
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + MachineInstr *MI = I; + // Likewise, call getNumOperands() each iteration, as the MI may change + // inside the loop (with 'i' updated accordingly). + for (unsigned i = 0; i != MI->getNumOperands(); ++i) + if (MI->getOperand(i).isReg()) { + MachineOperand &MO = MI->getOperand(i); + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // If we have an active scavenged register, we shouldn't be + // seeing any references to it. + assert (Reg != CurrentScratchReg + && "overlapping use of scavenged frame index register!"); + + // If we have a previous scratch reg, check and see if anything + // here kills whatever value is in there. + if (Reg == PrevScratchReg) { + if (MO.isUse()) { + // Two-address operands implicitly kill + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { + havePrevValue = false; + PrevScratchReg = 0; + } + } else { + assert (MO.isDef()); + havePrevValue = false; + PrevScratchReg = 0; + } + } + continue; + } + + // Have we already allocated a scratch register for this virtual? + if (Reg != CurrentVirtReg) { + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // We can't have nested virtual register live ranges because + // there's only a guarantee of one scavenged register at a time. + assert (CurrentVirtReg == 0 && + "overlapping frame index virtual registers!"); + + // If the target gave us information about what's in the register, + // we can use that to re-use scratch regs. 
+ DenseMap<unsigned, FrameConstantEntry>::iterator Entry = + FrameConstantRegMap.find(Reg); + trackingCurrentValue = Entry != FrameConstantRegMap.end(); + if (trackingCurrentValue) { + SPAdj = (*Entry).second.second; + Value = (*Entry).second.first; + } else + SPAdj = Value = 0; + + // If the scratch register from the last allocation is still + // available, see if the value matches. If it does, just re-use it. + if (trackingCurrentValue && havePrevValue && PrevValue == Value) { + // FIXME: This assumes that the instructions in the live range + // for the virtual register are exclusively for the purpose + // of populating the value in the register. That's reasonable + // for these frame index registers, but it's still a very, very + // strong assumption. Perhaps this implies that the frame index + // elimination should be before register allocation, with + // conservative heuristics since we'll know less then, and + // the reuse calculations done directly when doing the code-gen? + + // Find the last use of the new virtual register. Remove all + // instruction between here and there, and update the current + // instruction to reference the last use insn instead. + MachineBasicBlock::iterator LastUseMI = + findLastUseReg(I, BB->end(), Reg, &i); + // Remove all instructions up 'til the last use, since they're + // just calculating the value we already have. + BB->erase(I, LastUseMI); + MI = I = LastUseMI; + + CurrentScratchReg = PrevScratchReg; + // Extend the live range of the register + PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); + RS->setUsed(CurrentScratchReg); + } else { + CurrentVirtReg = Reg; + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + CurrentScratchReg = RS->FindUnusedReg(RC); + if (CurrentScratchReg == 0) + // No register is "free". Scavenge a register. 
+ CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); + + PrevValue = Value; + } + } + assert (CurrentScratchReg && "Missing scratch register!"); + MI->getOperand(i).setReg(CurrentScratchReg); + + // If this is the last use of the register, stop tracking it. + if (MI->getOperand(i).isKill()) { + PrevScratchReg = CurrentScratchReg; + PrevLastUseMI = MI; + PrevLastUseOp = i; + CurrentScratchReg = CurrentVirtReg = 0; + havePrevValue = trackingCurrentValue; + } + } + RS->forward(MI); + } + } +} diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index c158dd8..931f1eb 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class RegScavenger; @@ -93,6 +94,17 @@ namespace llvm { // functions. bool ShrinkWrapThisFunction; + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the curren function. + bool FrameIndexVirtualScavenging; + + // When using the scavenger post-pass to resolve frame reference + // materialization registers, maintain a map of the registers to + // the constant value and SP adjustment associated with it. + typedef std::pair<int, int> FrameConstantEntry; + DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap; + #ifndef NDEBUG // Machine function handle. 
MachineFunction* MF; @@ -118,10 +130,12 @@ namespace llvm { CSRegBlockMap &prevRestores); void placeSpillsAndRestores(MachineFunction &Fn); void placeCSRSpillsAndRestores(MachineFunction &Fn); + void calculateCallsInformation(MachineFunction &Fn); void calculateCalleeSavedRegisters(MachineFunction &Fn); void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); + void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); // Initialize DFA sets, called before iterations. diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index b4c20e6..00c5d46 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/DerivedTypes.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include <map> @@ -38,15 +39,16 @@ static const char *const PSVNames[] = { "ConstantPool" }; +// FIXME: THIS IS A HACK!!!! +// Eventually these should be uniqued on LLVMContext rather than in a managed +// static. For now, we can safely use the global context for the time being to +// squeak by. 
PseudoSourceValue::PseudoSourceValue() : - Value(PointerType::getUnqual(Type::Int8Ty), PseudoSourceValueVal) {} + Value(Type::getInt8PtrTy(getGlobalContext()), + PseudoSourceValueVal) {} -void PseudoSourceValue::dump() const { - print(errs()); errs() << '\n'; -} - -void PseudoSourceValue::print(raw_ostream &OS) const { - OS << PSVNames[this - *PSVs]; +void PseudoSourceValue::printCustom(raw_ostream &O) const { + O << PSVNames[this - *PSVs]; } namespace { @@ -61,7 +63,7 @@ namespace { virtual bool isConstant(const MachineFrameInfo *MFI) const; - virtual void print(raw_ostream &OS) const { + virtual void printCustom(raw_ostream &OS) const { OS << "FixedStack" << FI; } }; @@ -83,7 +85,7 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { this == getConstantPool() || this == getJumpTable()) return true; - assert(0 && "Unknown PseudoSourceValue!"); + llvm_unreachable("Unknown PseudoSourceValue!"); return false; } diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index 64374ce..b655dda4 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -30,44 +30,6 @@ It also increase the likelyhood the store may become dead. //===---------------------------------------------------------------------===// -I think we should have a "hasSideEffects" flag (which is automatically set for -stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able -to remat any instruction that has no side effects, if it can handle it and if -profitable. - -For now, I'd suggest having the remat stuff work like this: - -1. I need to spill/reload this thing. -2. Check to see if it has side effects. -3. Check to see if it is simple enough: e.g. it only has one register -destination and no register input. -4. If so, clone the instruction, do the xform, etc. - -Advantages of this are: - -1. the .td file describes the behavior of the instructions, not the way the - algorithm should work. -2. 
as remat gets smarter in the future, we shouldn't have to be changing the .td - files. -3. it is easier to explain what the flag means in the .td file, because you - don't have to pull in the explanation of how the current remat algo works. - -Some potential added complexities: - -1. Some instructions have to be glued to it's predecessor or successor. All of - the PC relative instructions and condition code setting instruction. We could - mark them as hasSideEffects, but that's not quite right. PC relative loads - from constantpools can be remat'ed, for example. But it requires more than - just cloning the instruction. Some instructions can be remat'ed but it - expands to more than one instruction. But allocator will have to make a - decision. - -4. As stated in 3, not as simple as cloning in some cases. The target will have - to decide how to remat it. For example, an ARM 2-piece constant generation - instruction is remat'ed as a load from constantpool. - -//===---------------------------------------------------------------------===// - bb27 ... ... %reg1037 = ADDri %reg1039, 1 @@ -206,3 +168,32 @@ Stack coloring improvments: not spill slots. 2. Reorder objects to fill in gaps between objects. e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4 + +//===---------------------------------------------------------------------===// + +The scheduler should be able to sort nearby instructions by their address. For +example, in an expanded memset sequence it's not uncommon to see code like this: + + movl $0, 4(%rdi) + movl $0, 8(%rdi) + movl $0, 12(%rdi) + movl $0, 0(%rdi) + +Each of the stores is independent, and the scheduler is currently making an +arbitrary decision about the order. 
+ +//===---------------------------------------------------------------------===// + +Another opportunitiy in this code is that the $0 could be moved to a register: + + movl $0, 4(%rdi) + movl $0, 8(%rdi) + movl $0, 12(%rdi) + movl $0, 0(%rdi) + +This would save substantial code size, especially for longer sequences like +this. It would be easy to have a rule telling isel to avoid matching MOV32mi +if the immediate has more than some fixed number of uses. It's more involved +to teach the register allocator how to do late folding to recover from +excessive register pressure. + diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 904b4cb..5d58ea9 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -33,8 +33,10 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <set> #include <queue> @@ -142,6 +144,7 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<LiveIntervals>(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); @@ -173,11 +176,11 @@ namespace { /// processActiveIntervals - expire old intervals and move non-overlapping /// ones to the inactive list. - void processActiveIntervals(unsigned CurPoint); + void processActiveIntervals(LiveIndex CurPoint); /// processInactiveIntervals - expire old intervals and move overlapping /// ones to the active list. - void processInactiveIntervals(unsigned CurPoint); + void processInactiveIntervals(LiveIndex CurPoint); /// hasNextReloadInterval - Return the next liveinterval that's being /// defined by a reload from the same SS as the specified one. 
@@ -230,12 +233,12 @@ namespace { bool Error = false; for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { if (regUse_[i] != 0) { - cerr << tri_->getName(i) << " is still in use!\n"; + errs() << tri_->getName(i) << " is still in use!\n"; Error = true; } } if (Error) - abort(); + llvm_unreachable(0); #endif regUse_.clear(); regUseBackUp_.clear(); @@ -295,15 +298,20 @@ namespace { template <typename ItTy> void printIntervals(const char* const str, ItTy i, ItTy e) const { - if (str) DOUT << str << " intervals:\n"; - for (; i != e; ++i) { - DOUT << "\t" << *i->first << " -> "; - unsigned reg = i->first->reg; - if (TargetRegisterInfo::isVirtualRegister(reg)) { - reg = vrm_->getPhys(reg); - } - DOUT << tri_->getName(reg) << '\n'; - } + DEBUG({ + if (str) + errs() << str << " intervals:\n"; + + for (; i != e; ++i) { + errs() << "\t" << *i->first << " -> "; + + unsigned reg = i->first->reg; + if (TargetRegisterInfo::isVirtualRegister(reg)) + reg = vrm_->getPhys(reg); + + errs() << tri_->getName(reg) << '\n'; + } + }); } }; char RALinScan::ID = 0; @@ -358,7 +366,8 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { return Reg; VNInfo *vni = cur.begin()->valno; - if (!vni->def || vni->isUnused() || !vni->isDefAccurate()) + if ((vni->def == LiveIndex()) || + vni->isUnused() || !vni->isDefAccurate()) return Reg; MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg; @@ -380,18 +389,18 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { // Try to coalesce. if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) { - DOUT << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg) - << '\n'; + DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg) + << '\n'); vrm_->clearVirt(cur.reg); vrm_->assignVirt2Phys(cur.reg, PhysReg); // Remove unnecessary kills since a copy does not clobber the register. 
if (li_->hasInterval(SrcReg)) { LiveInterval &SrcLI = li_->getInterval(SrcReg); - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(cur.reg), - E = mri_->reg_end(); I != E; ++I) { + for (MachineRegisterInfo::use_iterator I = mri_->use_begin(cur.reg), + E = mri_->use_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); - if (!O.isUse() || !O.isKill()) + if (!O.isKill()) continue; MachineInstr *MI = &*I; if (SrcLI.liveAt(li_->getDefIndex(li_->getInstructionIndex(MI)))) @@ -478,24 +487,25 @@ void RALinScan::initIntervalSets() } } -void RALinScan::linearScan() -{ +void RALinScan::linearScan() { // linear scan algorithm - DOUT << "********** LINEAR SCAN **********\n"; - DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n'; - - DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end())); + DEBUG({ + errs() << "********** LINEAR SCAN **********\n" + << "********** Function: " + << mf_->getFunction()->getName() << '\n'; + printIntervals("fixed", fixed_.begin(), fixed_.end()); + }); while (!unhandled_.empty()) { // pick the interval with the earliest start point LiveInterval* cur = unhandled_.top(); unhandled_.pop(); ++NumIters; - DOUT << "\n*** CURRENT ***: " << *cur << '\n'; + DEBUG(errs() << "\n*** CURRENT ***: " << *cur << '\n'); if (!cur->empty()) { - processActiveIntervals(cur->beginNumber()); - processInactiveIntervals(cur->beginNumber()); + processActiveIntervals(cur->beginIndex()); + processInactiveIntervals(cur->beginIndex()); assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && "Can only allocate virtual registers!"); @@ -506,15 +516,17 @@ void RALinScan::linearScan() // assign it one. 
assignRegOrStackSlotAtInterval(cur); - DEBUG(printIntervals("active", active_.begin(), active_.end())); - DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end())); + DEBUG({ + printIntervals("active", active_.begin(), active_.end()); + printIntervals("inactive", inactive_.begin(), inactive_.end()); + }); } // Expire any remaining active intervals while (!active_.empty()) { IntervalPtr &IP = active_.back(); unsigned reg = IP.first->reg; - DOUT << "\tinterval " << *IP.first << " expired\n"; + DEBUG(errs() << "\tinterval " << *IP.first << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -523,9 +535,11 @@ void RALinScan::linearScan() } // Expire any remaining inactive intervals - DEBUG(for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ++i) - DOUT << "\tinterval " << *i->first << " expired\n"); + DEBUG({ + for (IntervalPtrs::reverse_iterator + i = inactive_.rbegin(); i != inactive_.rend(); ++i) + errs() << "\tinterval " << *i->first << " expired\n"; + }); inactive_.clear(); // Add live-ins to every BB except for entry. Also perform trivial coalescing. @@ -560,7 +574,7 @@ void RALinScan::linearScan() } } - DOUT << *vrm_; + DEBUG(errs() << *vrm_); // Look for physical registers that end up not being allocated even though // register allocator had to spill other registers in its register class. @@ -572,9 +586,9 @@ void RALinScan::linearScan() /// processActiveIntervals - expire old intervals and move non-overlapping ones /// to the inactive list. 
-void RALinScan::processActiveIntervals(unsigned CurPoint) +void RALinScan::processActiveIntervals(LiveIndex CurPoint) { - DOUT << "\tprocessing active intervals:\n"; + DEBUG(errs() << "\tprocessing active intervals:\n"); for (unsigned i = 0, e = active_.size(); i != e; ++i) { LiveInterval *Interval = active_[i].first; @@ -584,7 +598,7 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // Remove expired intervals. - DOUT << "\t\tinterval " << *Interval << " expired\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -597,7 +611,7 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) } else if (IntervalPos->start > CurPoint) { // Move inactive intervals to inactive list. - DOUT << "\t\tinterval " << *Interval << " inactive\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " inactive\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -618,9 +632,9 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) /// processInactiveIntervals - expire old intervals and move overlapping /// ones to the active list. -void RALinScan::processInactiveIntervals(unsigned CurPoint) +void RALinScan::processInactiveIntervals(LiveIndex CurPoint) { - DOUT << "\tprocessing inactive intervals:\n"; + DEBUG(errs() << "\tprocessing inactive intervals:\n"); for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { LiveInterval *Interval = inactive_[i].first; @@ -630,7 +644,7 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // remove expired intervals. 
- DOUT << "\t\tinterval " << *Interval << " expired\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); // Pop off the end of the list. inactive_[i] = inactive_.back(); @@ -638,7 +652,7 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint) --i; --e; } else if (IntervalPos->start <= CurPoint) { // move re-activated intervals in active list - DOUT << "\t\tinterval " << *Interval << " active\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " active\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -699,7 +713,7 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { return IP.end(); } -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){ +static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, LiveIndex Point){ for (unsigned i = 0, e = V.size(); i != e; ++i) { RALinScan::IntervalPtr &IP = V[i]; LiveInterval::iterator I = std::upper_bound(IP.first->begin(), @@ -725,7 +739,8 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, if (SI.hasAtLeastOneValue()) VNI = SI.getValNumInfo(0); else - VNI = SI.getNextValue(0, 0, false, ls_->getVNInfoAllocator()); + VNI = SI.getNextValue(LiveIndex(), 0, false, + ls_->getVNInfoAllocator()); LiveInterval &RI = li_->getInterval(cur->reg); // FIXME: This may be overly conservative. @@ -764,10 +779,12 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; SmallVector<LiveInterval*, 8> SLIs[3]; - DOUT << "\tConsidering " << NumCands << " candidates: "; - DEBUG(for (unsigned i = 0; i != NumCands; ++i) - DOUT << tri_->getName(Candidates[i].first) << " "; - DOUT << "\n";); + DEBUG({ + errs() << "\tConsidering " << NumCands << " candidates: "; + for (unsigned i = 0; i != NumCands; ++i) + errs() << tri_->getName(Candidates[i].first) << " "; + errs() << "\n"; + }); // Calculate the number of conflicts of each candidate. 
for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { @@ -865,16 +882,15 @@ void RALinScan::UpgradeRegister(unsigned Reg) { namespace { struct LISorter { bool operator()(LiveInterval* A, LiveInterval* B) { - return A->beginNumber() < B->beginNumber(); + return A->beginIndex() < B->beginIndex(); } }; } /// assignRegOrStackSlotAtInterval - assign a register if one is available, or /// spill. -void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) -{ - DOUT << "\tallocating current interval: "; +void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { + DEBUG(errs() << "\tallocating current interval: "); // This is an implicitly defined live interval, just assign any register. const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); @@ -882,7 +898,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) physReg = *RC->allocation_order_begin(*mf_); - DOUT << tri_->getName(physReg) << '\n'; + DEBUG(errs() << tri_->getName(physReg) << '\n'); // Note the register is not really in use. vrm_->assignVirt2Phys(cur->reg, physReg); return; @@ -891,7 +907,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) backUpRegUses(); std::vector<std::pair<unsigned, float> > SpillWeightsToAdd; - unsigned StartPosition = cur->beginNumber(); + LiveIndex StartPosition = cur->beginIndex(); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); // If start of this live interval is defined by a move instruction and its @@ -901,7 +917,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // one, e.g. X86::mov32to32_. These move instructions are not coalescable. 
if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { VNInfo *vni = cur->begin()->valno; - if (vni->def && !vni->isUnused() && vni->isDefAccurate()) { + if ((vni->def != LiveIndex()) && !vni->isUnused() && + vni->isDefAccurate()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (CopyMI && @@ -963,7 +980,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // Okay, this reg is on the fixed list. Check to see if we actually // conflict. LiveInterval *I = IP.first; - if (I->endNumber() > StartPosition) { + if (I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; if (II != I->begin() && II->start > StartPosition) @@ -988,7 +1005,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - I->endNumber() > StartPosition) { + I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; if (II != I->begin() && II->start > StartPosition) @@ -1015,7 +1032,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // the free physical register and add this interval to the active // list. if (physReg) { - DOUT << tri_->getName(physReg) << '\n'; + DEBUG(errs() << tri_->getName(physReg) << '\n'); vrm_->assignVirt2Phys(cur->reg, physReg); addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); @@ -1031,7 +1048,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) } return; } - DOUT << "no free registers\n"; + DEBUG(errs() << "no free registers\n"); // Compile the spill weights into an array that is better for scanning. 
std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f); @@ -1049,7 +1066,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) updateSpillWeights(SpillWeights, reg, i->first->weight, RC); } - DOUT << "\tassigning stack slot at interval "<< *cur << ":\n"; + DEBUG(errs() << "\tassigning stack slot at interval "<< *cur << ":\n"); // Find a register to spill. float minWeight = HUGE_VALF; @@ -1102,8 +1119,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) DowngradedRegs.clear(); assignRegOrStackSlotAtInterval(cur); } else { - cerr << "Ran out of registers during register allocation!\n"; - exit(1); + llvm_report_error("Ran out of registers during register allocation!"); } return; } @@ -1117,16 +1133,19 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) --LastCandidate; } - DOUT << "\t\tregister(s) with min weight(s): "; - DEBUG(for (unsigned i = 0; i != LastCandidate; ++i) - DOUT << tri_->getName(RegsWeights[i].first) - << " (" << RegsWeights[i].second << ")\n"); + DEBUG({ + errs() << "\t\tregister(s) with min weight(s): "; + + for (unsigned i = 0; i != LastCandidate; ++i) + errs() << tri_->getName(RegsWeights[i].first) + << " (" << RegsWeights[i].second << ")\n"; + }); // If the current has the minimum weight, we need to spill it and // add any added intervals back to unhandled, and restart // linearscan. 
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DOUT << "\t\t\tspilling(c): " << *cur << '\n'; + DEBUG(errs() << "\t\t\tspilling(c): " << *cur << '\n'); SmallVector<LiveInterval*, 8> spillIs; std::vector<LiveInterval*> added; @@ -1154,14 +1173,14 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) LiveInterval *ReloadLi = added[i]; if (ReloadLi->weight == HUGE_VALF && li_->getApproximateInstructionCount(*ReloadLi) == 0) { - unsigned ReloadIdx = ReloadLi->beginNumber(); + LiveIndex ReloadIdx = ReloadLi->beginIndex(); MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { // Last reload of same SS is in the same MBB. We want to try to // allocate both reloads the same register and make sure the reg // isn't clobbered in between if at all possible. - assert(LastReload->beginNumber() < ReloadIdx); + assert(LastReload->beginIndex() < ReloadIdx); NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); } LastReloadMBB = ReloadMBB; @@ -1206,12 +1225,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // mark our rollback point. std::vector<LiveInterval*> added; while (!spillIs.empty()) { - bool epicFail = false; LiveInterval *sli = spillIs.back(); spillIs.pop_back(); - DOUT << "\t\t\tspilling(a): " << *sli << '\n'; + DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n'); earliestStartInterval = - (earliestStartInterval->beginNumber() < sli->beginNumber()) ? + (earliestStartInterval->beginIndex() < sli->beginIndex()) ? 
earliestStartInterval : sli; std::vector<LiveInterval*> newIs; @@ -1223,15 +1241,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) addStackInterval(sli, ls_, li_, mri_, *vrm_); std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); - - if (epicFail) { - //abort(); - } } - unsigned earliestStart = earliestStartInterval->beginNumber(); + LiveIndex earliestStart = earliestStartInterval->beginIndex(); - DOUT << "\t\trolling back to: " << earliestStart << '\n'; + DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a // spilled live interval and undo each one, restoring the state of @@ -1239,9 +1253,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) while (!handled_.empty()) { LiveInterval* i = handled_.back(); // If this interval starts before t we are done. - if (i->beginNumber() < earliestStart) + if (i->beginIndex() < earliestStart) break; - DOUT << "\t\t\tundo changes for: " << *i << '\n'; + DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n'); handled_.pop_back(); // When undoing a live interval allocation we must know if it is active or @@ -1290,8 +1304,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) for (unsigned i = 0, e = handled_.size(); i != e; ++i) { LiveInterval *HI = handled_[i]; if (!HI->expiredAt(earliestStart) && - HI->expiredAt(cur->beginNumber())) { - DOUT << "\t\t\tundo changes for: " << *HI << '\n'; + HI->expiredAt(cur->beginIndex())) { + DEBUG(errs() << "\t\t\tundo changes for: " << *HI << '\n'); active_.push_back(std::make_pair(HI, HI->begin())); assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); addRegUse(vrm_->getPhys(HI->reg)); @@ -1310,14 +1324,14 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) LiveInterval *ReloadLi = added[i]; if (ReloadLi->weight == HUGE_VALF && li_->getApproximateInstructionCount(*ReloadLi) == 0) { - unsigned 
ReloadIdx = ReloadLi->beginNumber(); + LiveIndex ReloadIdx = ReloadLi->beginIndex(); MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { // Last reload of same SS is in the same MBB. We want to try to // allocate both reloads the same register and make sure the reg // isn't clobbered in between if at all possible. - assert(LastReload->beginNumber() < ReloadIdx); + assert(LastReload->beginIndex() < ReloadIdx); NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); } LastReloadMBB = ReloadMBB; @@ -1420,7 +1434,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { // available first. unsigned Preference = vrm_->getRegAllocPref(cur->reg); if (Preference) { - DOUT << "(preferred: " << tri_->getName(Preference) << ") "; + DEBUG(errs() << "(preferred: " << tri_->getName(Preference) << ") "); if (isRegAvail(Preference) && RC->contains(Preference)) return Preference; diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index e1cc20c..6caa2d3 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -25,6 +25,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallSet.h" @@ -151,6 +153,7 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequiredID(PHIEliminationID); AU.addRequiredID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); @@ -291,11 +294,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, assert(VirtReg && "Spilling a physical register is illegal!" 
" Must not have appropriate kill for the register or use exists beyond" " the intended one."); - DOUT << " Spilling register " << TRI->getName(PhysReg) - << " containing %reg" << VirtReg; + DEBUG(errs() << " Spilling register " << TRI->getName(PhysReg) + << " containing %reg" << VirtReg); if (!isVirtRegModified(VirtReg)) { - DOUT << " which has not been modified, so no store necessary!"; + DEBUG(errs() << " which has not been modified, so no store necessary!"); std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg); if (LastUse.first) LastUse.first->getOperand(LastUse.second).setIsKill(); @@ -305,7 +308,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, // modified. const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DOUT << " to stack slot #" << FrameIndex; + DEBUG(errs() << " to stack slot #" << FrameIndex); // If the instruction reads the register that's spilled, (e.g. this can // happen if it is a move to a physical register), then the spill // instruction is not a kill. 
@@ -316,7 +319,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available - DOUT << "\n"; + DEBUG(errs() << '\n'); removePhysReg(PhysReg); } @@ -505,8 +508,8 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded - DOUT << " Reloading %reg" << VirtReg << " into " - << TRI->getName(PhysReg) << "\n"; + DEBUG(errs() << " Reloading %reg" << VirtReg << " into " + << TRI->getName(PhysReg) << "\n"); // Add move instruction(s) TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); @@ -517,24 +520,28 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); if (!ReloadedRegs.insert(PhysReg)) { - cerr << "Ran out of registers during register allocation!\n"; + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { - cerr << "Please check your inline asm statement for invalid " + Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; - MI->print(cerr.stream(), TM); + MI->print(Msg, TM); } - exit(1); + llvm_report_error(Msg.str()); } for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { if (!ReloadedRegs.insert(*SubRegs)) { - cerr << "Ran out of registers during register allocation!\n"; + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { - cerr << "Please check your inline asm statement for invalid " + Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; - MI->print(cerr.stream(), TM); + MI->print(Msg, TM); } - exit(1); + llvm_report_error(Msg.str()); } } @@ -707,8 +714,11 @@ void 
RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // loop over each instruction MachineBasicBlock::iterator MII = MBB.begin(); - DEBUG(const BasicBlock *LBB = MBB.getBasicBlock(); - if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName()); + DEBUG({ + const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) + errs() << "\nStarting RegAlloc of BB: " << LBB->getName(); + }); // Add live-in registers as active. for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), @@ -733,13 +743,15 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { while (MII != MBB.end()) { MachineInstr *MI = MII++; const TargetInstrDesc &TID = MI->getDesc(); - DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI; - DOUT << " Regs have values: "; - for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) - DOUT << "[" << TRI->getName(i) - << ",%reg" << PhysRegsUsed[i] << "] "; - DOUT << "\n"); + DEBUG({ + errs() << "\nStarting RegAlloc of: " << *MI; + errs() << " Regs have values: "; + for (unsigned i = 0; i != TRI->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + errs() << "[" << TRI->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + errs() << '\n'; + }); // Loop over the implicit uses, making sure that they are at the head of the // use order list, so they don't get reallocated. 
@@ -783,8 +795,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DOUT << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"; + DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the earlyclobber register } else { unsigned Reg = MO.getReg(); @@ -849,15 +861,15 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DOUT << " Last use of " << TRI->getName(PhysReg) - << "[%reg" << VirtReg <<"], removing it from live set\n"; + DEBUG(errs() << " Last use of " << TRI->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { if (PhysRegsUsed[*SubRegs] != -2) { - DOUT << " Last use of " - << TRI->getName(*SubRegs) - << "[%reg" << VirtReg <<"], removing it from live set\n"; + DEBUG(errs() << " Last use of " + << TRI->getName(*SubRegs) << "[%reg" << VirtReg + <<"], removing it from live set\n"); removePhysReg(*SubRegs); } } @@ -942,8 +954,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { MF->getRegInfo().setPhysRegUsed(DestPhysReg); markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DOUT << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"; + DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the output register } } @@ -965,16 +977,16 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DOUT << " Register " << TRI->getName(PhysReg) - << " [%reg" << VirtReg - << "] is never used, removing it from live set\n"; + DEBUG(errs() << " Register " << TRI->getName(PhysReg) + << " [%reg" << 
VirtReg + << "] is never used, removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); *AliasSet; ++AliasSet) { if (PhysRegsUsed[*AliasSet] != -2) { - DOUT << " Register " << TRI->getName(*AliasSet) - << " [%reg" << *AliasSet - << "] is never used, removing it from live set\n"; + DEBUG(errs() << " Register " << TRI->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"); removePhysReg(*AliasSet); } } @@ -1022,7 +1034,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { /// runOnMachineFunction - Register allocate the whole function /// bool RALocal::runOnMachineFunction(MachineFunction &Fn) { - DOUT << "Machine Function " << "\n"; + DEBUG(errs() << "Machine Function\n"); MF = &Fn; TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 89e2c59..bee5d93 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,7 +31,9 @@ #define DEBUG_TYPE "regalloc" -#include "PBQP.h" +#include "PBQP/HeuristicSolver.h" +#include "PBQP/SimpleGraph.h" +#include "PBQP/Heuristics/Briggs.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -42,6 +44,7 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include <limits> @@ -53,32 +56,38 @@ using namespace llvm; static RegisterRegAlloc -registerPBQPRepAlloc("pbqp", "PBQP register allocator", - createPBQPRegisterAllocator); +registerPBQPRepAlloc("pbqp", "PBQP register allocator.", + llvm::createPBQPRegisterAllocator); + +static cl::opt<bool> +pbqpCoalescing("pbqp-coalescing", + cl::desc("Attempt coalescing during PBQP register allocation."), + cl::init(false), cl::Hidden); namespace { - //! 
- //! PBQP based allocators solve the register allocation problem by mapping - //! register allocation problems to Partitioned Boolean Quadratic - //! Programming problems. + /// + /// PBQP based allocators solve the register allocation problem by mapping + /// register allocation problems to Partitioned Boolean Quadratic + /// Programming problems. class VISIBILITY_HIDDEN PBQPRegAlloc : public MachineFunctionPass { public: static char ID; - //! Construct a PBQP register allocator. - PBQPRegAlloc() : MachineFunctionPass((intptr_t)&ID) {} + /// Construct a PBQP register allocator. + PBQPRegAlloc() : MachineFunctionPass(&ID) {} - //! Return the pass name. - virtual const char* getPassName() const throw() { + /// Return the pass name. + virtual const char* getPassName() const { return "PBQP Register Allocator"; } - //! PBQP analysis usage. + /// PBQP analysis usage. virtual void getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<LiveIntervals>(); - au.addRequiredTransitive<RegisterCoalescer>(); + //au.addRequiredID(SplitCriticalEdgesID); + au.addRequired<RegisterCoalescer>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); au.addRequired<MachineLoopInfo>(); @@ -87,7 +96,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(au); } - //! Perform register allocation + /// Perform register allocation virtual bool runOnMachineFunction(MachineFunction &MF); private: @@ -97,7 +106,7 @@ namespace { typedef std::vector<AllowedSet> AllowedSetMap; typedef std::set<unsigned> RegSet; typedef std::pair<unsigned, unsigned> RegPair; - typedef std::map<RegPair, PBQPNum> CoalesceMap; + typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; typedef std::set<LiveInterval*> LiveIntervalSet; @@ -119,60 +128,60 @@ namespace { emptyVRegIntervals; - //! Builds a PBQP cost vector. + /// Builds a PBQP cost vector. 
template <typename RegContainer> - PBQPVector* buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &cealesces, - PBQPNum spillCost) const; - - //! \brief Builds a PBQP interference matrix. - //! - //! @return Either a pointer to a non-zero PBQP matrix representing the - //! allocation option costs, or a null pointer for a zero matrix. - //! - //! Expects allowed sets for two interfering LiveIntervals. These allowed - //! sets should contain only allocable registers from the LiveInterval's - //! register class, with any interfering pre-colored registers removed. + PBQP::Vector buildCostVector(unsigned vReg, + const RegContainer &allowed, + const CoalesceMap &cealesces, + PBQP::PBQPNum spillCost) const; + + /// \brief Builds a PBQP interference matrix. + /// + /// @return Either a pointer to a non-zero PBQP matrix representing the + /// allocation option costs, or a null pointer for a zero matrix. + /// + /// Expects allowed sets for two interfering LiveIntervals. These allowed + /// sets should contain only allocable registers from the LiveInterval's + /// register class, with any interfering pre-colored registers removed. template <typename RegContainer> - PBQPMatrix* buildInterferenceMatrix(const RegContainer &allowed1, - const RegContainer &allowed2) const; - - //! - //! Expects allowed sets for two potentially coalescable LiveIntervals, - //! and an estimated benefit due to coalescing. The allowed sets should - //! contain only allocable registers from the LiveInterval's register - //! classes, with any interfering pre-colored registers removed. + PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1, + const RegContainer &allowed2) const; + + /// + /// Expects allowed sets for two potentially coalescable LiveIntervals, + /// and an estimated benefit due to coalescing. 
The allowed sets should + /// contain only allocable registers from the LiveInterval's register + /// classes, with any interfering pre-colored registers removed. template <typename RegContainer> - PBQPMatrix* buildCoalescingMatrix(const RegContainer &allowed1, - const RegContainer &allowed2, - PBQPNum cBenefit) const; - - //! \brief Finds coalescing opportunities and returns them as a map. - //! - //! Any entries in the map are guaranteed coalescable, even if their - //! corresponding live intervals overlap. + PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1, + const RegContainer &allowed2, + PBQP::PBQPNum cBenefit) const; + + /// \brief Finds coalescing opportunities and returns them as a map. + /// + /// Any entries in the map are guaranteed coalescable, even if their + /// corresponding live intervals overlap. CoalesceMap findCoalesces(); - //! \brief Finds the initial set of vreg intervals to allocate. + /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(); - //! \brief Constructs a PBQP problem representation of the register - //! allocation problem for this function. - //! - //! @return a PBQP solver object for the register allocation problem. - pbqp* constructPBQPProblem(); + /// \brief Constructs a PBQP problem representation of the register + /// allocation problem for this function. + /// + /// @return a PBQP solver object for the register allocation problem. + PBQP::SimpleGraph constructPBQPProblem(); - //! \brief Adds a stack interval if the given live interval has been - //! spilled. Used to support stack slot coloring. + /// \brief Adds a stack interval if the given live interval has been + /// spilled. Used to support stack slot coloring. void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - //! \brief Given a solved PBQP problem maps this solution back to a register - //! assignment. 
- bool mapPBQPToRegAlloc(pbqp *problem); + /// \brief Given a solved PBQP problem maps this solution back to a register + /// assignment. + bool mapPBQPToRegAlloc(const PBQP::Solution &solution); - //! \brief Postprocessing before final spilling. Sets basic block "live in" - //! variables. + /// \brief Postprocessing before final spilling. Sets basic block "live in" + /// variables. void finalizeAlloc() const; }; @@ -182,17 +191,17 @@ namespace { template <typename RegContainer> -PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &coalesces, - PBQPNum spillCost) const { +PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg, + const RegContainer &allowed, + const CoalesceMap &coalesces, + PBQP::PBQPNum spillCost) const { typedef typename RegContainer::const_iterator AllowedItr; // Allocate vector. Additional element (0th) used for spill option - PBQPVector *v = new PBQPVector(allowed.size() + 1); + PBQP::Vector v(allowed.size() + 1, 0); - (*v)[0] = spillCost; + v[0] = spillCost; // Iterate over the allowed registers inserting coalesce benefits if there // are any. @@ -210,14 +219,14 @@ PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg, continue; // We have a coalesce - insert the benefit. - (*v)[ai + 1] = -cmItr->second; + v[ai + 1] = -cmItr->second; } return v; } template <typename RegContainer> -PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( +PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix( const RegContainer &allowed1, const RegContainer &allowed2) const { typedef typename RegContainer::const_iterator RegContainerIterator; @@ -230,7 +239,8 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( // that the spill option (element 0,0) has zero cost, since we can allocate // both intervals to memory safely (the cost for each individual allocation // to memory is accounted for by the cost vectors for each live interval). 
- PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1); + PBQP::Matrix *m = + new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); // Assume this is a zero matrix until proven otherwise. Zero matrices occur // between interfering live ranges with non-overlapping register sets (e.g. @@ -259,8 +269,8 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( unsigned reg2 = *a2Itr; // If the row/column regs are identical or alias insert an infinity. - if ((reg1 == reg2) || tri->areAliases(reg1, reg2)) { - (*m)[ri][ci] = std::numeric_limits<PBQPNum>::infinity(); + if (tri->regsOverlap(reg1, reg2)) { + (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity(); isZeroMatrix = false; } @@ -282,9 +292,9 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( } template <typename RegContainer> -PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix( +PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix( const RegContainer &allowed1, const RegContainer &allowed2, - PBQPNum cBenefit) const { + PBQP::PBQPNum cBenefit) const { typedef typename RegContainer::const_iterator RegContainerIterator; @@ -293,7 +303,8 @@ PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix( // for the LiveIntervals which are (potentially) to be coalesced. The amount // -cBenefit will be placed in any element representing the same register // for both intervals. - PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1); + PBQP::Matrix *m = + new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); // Reset costs to zero. m->reset(0); @@ -442,7 +453,7 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { vniItr != vniEnd; ++vniItr) { // We want to make sure we skip the copy instruction itself. 
- if ((*vniItr)->copy == instr) + if ((*vniItr)->getCopy() == instr) continue; if (srcLI->liveAt((*vniItr)->def)) { @@ -495,10 +506,11 @@ void PBQPRegAlloc::findVRegIntervalsToAlloc() { } } -pbqp* PBQPRegAlloc::constructPBQPProblem() { +PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() { typedef std::vector<const LiveInterval*> LIVector; typedef std::vector<unsigned> RegVector; + typedef std::vector<PBQP::SimpleGraph::NodeIterator> NodeVector; // This will store the physical intervals for easy reference. LIVector physIntervals; @@ -530,10 +542,15 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { } // Get the set of potential coalesces. - CoalesceMap coalesces(findCoalesces()); + CoalesceMap coalesces; + + if (pbqpCoalescing) { + coalesces = findCoalesces(); + } // Construct a PBQP solver for this problem - pbqp *solver = alloc_pbqp(vregIntervalsToAlloc.size()); + PBQP::SimpleGraph problem; + NodeVector problemNodes(vregIntervalsToAlloc.size()); // Resize allowedSets container appropriately. allowedSets.resize(vregIntervalsToAlloc.size()); @@ -594,13 +611,13 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { // Set the spill cost to the interval weight, or epsilon if the // interval weight is zero - PBQPNum spillCost = (li->weight != 0.0) ? - li->weight : std::numeric_limits<PBQPNum>::min(); + PBQP::PBQPNum spillCost = (li->weight != 0.0) ? + li->weight : std::numeric_limits<PBQP::PBQPNum>::min(); // Build a cost vector for this interval. 
- add_pbqp_nodecosts(solver, node, - buildCostVector(li->reg, allowedSets[node], coalesces, - spillCost)); + problemNodes[node] = + problem.addNode( + buildCostVector(li->reg, allowedSets[node], coalesces, spillCost)); } @@ -616,7 +633,7 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { CoalesceMap::const_iterator cmItr = coalesces.find(RegPair(li->reg, li2->reg)); - PBQPMatrix *m = 0; + PBQP::Matrix *m = 0; if (cmItr != coalesces.end()) { m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2], @@ -627,14 +644,29 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { } if (m != 0) { - add_pbqp_edgecosts(solver, node1, node2, m); + problem.addEdge(problemNodes[node1], + problemNodes[node2], + *m); + delete m; } } } + problem.assignNodeIDs(); + + assert(problem.getNumNodes() == allowedSets.size()); + for (unsigned i = 0; i < allowedSets.size(); ++i) { + assert(problem.getNodeItr(i) == problemNodes[i]); + } +/* + std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, " + << problem.getNumEdges() << " edges.\n"; + + problem.printDot(std::cerr); +*/ // We're done, PBQP problem constructed - return it. 
- return solver; + return problem; } void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, @@ -651,14 +683,14 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, if (stackInterval.getNumValNums() != 0) vni = stackInterval.getValNumInfo(0); else - vni = stackInterval.getNextValue(0, 0, false, lss->getVNInfoAllocator()); + vni = stackInterval.getNextValue( + LiveIndex(), 0, false, lss->getVNInfoAllocator()); LiveInterval &rhsInterval = lis->getInterval(spilled->reg); stackInterval.MergeRangesInAsValue(rhsInterval, vni); } -bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { - +bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { // Set to true if we have any spills bool anotherRoundNeeded = false; @@ -668,14 +700,16 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { // Iterate over the nodes mapping the PBQP solution to a register assignment. for (unsigned node = 0; node < node2LI.size(); ++node) { unsigned virtReg = node2LI[node]->reg, - allocSelection = get_pbqp_solution(problem, node); + allocSelection = solution.getSelection(node); + // If the PBQP solution is non-zero it's a physical register... if (allocSelection != 0) { // Get the physical reg, subtracting 1 to account for the spill option. unsigned physReg = allowedSets[node][allocSelection - 1]; - DOUT << "VREG " << virtReg << " -> " << tri->getName(physReg) << "\n"; + DEBUG(errs() << "VREG " << virtReg << " -> " + << tri->getName(physReg) << "\n"); assert(physReg != 0); @@ -697,8 +731,9 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); addStackInterval(spillInterval, mri); - DOUT << "VREG " << virtReg << " -> SPILLED (Cost: " - << oldSpillWeight << ", New vregs: "; + (void) oldSpillWeight; + DEBUG(errs() << "VREG " << virtReg << " -> SPILLED (Cost: " + << oldSpillWeight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. 
@@ -708,12 +743,12 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { assert(!(*itr)->empty() && "Empty spill range."); - DOUT << (*itr)->reg << " "; + DEBUG(errs() << (*itr)->reg << " "); vregIntervalsToAlloc.insert(*itr); } - DOUT << ")\n"; + DEBUG(errs() << ")\n"); // We need another round if spill intervals were added. anotherRoundNeeded |= !newSpills.empty(); @@ -734,6 +769,7 @@ void PBQPRegAlloc::finalizeAlloc() const { LiveInterval *li = *itr; unsigned physReg = vrm->getRegAllocPref(li->reg); + if (physReg == 0) { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); physReg = *liRC->allocation_order_begin(*mf); @@ -764,8 +800,8 @@ void PBQPRegAlloc::finalizeAlloc() const { continue; } - // Ignore unallocated vregs: if (reg == 0) { + // Filter out zero regs - they're for intervals that were spilled. continue; } @@ -804,7 +840,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { vrm = &getAnalysis<VirtRegMap>(); - DOUT << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"; + DEBUG(errs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n"); // Allocator main loop: // @@ -829,15 +865,14 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { unsigned round = 0; while (!pbqpAllocComplete) { - DOUT << " PBQP Regalloc round " << round << ":\n"; - - pbqp *problem = constructPBQPProblem(); - - solve_pbqp(problem); + DEBUG(errs() << " PBQP Regalloc round " << round << ":\n"); - pbqpAllocComplete = mapPBQPToRegAlloc(problem); + PBQP::SimpleGraph problem = constructPBQPProblem(); + PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver; + problem.assignNodeIDs(); + PBQP::Solution solution = solver.solve(problem); - free_pbqp(problem); + pbqpAllocComplete = mapPBQPToRegAlloc(solution); ++round; } @@ -852,7 +887,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { node2LI.clear(); allowedSets.clear(); - DOUT << "Post alloc VirtRegMap:\n" << *vrm << "\n"; + DEBUG(errs() << 
"Post alloc VirtRegMap:\n" << *vrm << "\n"); // Run rewriter std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter()); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index d7fe7a2..5f1c4e2 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -16,46 +16,21 @@ #define DEBUG_TYPE "reg-scavenging" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; -/// RedefinesSuperRegPart - Return true if the specified register is redefining -/// part of a super-register. -static bool RedefinesSuperRegPart(const MachineInstr *MI, unsigned SubReg, - const TargetRegisterInfo *TRI) { - bool SeenSuperUse = false; - bool SeenSuperDef = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isUndef()) - continue; - if (TRI->isSuperRegister(SubReg, MO.getReg())) { - if (MO.isUse()) - SeenSuperUse = true; - else if (MO.isImplicit()) - SeenSuperDef = true; - } - } - - return SeenSuperDef && SeenSuperUse; -} - -static bool RedefinesSuperRegPart(const MachineInstr *MI, - const MachineOperand &MO, - const TargetRegisterInfo *TRI) { - assert(MO.isReg() && MO.isDef() && "Not a register def!"); - return RedefinesSuperRegPart(MI, MO.getReg(), TRI); -} - /// setUsed - Set the register and its sub-registers as being used. 
void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(Reg); @@ -65,14 +40,38 @@ void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(SubReg); } -/// setUnused - Set the register and its sub-registers as being unused. -void RegScavenger::setUnused(unsigned Reg, const MachineInstr *MI) { - RegsAvailable.set(Reg); +bool RegScavenger::isAliasUsed(unsigned Reg) const { + if (isUsed(Reg)) + return true; + for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R) + if (isUsed(*R)) + return true; + return false; +} - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - if (!RedefinesSuperRegPart(MI, Reg, TRI)) - RegsAvailable.set(SubReg); +void RegScavenger::initRegState() { + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + + // All registers started out unused. + RegsAvailable.set(); + + // Reserved registers are always used. + RegsAvailable ^= ReservedRegs; + + if (!MBB) + return; + + // Live-in registers are in use. + for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + setUsed(*I); + + // Pristine CSRs are also unavailable. + BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) + setUsed(I); } void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { @@ -85,6 +84,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && "Target changed?"); + // Self-initialize. if (!MBB) { NumPhysRegs = TRI->getNumRegs(); RegsAvailable.resize(NumPhysRegs); @@ -100,73 +100,26 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { CalleeSavedRegs.set(CSRegs[i]); } - MBB = mbb; - ScavengedReg = 0; - ScavengedRC = NULL; - ScavengeRestore = NULL; - CurrDist = 0; - DistanceMap.clear(); - - // All registers started out unused. - RegsAvailable.set(); - - // Reserved registers are always used. 
- RegsAvailable ^= ReservedRegs; - - // Live-in registers are in use. - if (!MBB->livein_empty()) - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - setUsed(*I); + // RS used within emit{Pro,Epi}logue() + if (mbb != MBB) { + MBB = mbb; + initRegState(); + } Tracking = false; } -void RegScavenger::restoreScavengedReg() { - TII->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg, - ScavengingFrameIndex, ScavengedRC); - MachineBasicBlock::iterator II = prior(MBBI); - TRI->eliminateFrameIndex(II, 0, this); - setUsed(ScavengedReg); - ScavengedReg = 0; - ScavengedRC = NULL; +void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + BV.set(*R); } -#ifndef NDEBUG -/// isLiveInButUnusedBefore - Return true if register is livein the MBB not -/// not used before it reaches the MI that defines register. -static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI, - MachineBasicBlock *MBB, - const TargetRegisterInfo *TRI, - MachineRegisterInfo* MRI) { - // First check if register is livein. - bool isLiveIn = false; - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - if (Reg == *I || TRI->isSuperRegister(Reg, *I)) { - isLiveIn = true; - break; - } - if (!isLiveIn) - return false; - - // Is there any use of it before the specified MI? 
- SmallPtrSet<MachineInstr*, 4> UsesInMBB; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->getParent() == MBB) - UsesInMBB.insert(UseMI); - } - if (UsesInMBB.empty()) - return true; - - for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I) - if (UsesInMBB.count(&*I)) - return false; - return true; +void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++) + BV.set(*R); } -#endif void RegScavenger::forward() { // Move ptr forward. @@ -179,7 +132,6 @@ void RegScavenger::forward() { } MachineInstr *MI = MBBI; - DistanceMap.insert(std::make_pair(MI, CurrDist++)); if (MI == ScavengeRestore) { ScavengedReg = 0; @@ -187,153 +139,63 @@ void RegScavenger::forward() { ScavengeRestore = NULL; } -#if 0 - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - return; -#endif - - // Separate register operands into 3 classes: uses, defs, earlyclobbers. - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs; - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs; - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs; + // Find out which registers are early clobbered, killed, defined, and marked + // def-dead in this instruction. + BitVector EarlyClobberRegs(NumPhysRegs); + BitVector KillRegs(NumPhysRegs); + BitVector DefRegs(NumPhysRegs); + BitVector DeadRegs(NumPhysRegs); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef()) + if (!MO.isReg() || MO.isUndef()) continue; - if (MO.isUse()) - UseMOs.push_back(std::make_pair(&MO,i)); - else if (MO.isEarlyClobber()) - EarlyClobberMOs.push_back(std::make_pair(&MO,i)); - else - DefMOs.push_back(std::make_pair(&MO,i)); - } - - // Process uses first. 
- BitVector KillRegs(NumPhysRegs); - for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) { - const MachineOperand MO = *UseMOs[i].first; unsigned Reg = MO.getReg(); - - assert(isUsed(Reg) && "Using an undefined register!"); - - if (MO.isKill() && !isReserved(Reg)) { - KillRegs.set(Reg); - - // Mark sub-registers as used. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - KillRegs.set(SubReg); - } - } - - // Change states of all registers after all the uses are processed to guard - // against multiple uses. - setUnused(KillRegs); - - // Process early clobber defs then process defs. We can have a early clobber - // that is dead, it should not conflict with a def that happens one "slot" - // (see InstrSlots in LiveIntervalAnalysis.h) later. - unsigned NumECs = EarlyClobberMOs.size(); - unsigned NumDefs = DefMOs.size(); - - for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) { - const MachineOperand &MO = (i < NumECs) - ? *EarlyClobberMOs[i].first : *DefMOs[i-NumECs].first; - unsigned Idx = (i < NumECs) - ? EarlyClobberMOs[i].second : DefMOs[i-NumECs].second; - unsigned Reg = MO.getReg(); - if (MO.isUndef()) + if (!Reg || isReserved(Reg)) continue; - // If it's dead upon def, then it is now free. - if (MO.isDead()) { - setUnused(Reg, MI); - continue; - } - - // Skip two-address destination operand. - unsigned UseIdx; - if (MI->isRegTiedToUseOperand(Idx, &UseIdx) && - !MI->getOperand(UseIdx).isUndef()) { - assert(isUsed(Reg) && "Using an undefined register!"); - continue; + if (MO.isUse()) { + // Two-address operands implicitly kill. + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + addRegWithSubRegs(KillRegs, Reg); + } else { + assert(MO.isDef()); + if (MO.isDead()) + addRegWithSubRegs(DeadRegs, Reg); + else + addRegWithSubRegs(DefRegs, Reg); + if (MO.isEarlyClobber()) + addRegWithAliases(EarlyClobberRegs, Reg); } - - // Skip if this is merely redefining part of a super-register. 
- if (RedefinesSuperRegPart(MI, MO, TRI)) - continue; - - // Implicit def is allowed to "re-define" any register. Similarly, - // implicitly defined registers can be clobbered. - assert((isReserved(Reg) || isUnused(Reg) || - isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && - "Re-defining a live register!"); - setUsed(Reg); } -} - -void RegScavenger::backward() { - assert(Tracking && "Not tracking states!"); - assert(MBBI != MBB->begin() && "Already at start of basic block!"); - // Move ptr backward. - MBBI = prior(MBBI); - - MachineInstr *MI = MBBI; - DistanceMap.erase(MI); - --CurrDist; - // Separate register operands into 3 classes: uses, defs, earlyclobbers. - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> UseMOs; - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> DefMOs; - SmallVector<std::pair<const MachineOperand*,unsigned>, 4> EarlyClobberMOs; + // Verify uses and defs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef()) - continue; - if (MO.isUse()) - UseMOs.push_back(std::make_pair(&MO,i)); - else if (MO.isEarlyClobber()) - EarlyClobberMOs.push_back(std::make_pair(&MO,i)); - else - DefMOs.push_back(std::make_pair(&MO,i)); - } - - - // Process defs first. - unsigned NumECs = EarlyClobberMOs.size(); - unsigned NumDefs = DefMOs.size(); - for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) { - const MachineOperand &MO = (i < NumDefs) - ? *DefMOs[i].first : *EarlyClobberMOs[i-NumDefs].first; - unsigned Idx = (i < NumECs) - ? DefMOs[i].second : EarlyClobberMOs[i-NumDefs].second; - if (MO.isUndef()) - continue; - - // Skip two-address destination operand. 
- if (MI->isRegTiedToUseOperand(Idx)) + if (!MO.isReg() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - assert(isUsed(Reg)); - if (!isReserved(Reg)) - setUnused(Reg, MI); + if (!Reg || isReserved(Reg)) + continue; + if (MO.isUse()) { + assert(isUsed(Reg) && "Using an undefined register!"); + assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && + "Using an early clobbered register!"); + } else { + assert(MO.isDef()); +#if 0 + // FIXME: Enable this once we've figured out how to correctly transfer + // implicit kills during codegen passes like the coalescer. + assert((KillRegs.test(Reg) || isUnused(Reg) || + isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && + "Re-defining a live register!"); +#endif + } } - // Process uses. - BitVector UseRegs(NumPhysRegs); - for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) { - const MachineOperand MO = *UseMOs[i].first; - unsigned Reg = MO.getReg(); - assert(isUnused(Reg) || isReserved(Reg)); - UseRegs.set(Reg); - - // Set the sub-registers as "used". - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - UseRegs.set(SubReg); - } - setUsed(UseRegs); + // Commit the changes. + setUnused(KillRegs); + setUnused(DeadRegs); + setUsed(DefRegs); } void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { @@ -351,129 +213,110 @@ static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) { Mask.set(*I); } -unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass, - const BitVector &Candidates) const { - // Mask off the registers which are not in the TargetRegisterClass. - BitVector RegsAvailableCopy(NumPhysRegs, false); - CreateRegClassMask(RegClass, RegsAvailableCopy); - RegsAvailableCopy &= RegsAvailable; - - // Restrict the search to candidates. - RegsAvailableCopy &= Candidates; - - // Returns the first unused (bit is set) register, or 0 is none is found. 
- int Reg = RegsAvailableCopy.find_first(); - return (Reg == -1) ? 0 : Reg; +unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) + if (!isAliasUsed(*I)) + return *I; + return 0; } -unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass, - bool ExCalleeSaved) const { - // Mask off the registers which are not in the TargetRegisterClass. - BitVector RegsAvailableCopy(NumPhysRegs, false); - CreateRegClassMask(RegClass, RegsAvailableCopy); - RegsAvailableCopy &= RegsAvailable; - - // If looking for a non-callee-saved register, mask off all the callee-saved - // registers. - if (ExCalleeSaved) - RegsAvailableCopy &= ~CalleeSavedRegs; - - // Returns the first unused (bit is set) register, or 0 is none is found. - int Reg = RegsAvailableCopy.find_first(); - return (Reg == -1) ? 0 : Reg; -} +/// findSurvivorReg - Return the candidate register that is unused for the +/// longest after MBBI. UseMI is set to the instruction where the search +/// stopped. +/// +/// No more than InstrLimit instructions are inspected. +/// +unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator MI, + BitVector &Candidates, + unsigned InstrLimit, + MachineBasicBlock::iterator &UseMI) { + int Survivor = Candidates.find_first(); + assert(Survivor > 0 && "No candidates for scavenging"); + + MachineBasicBlock::iterator ME = MBB->getFirstTerminator(); + assert(MI != ME && "MI already at terminator"); + + for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) { + // Remove any candidates touched by instruction. 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef() || !MO.getReg() || + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + Candidates.reset(MO.getReg()); + for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++) + Candidates.reset(*R); + } -/// findFirstUse - Calculate the distance to the first use of the -/// specified register. -MachineInstr* -RegScavenger::findFirstUse(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, unsigned Reg, - unsigned &Dist) { - MachineInstr *UseMI = 0; - Dist = ~0U; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *UDMI = &*RI; - if (UDMI->getParent() != MBB) + // Was our survivor untouched by this instruction? + if (Candidates.test(Survivor)) continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); - if (DI == DistanceMap.end()) { - // If it's not in map, it's below current MI, let's initialize the - // map. - I = next(I); - unsigned Dist = CurrDist + 1; - while (I != MBB->end()) { - DistanceMap.insert(std::make_pair(I, Dist++)); - I = next(I); - } - } - DI = DistanceMap.find(UDMI); - if (DI->second > CurrDist && DI->second < Dist) { - Dist = DI->second; - UseMI = UDMI; - } + + // All candidates gone? + if (Candidates.none()) + break; + + Survivor = Candidates.find_first(); } - return UseMI; + + // We ran out of candidates, so stop the search. + UseMI = MI; + return Survivor; } unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { - assert(ScavengingFrameIndex >= 0 && - "Cannot scavenge a register without an emergency spill slot!"); - // Mask off the registers which are not in the TargetRegisterClass. BitVector Candidates(NumPhysRegs, false); CreateRegClassMask(RC, Candidates); - Candidates ^= ReservedRegs; // Do not include reserved registers. 
+ // Do not include reserved registers. + Candidates ^= ReservedRegs & Candidates; // Exclude all the registers being used by the instruction. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand &MO = I->getOperand(i); - if (MO.isReg()) + if (MO.isReg() && MO.getReg() != 0 && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } // Find the register whose use is furthest away. - unsigned SReg = 0; - unsigned MaxDist = 0; - MachineInstr *MaxUseMI = 0; - int Reg = Candidates.find_first(); - while (Reg != -1) { - unsigned Dist; - MachineInstr *UseMI = findFirstUse(MBB, I, Reg, Dist); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - unsigned AsDist; - MachineInstr *AsUseMI = findFirstUse(MBB, I, *AS, AsDist); - if (AsDist < Dist) { - Dist = AsDist; - UseMI = AsUseMI; - } - } - if (Dist >= MaxDist) { - MaxDist = Dist; - MaxUseMI = UseMI; - SReg = Reg; - } - Reg = Candidates.find_next(Reg); - } + MachineBasicBlock::iterator UseMI; + unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); - if (ScavengedReg != 0) { - assert(0 && "Scavenger slot is live, unable to scavenge another register!"); - abort(); - } + // If we found an unused register there is no reason to spill it. We have + // probably found a callee-saved register that has been saved in the + // prologue, but happens to be unused at this point. + if (!isAliasUsed(SReg)) + return SReg; - // Spill the scavenged register before I. - TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); - MachineBasicBlock::iterator II = prior(I); - TRI->eliminateFrameIndex(II, SPAdj, this); + assert(ScavengedReg == 0 && + "Scavenger slot is live, unable to scavenge another register!"); - // Restore the scavenged register before its use (or first terminator). - II = MaxUseMI - ? 
MachineBasicBlock::iterator(MaxUseMI) : MBB->getFirstTerminator(); - TII->loadRegFromStackSlot(*MBB, II, SReg, ScavengingFrameIndex, RC); - ScavengeRestore = prior(II); + // Avoid infinite regress ScavengedReg = SReg; + + // If the target knows how to save/restore the register, let it do so; + // otherwise, use the emergency stack spill slot. + if (!TRI->saveScavengerRegister(*MBB, I, RC, SReg)) { + // Spill the scavenged register before I. + assert(ScavengingFrameIndex >= 0 && + "Cannot scavenge register without an emergency spill slot!"); + TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); + MachineBasicBlock::iterator II = prior(I); + TRI->eliminateFrameIndex(II, SPAdj, NULL, this); + + // Restore the scavenged register before its use (or first terminator). + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC); + } else + TRI->restoreScavengerRegister(*MBB, UseMI, RC, SReg); + + ScavengeRestore = prior(UseMI); + + // Doing this here leads to infinite regress. 
+ // ScavengedReg = SReg; ScavengedRC = RC; return SReg; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index a8452df..5a59862 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; @@ -40,7 +41,7 @@ void ScheduleDAG::dumpSchedule() const { if (SUnit *SU = Sequence[i]) SU->dump(this); else - cerr << "**** NOOP ****\n"; + errs() << "**** NOOP ****\n"; } } @@ -59,9 +60,11 @@ void ScheduleDAG::Run(MachineBasicBlock *bb, Schedule(); - DOUT << "*** Final schedule ***\n"; - DEBUG(dumpSchedule()); - DOUT << "\n"; + DEBUG({ + errs() << "*** Final schedule ***\n"; + dumpSchedule(); + errs() << '\n'; + }); } /// addPred - This adds the specified edge as a pred of the current node if @@ -79,13 +82,19 @@ void SUnit::addPred(const SDep &D) { SUnit *N = D.getSUnit(); // Update the bookkeeping. if (D.getKind() == SDep::Data) { + assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); + assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); ++NumPreds; ++N->NumSuccs; } - if (!N->isScheduled) + if (!N->isScheduled) { + assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); ++NumPredsLeft; - if (!isScheduled) + } + if (!isScheduled) { + assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++N->NumSuccsLeft; + } Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { @@ -118,13 +127,19 @@ void SUnit::removePred(const SDep &D) { Preds.erase(I); // Update the bookkeeping. 
if (P.getKind() == SDep::Data) { + assert(NumPreds > 0 && "NumPreds will underflow!"); + assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); --NumPreds; --N->NumSuccs; } - if (!N->isScheduled) + if (!N->isScheduled) { + assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); --NumPredsLeft; - if (!isScheduled) + } + if (!isScheduled) { + assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); --N->NumSuccsLeft; + } if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); @@ -256,56 +271,58 @@ void SUnit::ComputeHeight() { /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. void SUnit::dump(const ScheduleDAG *G) const { - cerr << "SU(" << NodeNum << "): "; + errs() << "SU(" << NodeNum << "): "; G->dumpNode(this); } void SUnit::dumpAll(const ScheduleDAG *G) const { dump(G); - cerr << " # preds left : " << NumPredsLeft << "\n"; - cerr << " # succs left : " << NumSuccsLeft << "\n"; - cerr << " Latency : " << Latency << "\n"; - cerr << " Depth : " << Depth << "\n"; - cerr << " Height : " << Height << "\n"; + errs() << " # preds left : " << NumPredsLeft << "\n"; + errs() << " # succs left : " << NumSuccsLeft << "\n"; + errs() << " Latency : " << Latency << "\n"; + errs() << " Depth : " << Depth << "\n"; + errs() << " Height : " << Height << "\n"; if (Preds.size() != 0) { - cerr << " Predecessors:\n"; + errs() << " Predecessors:\n"; for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { - cerr << " "; + errs() << " "; switch (I->getKind()) { - case SDep::Data: cerr << "val "; break; - case SDep::Anti: cerr << "anti"; break; - case SDep::Output: cerr << "out "; break; - case SDep::Order: cerr << "ch "; break; + case SDep::Data: errs() << "val "; break; + case SDep::Anti: errs() << "anti"; break; + case SDep::Output: errs() << "out "; break; + case SDep::Order: errs() << "ch "; break; } - cerr << "#"; - cerr << I->getSUnit() << " - SU(" << 
I->getSUnit()->NodeNum << ")"; + errs() << "#"; + errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - cerr << " *"; - cerr << "\n"; + errs() << " *"; + errs() << ": Latency=" << I->getLatency(); + errs() << "\n"; } } if (Succs.size() != 0) { - cerr << " Successors:\n"; + errs() << " Successors:\n"; for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end(); I != E; ++I) { - cerr << " "; + errs() << " "; switch (I->getKind()) { - case SDep::Data: cerr << "val "; break; - case SDep::Anti: cerr << "anti"; break; - case SDep::Output: cerr << "out "; break; - case SDep::Order: cerr << "ch "; break; + case SDep::Data: errs() << "val "; break; + case SDep::Anti: errs() << "anti"; break; + case SDep::Output: errs() << "out "; break; + case SDep::Order: errs() << "ch "; break; } - cerr << "#"; - cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + errs() << "#"; + errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - cerr << " *"; - cerr << "\n"; + errs() << " *"; + errs() << ": Latency=" << I->getLatency(); + errs() << "\n"; } } - cerr << "\n"; + errs() << "\n"; } #ifndef NDEBUG @@ -323,35 +340,35 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { continue; } if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has not been scheduled!\n"; + errs() << "has not been scheduled!\n"; AnyNotSched = true; } if (SUnits[i].isScheduled && (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) > unsigned(INT_MAX)) { if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has an unexpected " + errs() << "has an unexpected " << (isBottomUp ? "Height" : "Depth") << " value!\n"; AnyNotSched = true; } if (isBottomUp) { if (SUnits[i].NumSuccsLeft != 0) { if (!AnyNotSched) - cerr << "*** Scheduling failed! 
***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has successors left!\n"; + errs() << "has successors left!\n"; AnyNotSched = true; } } else { if (SUnits[i].NumPredsLeft != 0) { if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has predecessors left!\n"; + errs() << "has predecessors left!\n"; AnyNotSched = true; } } diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index 770f5bb..0d15c02 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -28,10 +28,6 @@ #include "llvm/Support/MathExtras.h" using namespace llvm; -void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) { - MI->addMemOperand(MF, MO); -} - void ScheduleDAG::EmitNoop() { TII->insertNoop(*BB, InsertPos); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 8e18b3d..44e9296 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -14,8 +14,10 @@ #define DEBUG_TYPE "sched-instrs" #include "ScheduleDAGInstrs.h" +#include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetMachine.h" @@ -45,35 +47,24 @@ void ScheduleDAGInstrs::Run(MachineBasicBlock *bb, ScheduleDAG::Run(bb, end); } -/// getOpcode - If this is an Instruction or a ConstantExpr, return the -/// opcode value. Otherwise return UserOp1. -static unsigned getOpcode(const Value *V) { - if (const Instruction *I = dyn_cast<Instruction>(V)) - return I->getOpcode(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - return CE->getOpcode(); - // Use UserOp1 to mean there's no opcode. 
- return Instruction::UserOp1; -} - /// getUnderlyingObjectFromInt - This is the function that does the work of /// looking through basic ptrtoint+arithmetic+inttoptr sequences. static const Value *getUnderlyingObjectFromInt(const Value *V) { do { - if (const User *U = dyn_cast<User>(V)) { + if (const Operator *U = dyn_cast<Operator>(V)) { // If we find a ptrtoint, we can transfer control back to the // regular getUnderlyingObjectFromInt. - if (getOpcode(U) == Instruction::PtrToInt) + if (U->getOpcode() == Instruction::PtrToInt) return U->getOperand(0); // If we find an add of a constant or a multiplied value, it's // likely that the other operand will lead us to the base // object. We don't have to worry about the case where the - // object address is somehow being computed bt the multiply, + // object address is somehow being computed by the multiply, // because our callers only care when the result is an // identifibale object. - if (getOpcode(U) != Instruction::Add || + if (U->getOpcode() != Instruction::Add || (!isa<ConstantInt>(U->getOperand(1)) && - getOpcode(U->getOperand(1)) != Instruction::Mul)) + Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) return V; V = U->getOperand(0); } else { @@ -90,7 +81,7 @@ static const Value *getUnderlyingObject(const Value *V) { do { V = V->getUnderlyingObject(); // If it found an inttoptr, use special code to continue climing. - if (getOpcode(V) != Instruction::IntToPtr) + if (Operator::getOpcode(V) != Instruction::IntToPtr) break; const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); // If that succeeded in finding a pointer, continue the search. @@ -106,11 +97,11 @@ static const Value *getUnderlyingObject(const Value *V) { /// object, return the Value for that object. Otherwise return null. 
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) { if (!MI->hasOneMemOperand() || - !MI->memoperands_begin()->getValue() || - MI->memoperands_begin()->isVolatile()) + !(*MI->memoperands_begin())->getValue() || + (*MI->memoperands_begin())->isVolatile()) return 0; - const Value *V = MI->memoperands_begin()->getValue(); + const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) return 0; @@ -132,7 +123,7 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { } } -void ScheduleDAGInstrs::BuildSchedGraph() { +void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // We'll be allocating one SUnit for each instruction, plus one for // the region exit node. SUnits.reserve(BB->size()); @@ -155,8 +146,8 @@ void ScheduleDAGInstrs::BuildSchedGraph() { bool UnitLatencies = ForceUnitLatencies(); // Ask the target if address-backscheduling is desirable, and if so how much. - unsigned SpecialAddressLatency = - TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency(); + const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); // Walk the list of instructions, from bottom moving up. for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; @@ -184,16 +175,20 @@ void ScheduleDAGInstrs::BuildSchedGraph() { assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); std::vector<SUnit *> &UseList = Uses[Reg]; std::vector<SUnit *> &DefList = Defs[Reg]; - // Optionally add output and anti dependencies. - // TODO: Using a latency of 1 here assumes there's no cost for - // reusing registers. + // Optionally add output and anti dependencies. For anti + // dependencies we use a latency of 0 because for a multi-issue + // target we want to allow the defining instruction to issue + // in the same cycle as the using instruction. + // TODO: Using a latency of 1 here for output dependencies assumes + // there's no cost for reusing registers. 
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; + unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { SUnit *DefSU = DefList[i]; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg)); + DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector<SUnit *> &DefList = Defs[*Alias]; @@ -202,7 +197,7 @@ void ScheduleDAGInstrs::BuildSchedGraph() { if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias)); + DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias)); } } @@ -216,6 +211,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() { // Optionally add in a special extra latency for nodes that // feed addresses. // TODO: Do this for register aliases too. + // TODO: Perhaps we should get rid of + // SpecialAddressLatency and just move this into + // adjustSchedDependency for the targets that care about + // it. if (SpecialAddressLatency != 0 && !UnitLatencies) { MachineInstr *UseMI = UseSU->getInstr(); const TargetInstrDesc &UseTID = UseMI->getDesc(); @@ -226,15 +225,29 @@ void ScheduleDAGInstrs::BuildSchedGraph() { UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) LDataLatency += SpecialAddressLatency; } - UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg)); + // Adjust the dependence latency using operand def/use + // information (if any), and then allow the target to + // perform its own adjustments. 
+ const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, (SDep &)dep); + ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + } + UseSU->addPred(dep); } } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector<SUnit *> &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { SUnit *UseSU = UseList[i]; - if (UseSU != SU) - UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias)); + if (UseSU != SU) { + const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, (SDep &)dep); + ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + } + UseSU->addPred(dep); + } } } @@ -323,10 +336,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() { if (!ChainTID.isCall() && !ChainTID.hasUnmodeledSideEffects() && ChainMI->hasOneMemOperand() && - !ChainMI->memoperands_begin()->isVolatile() && - ChainMI->memoperands_begin()->getValue()) + !(*ChainMI->memoperands_begin())->isVolatile() && + (*ChainMI->memoperands_begin())->getValue()) // We know that the Chain accesses one specific memory location. - ChainMMO = &*ChainMI->memoperands_begin(); + ChainMMO = *ChainMI->memoperands_begin(); else // Unknown memory accesses. Assume the worst. ChainMMO = 0; @@ -362,7 +375,7 @@ void ScheduleDAGInstrs::BuildSchedGraph() { // Treat all other stores conservatively. goto new_chain; } else if (TID.mayLoad()) { - if (TII->isInvariantLoad(MI)) { + if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else if (const Value *V = getUnderlyingObjectForInstr(MI)) { // A load from a specific PseudoSourceValue. Add precise dependencies. @@ -409,10 +422,9 @@ void ScheduleDAGInstrs::FinishBlock() { void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - // Compute the latency for the node. 
We use the sum of the latencies for - // all nodes flagged together into this SUnit. + // Compute the latency for the node. SU->Latency = - InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass()); + InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass()); // Simplistic target-independent heuristic: assume that loads take // extra time. @@ -421,6 +433,50 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { SU->Latency += 2; } +void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const { + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + if (InstrItins.isEmpty()) + return; + + // For a data dependency with a known register... + if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) + return; + + const unsigned Reg = dep.getReg(); + + // ... find the definition of the register in the defining + // instruction + MachineInstr *DefMI = Def->getInstr(); + int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); + if (DefIdx != -1) { + int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx); + if (DefCycle >= 0) { + MachineInstr *UseMI = Use->getInstr(); + const unsigned UseClass = UseMI->getDesc().getSchedClass(); + + // For all uses of the register, calculate the maxmimum latency + int Latency = -1; + for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg != Reg) + continue; + + int UseCycle = InstrItins.getOperandCycle(UseClass, i); + if (UseCycle >= 0) + Latency = std::max(Latency, DefCycle - UseCycle + 1); + } + + // If we found a latency, then replace the existing dependence latency. 
+ if (Latency >= 0) + dep.setLatency(Latency); + } + } +} + void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { SU->getInstr()->dump(); } @@ -438,7 +494,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { } // EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { +MachineBasicBlock *ScheduleDAGInstrs:: +EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { // For MachineInstr-based scheduling, we're rescheduling the instructions in // the block, so start by removing them from the block. while (Begin != InsertPos) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index 00d6268..29e1c98 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -15,12 +15,13 @@ #ifndef SCHEDULEDAGINSTRS_H #define SCHEDULEDAGINSTRS_H -#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include <map> namespace llvm { @@ -120,7 +121,6 @@ namespace llvm { SmallSet<unsigned, 8> LoopLiveInRegs; public: - MachineBasicBlock *BB; // Current basic block MachineBasicBlock::iterator Begin; // The beginning of the range to // be scheduled. The range extends // to InsertPos. @@ -154,13 +154,20 @@ namespace llvm { /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are /// input. - virtual void BuildSchedGraph(); + virtual void BuildSchedGraph(AliasAnalysis *AA); /// ComputeLatency - Compute node latency. 
/// virtual void ComputeLatency(SUnit *SU); - virtual MachineBasicBlock *EmitSchedule(); + /// ComputeOperandLatency - Override dependence edge latency using + /// operand use/def information + /// + virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const; + + virtual MachineBasicBlock* + EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*); /// StartBlock - Prepare to perform scheduling in the given block. /// diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 5efd274..95ad05e 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -86,14 +86,14 @@ void ScheduleDAG::viewGraph() { // This code is only for debugging! #ifndef NDEBUG if (BB->getBasicBlock()) - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' + - BB->getBasicBlock()->getName()); + ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() + + ":" + BB->getBasicBlock()->getNameStr()); else - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName()); + ViewGraph(this, "dag." 
+ MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr()); #else - cerr << "ScheduleDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "ScheduleDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 4ffe88f..c766859 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMSelectionDAG CallingConvLower.cpp DAGCombiner.cpp FastISel.cpp + InstrEmitter.cpp LegalizeDAG.cpp LegalizeFloatTypes.cpp LegalizeIntegerTypes.cpp @@ -9,13 +10,12 @@ add_llvm_library(LLVMSelectionDAG LegalizeTypesGeneric.cpp LegalizeVectorOps.cpp LegalizeVectorTypes.cpp - ScheduleDAGSDNodes.cpp - ScheduleDAGSDNodesEmit.cpp ScheduleDAGFast.cpp ScheduleDAGList.cpp ScheduleDAGRRList.cpp - SelectionDAGBuild.cpp + ScheduleDAGSDNodes.cpp SelectionDAG.cpp + SelectionDAGBuild.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp TargetLowering.cpp diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp index 7cd2b73..fbe40b6 100644 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -13,15 +13,17 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; -CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, - SmallVector<CCValAssign, 16> &locs) +CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, + SmallVector<CCValAssign, 16> &locs, LLVMContext &C) 
: CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs) { + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { // No stack is used. StackOffset = 0; @@ -31,8 +33,8 @@ CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, // HandleByVal - Allocate a stack slot large enough to pass an argument by // value. The size and alignment information of the argument is encoded in its // parameter attribute. -void CCState::HandleByVal(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, +void CCState::HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags) { unsigned Align = ArgFlags.getByValAlign(); @@ -55,94 +57,107 @@ void CCState::MarkAllocated(unsigned Reg) { UsedRegs[Reg/32] |= 1 << (Reg&31); } -/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// AnalyzeFormalArguments - Analyze an array of argument values, /// incorporating info about the formals into this state. 
-void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) { - unsigned NumArgs = TheArgs->getNumValues()-1; - +void +CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + unsigned NumArgs = Ins.size(); + for (unsigned i = 0; i != NumArgs; ++i) { - MVT ArgVT = TheArgs->getValueType(i); - ISD::ArgFlagsTy ArgFlags = - cast<ARG_FLAGSSDNode>(TheArgs->getOperand(3+i))->getArgFlags(); + EVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { - cerr << "Formal argument #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Formal argument #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); } } } -/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// AnalyzeReturn - Analyze the returned values of a return, /// incorporating info about the result values into this state. -void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) { +void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { // Determine which register each value should be copied into. 
- for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) { - MVT VT = TheRet->getOperand(i*2+1).getValueType(); - ISD::ArgFlagsTy ArgFlags = - cast<ARG_FLAGSSDNode>(TheRet->getOperand(i*2+2))->getArgFlags(); - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){ - cerr << "Return operand #" << i << " has unhandled type " - << VT.getMVTString() << "\n"; - abort(); + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].Val.getValueType(); + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + errs() << "Return operand #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); } } } -/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info -/// about the passed values into this state. -void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) { - unsigned NumOps = TheCall->getNumArgs(); +/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + unsigned NumOps = Outs.size(); for (unsigned i = 0; i != NumOps; ++i) { - MVT ArgVT = TheCall->getArg(i).getValueType(); - ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + EVT ArgVT = Outs[i].Val.getValueType(); + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { - cerr << "Call operand #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); } } } /// AnalyzeCallOperands - Same as above except it takes vectors of types /// and argument flags. 
-void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, +void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &Flags, CCAssignFn Fn) { unsigned NumOps = ArgVTs.size(); for (unsigned i = 0; i != NumOps; ++i) { - MVT ArgVT = ArgVTs[i]; + EVT ArgVT = ArgVTs[i]; ISD::ArgFlagsTy ArgFlags = Flags[i]; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { - cerr << "Call operand #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); } } } -/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// AnalyzeCallResult - Analyze the return values of a call, /// incorporating info about the passed values into this state. -void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) { - for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) { - MVT VT = TheCall->getRetValType(i); - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (TheCall->isInreg()) - Flags.setInReg(); +void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { - cerr << "Call result #" << i << " has unhandled type " - << VT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call result #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); } } } /// AnalyzeCallResult - Same as above except it's specialized for calls which /// produce a single value. 
-void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { +void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { - cerr << "Call result has unhandled type " - << VT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call result has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); } } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 609ec82..1ed3082 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19,6 +19,7 @@ #define DEBUG_TYPE "dagcombine" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -33,7 +34,9 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <set> using namespace llvm; @@ -213,12 +216,12 @@ namespace { SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); - SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans = true); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); - SDValue CombineConsecutiveLoads(SDNode *N, MVT VT); - SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); @@ -236,14 +239,17 
@@ namespace { /// overlap. bool isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, SDValue Ptr2, int64_t Size2, - const Value *SrcValue2, int SrcValueOffset2) const; + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const; /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, int &SrcValueOffset) const; + const Value *&SrcValue, int &SrcValueOffset, + unsigned &SrcValueAlignment) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -251,7 +257,7 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. - MVT getShiftAmountTy() { + EVT getShiftAmountTy() { return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); } @@ -392,7 +398,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); switch (Op.getOpcode()) { - default: assert(0 && "Unknown code"); + default: llvm_unreachable("Unknown code"); case ISD::ConstantFP: { APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); V.changeSign(); @@ -495,7 +501,7 @@ static bool isOneUseSetCC(SDValue N) { SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, SDValue N0, SDValue N1) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { if (isa<ConstantSDNode>(N1)) { // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) @@ -537,10 +543,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; - DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG)); - DOUT << " and " << NumTo-1 << " other values\n"; - DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i) + DEBUG(errs() << "\nReplacing.1 "; + N->dump(&DAG); + errs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + errs() << " and " << NumTo-1 << " other values\n"; + for (unsigned i = 0, e = NumTo; i != e; ++i) assert(N->getValueType(i) == To[i].getValueType() && "Cannot combine value to value of different type!")); WorkListRemover DeadNodes(*this); @@ -612,9 +620,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. ++NodesCombined; - DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.2 "; + TLO.Old.getNode()->dump(&DAG); + errs() << "\nWith: "; + TLO.New.getNode()->dump(&DAG); + errs() << '\n'); CommitTargetLoweringOpt(TLO); return true; @@ -680,9 +690,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.3 "; + N->dump(&DAG); + errs() << "\nWith: "; + RV.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); @@ -800,7 +812,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // Expose the DAG combiner to the target combiner impls. 
TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, Level == Unrestricted, false, this); + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); RV = TLI.PerformDAGCombine(N, DagCombineInfo); } @@ -877,7 +889,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { break; case ISD::TokenFactor: - if ((CombinerAA || Op.hasOneUse()) && + if (Op.hasOneUse() && std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { // Queue up for processing. TFs.push_back(Op.getNode()); @@ -898,7 +910,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } } - + SDValue Result; // If we've change things around then replace token factor. @@ -922,9 +934,14 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { WorkListRemover DeadNodes(*this); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), - &DeadNodes); + // Replacing results may cause a different MERGE_VALUES to suddenly + // be CSE'd with N, and carry its uses with it. Iterate until no + // uses remain, to ensure that the node can be safely deleted. + do { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), + &DeadNodes); + } while (!N->use_empty()); removeFromWorkList(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
@@ -933,7 +950,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { static SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); @@ -957,7 +974,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1080,7 +1097,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. if (N->hasNUsesOfValue(0, 1)) @@ -1142,7 +1159,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1215,7 +1232,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1308,7 +1325,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1395,7 +1412,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { 
SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1415,7 +1432,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { - MVT ADDVT = N1.getOperand(1).getValueType(); + EVT ADDVT = N1.getOperand(1).getValueType(); SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() @@ -1447,7 +1464,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1489,7 +1506,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1541,7 +1558,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1562,7 +1579,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1665,7 +1682,7 @@ SDValue 
DAGCombiner::visitUDIVREM(SDNode *N) { /// two operands of the same opcode, try to simplify it. SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); // For each of OP in AND/OR/XOR: @@ -1677,7 +1694,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { N0.getOpcode() == ISD::SIGN_EXTEND || (N0.getOpcode() == ISD::TRUNCATE && !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && - N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && + (!LegalOperations || + TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) { SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); @@ -1709,7 +1728,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); unsigned BitWidth = VT.getSizeInBits(); // fold vector ops @@ -1820,18 +1839,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (zext_inreg (extload x)) -> (zextload x) if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -1842,18 +1861,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -1869,24 +1888,24 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->isUnindexed() && N0.hasOneUse() && // Do not change the width of a volatile load. 
!LN0->isVolatile()) { - MVT EVT = MVT::Other; + EVT ExtVT = MVT::Other; uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) - EVT = MVT::getIntegerVT(ActiveBits); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - MVT LoadedVT = LN0->getMemoryVT(); + EVT LoadedVT = LN0->getMemoryVT(); // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { - MVT PtrType = N0.getOperand(1).getValueType(); + if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to add an offset to the pointer to // load the correct bytes. For little endian systems, we merely need to // read fewer bytes from the same pointer. - unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8; - unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8; + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; unsigned Alignment = LN0->getAlignment(); SDValue NewPtr = LN0->getBasePtr(); @@ -1901,7 +1920,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - EVT, LN0->isVolatile(), Alignment); + ExtVT, LN0->isVolatile(), Alignment); AddToWorkList(N); CombineTo(N0.getNode(), Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
@@ -1918,7 +1937,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1928,7 +1947,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, undef) -> -1 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(~0ULL, VT); + return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); // fold (or c1, c2) -> c1|c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); @@ -2058,7 +2077,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // a rot[lr]. SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. - MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return 0; // The target must have at least one rotate flavor. 
@@ -2219,7 +2238,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS, RHS, CC; ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -2258,8 +2277,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { switch (N0.getOpcode()) { default: - assert(0 && "Unhandled SetCC Equivalent!"); - abort(); + llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); case ISD::SELECT_CC: @@ -2388,7 +2406,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is a signed shift right, and the high bit is modified by the // logical operation, do not perform the transformation. 
The highBitSet @@ -2418,7 +2436,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getSizeInBits(); // fold (shl c1, c2) -> c1<<c2 @@ -2443,7 +2461,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2474,20 +2492,33 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SRL && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << c1, VT)); - if (c2 > c1) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c2-c1, N1.getValueType())); - else - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c1-c2, N1.getValueType())); + if (c1 < VT.getSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - c1), + VT); + SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, + N0.getOperand(0), + HiBitsMask); + if (c2 > c1) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c2-c1, N1.getValueType())); + else + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c1-c2, N1.getValueType())); + } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) - if (N1C && N0.getOpcode() == ISD::SRA 
&& N1 == N0.getOperand(1)) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - + N1C->getZExtValue()), + VT); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << N1C->getZExtValue(), VT)); + HiBitsMask); + } return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); } @@ -2497,7 +2528,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) @@ -2518,7 +2549,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // sext_inreg. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue(); - MVT EVT = MVT::getIntegerVT(LowBits); + EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getValueType(EVT)); @@ -2545,8 +2576,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. unsigned VTValSize = VT.getSizeInBits(); - MVT TruncVT = - MVT::getIntegerVT(VTValSize - N1C->getZExtValue()); + EVT TruncVT = + EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C->getZExtValue()); // Determine the residual right-shift amount. 
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -2576,7 +2607,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2607,7 +2638,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 @@ -2641,7 +2672,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? 
- MVT SmallVT = N0.getOperand(0).getValueType(); + EVT SmallVT = N0.getOperand(0).getValueType(); if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); @@ -2700,7 +2731,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2724,7 +2755,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 if (isa<ConstantSDNode>(N0)) @@ -2734,7 +2765,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 if (isa<ConstantSDNode>(N0)) @@ -2744,7 +2775,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 if (isa<ConstantSDNode>(N0)) @@ -2759,8 +2790,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); - MVT VT = N->getValueType(0); - MVT VT0 = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT VT0 = N0.getValueType(); // fold (select C, X, X) -> X if (N1 == N2) @@ -2825,7 +2856,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // Check against MVT::Other for SELECT_CC, which is a workaround for targets // having to say they don't support SELECT_CC on every type the DAG knows // about, since there is 
no way to mark an opcode illegal at all value types - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -2945,7 +2977,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (sext c1) -> c1 if (isa<ConstantSDNode>(N0)) @@ -3054,13 +3086,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3071,14 +3103,34 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) if (N0.getOpcode() == ISD::SETCC) { + // sext(setcc) -> sext_in_reg(vsetcc) for vectors. + if (VT.isVector() && + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() && + + // Only do this before legalize for now. + !LegalOperations) { + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + } + + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + SDValue NegOne = + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT), + NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; } + + // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3090,7 +3142,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (zext c1) -> c1 if (isa<ConstantSDNode>(N0)) @@ -3194,13 +3246,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3225,7 +3277,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = 
N->getValueType(0); + EVT VT = N->getValueType(0); // fold (aext c1) -> c1 if (isa<ConstantSDNode>(N0)) @@ -3330,11 +3382,11 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3400,8 +3452,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = VT; + EVT VT = N->getValueType(0); + EVT ExtVT = VT; // This transformation isn't valid for vector loads. if (VT.isVector()) @@ -3411,20 +3463,21 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; - EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) + ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); } - unsigned EVTBits = EVT.getSizeInBits(); + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned ShAmt = 0; - if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); - if (N0.getValueType().getSizeInBits() <= EVTBits) + // Is the load width a multiple of size of VT? 
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } } @@ -3432,18 +3485,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() && + if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - MVT PtrType = N0.getOperand(1).getValueType(); + EVT PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (TLI.isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); - unsigned EVTStoreBits = EVT.getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; } @@ -3460,7 +3513,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LN0->isVolatile(), NewAlign) : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - EVT, LN0->isVolatile(), NewAlign); + ExtVT, LN0->isVolatile(), NewAlign); // Replace the old load's chain with the new load's chain. 
WorkListRemover DeadNodes(*this); @@ -3477,8 +3530,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - MVT VT = N->getValueType(0); - MVT EVT = cast<VTSDNode>(N1)->getVT(); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N1)->getVT(); unsigned VTBits = VT.getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); @@ -3573,7 +3626,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -3623,14 +3676,14 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { /// CombineConsecutiveLoads - build_pair (load, load) -> load /// if load locations are consecutive. -SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); - MVT LD1VT = LD1->getValueType(0); + EVT LD1VT = LD1->getValueType(0); const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (ISD::isNON_EXTLoad(LD2) && @@ -3642,7 +3695,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) @@ -3656,7 +3709,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { SDValue 
DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If the input is a BUILD_VECTOR with all constant elements, fold this now. // Only do this before legalize, since afterward the target may be depending @@ -3674,7 +3727,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { break; } - MVT DestEltVT = N->getValueType(0).getVectorElementType(); + EVT DestEltVT = N->getValueType(0).getVectorElementType(); assert(!DestEltVT.isVector() && "Element type of vector ValueType must not be vector!"); if (isSimple) @@ -3684,7 +3737,18 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); - if (Res.getNode() != N) return Res; + if (Res.getNode() != N) { + if (!LegalOperations || + TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) + return Res; + + // Folding it resulted in an illegal node, and it's too late to + // do that. Clean up the old node and forego the transformation. + // Ideally this won't happen very often, because instcombine + // and the earlier dagcombine runs (where illegal nodes are + // permitted) should have folded most of them already. 
+ DAG.DeleteNode(Res.getNode()); + } } // (conv (conv x, t1), t2) -> (conv x, t2) @@ -3700,7 +3764,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { @@ -3743,7 +3807,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); - MVT IntXVT = MVT::getIntegerVT(OrigXWidth); + EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); @@ -3791,7 +3855,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { } SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); return CombineConsecutiveLoads(N, VT); } @@ -3799,8 +3863,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { - MVT SrcEltVT = BV->getValueType(0).getVectorElementType(); +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { + EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. 
if (SrcEltVT == DstEltVT) return SDValue(BV, 0); @@ -3822,7 +3886,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - MVT VT = MVT::getVectorVT(DstEltVT, + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, BV->getValueType(0).getVectorNumElements()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); @@ -3835,7 +3899,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // Convert the input float vector to a int vector where the elements are the // same sizes. assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); - MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits()); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } @@ -3844,7 +3908,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // convert to integer first, then to FP of the right size. if (DstEltVT.isFloatingPoint()) { assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); - MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits()); + EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. @@ -3880,7 +3944,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); } - MVT VT = MVT::getVectorVT(DstEltVT, Ops.size()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -3889,7 +3953,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // turns into multiple outputs. 
bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; - MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + NumOutputsPerInput*BV->getNumOperands()); SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { @@ -3926,7 +3991,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -3967,7 +4032,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4001,7 +4066,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4024,7 +4089,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); - // fold (fmul X, (fneg 1.0)) -> (fneg X) + // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); @@ -4056,7 +4121,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); 
+ EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4089,7 +4154,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP && VT != MVT::ppcf128) @@ -4103,7 +4168,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); @@ -4151,8 +4216,8 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4173,8 +4238,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4195,7 +4260,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 if (N0CFP) @@ -4207,7 +4272,7 @@ SDValue 
DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 if (N0CFP && VT != MVT::ppcf128) @@ -4220,7 +4285,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp if (N0CFP && N0.getValueType() != MVT::ppcf128) @@ -4253,8 +4318,8 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); // fold (fp_round_inreg c1fp) -> c1fp @@ -4269,7 +4334,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. 
if (N->hasOneUse() && @@ -4326,7 +4391,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); @@ -4342,7 +4407,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fabs c1) -> fabs(c1) if (N0CFP && VT != MVT::ppcf128) @@ -4361,7 +4426,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); @@ -4419,7 +4484,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant) { - SDValue AndOp0 = Op0.getOperand(0); SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { @@ -4491,7 +4555,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { if (LD->isIndexed()) return false; @@ -4579,9 +4643,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << 
"\nReplacing.4 "; + N->dump(&DAG); + errs() << "\nWith: "; + Result.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4616,7 +4682,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { if (LD->isIndexed()) return false; @@ -4652,7 +4718,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { SDValue Offset; ISD::MemIndexedMode AM = ISD::UNINDEXED; if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { - if (Ptr == Offset) + if (Ptr == Offset && Op->getOpcode() == ISD::ADD) std::swap(BasePtr, Offset); if (Ptr != BasePtr) continue; @@ -4711,9 +4777,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.5 "; + N->dump(&DAG); + errs() << "\nWith: "; + Result.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4815,9 +4883,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. 
- DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG)); - DOUT << "\n"; + DEBUG(errs() << "\nReplacing.6 "; + N->dump(&DAG); + errs() << "\nWith chain: "; + Chain.getNode()->dump(&DAG); + errs() << "\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); @@ -4833,9 +4903,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG)); - DOUT << " and 2 other values\n"; + DEBUG(errs() << "\nReplacing.6 "; + N->dump(&DAG); + errs() << "\nWith: "; + Undef.getNode()->dump(&DAG); + errs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), @@ -4890,7 +4962,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplLoad.getValue(1)); - + + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Replace uses with load result and token factor. Don't add users // to work list. 
return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -4917,7 +4992,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue Chain = ST->getChain(); SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); - MVT VT = Value.getValueType(); + EVT VT = Value.getValueType(); if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) return SDValue(); @@ -4944,12 +5019,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned ShAmt = Imm.countTrailingZeros(); unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; unsigned NewBW = NextPowerOf2(MSB - ShAmt); - MVT NewVT = MVT::getIntegerVT(NewBW); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); while (NewBW < BitWidth && !(TLI.isOperationLegalOrCustom(Opc, NewVT) && TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); - NewVT = MVT::getIntegerVT(NewBW); + NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } if (NewBW >= BitWidth) return SDValue(); @@ -4971,7 +5046,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); if (NewAlign < - TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT())) + TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext()))) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -5024,9 +5099,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); - MVT SVT = Value.getOperand(0).getValueType(); + EVT SVT = Value.getOperand(0).getValueType(); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(SVT.getTypeForMVT()); + getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) @@ -5043,8 +5118,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // 
transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unknown FP type"); + switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP type"); case MVT::f80: // We don't do this for these yet. case MVT::f128: case MVT::ppcf128: @@ -5111,8 +5186,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If there is a better chain. if (Chain != BetterChain) { - // Replace the chain to avoid dependency. SDValue ReplStore; + + // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(),ST->getSrcValueOffset(), @@ -5128,6 +5204,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplStore); + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Don't add users to work list. return CombineTo(N, Token, false); } @@ -5211,10 +5290,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // BUILD_VECTOR with undef elements and the inserted element. if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && isa<ConstantSDNode>(EltNo)) { - MVT VT = InVec.getValueType(); - MVT EVT = VT.getVectorElementType(); + EVT VT = InVec.getValueType(); + EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT)); + SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT)); unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); if (Elt < Ops.size()) @@ -5232,7 +5311,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // If the operand is wider than the vector element type then it is implicitly // truncated. Make that explicit here. 
- MVT EltVT = InVec.getValueType().getVectorElementType(); + EVT EltVT = InVec.getValueType().getVectorElementType(); SDValue InOp = InVec.getOperand(0); if (InOp.getValueType() != EltVT) return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); @@ -5252,18 +5331,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - MVT VT = InVec.getValueType(); - MVT EVT = VT.getVectorElementType(); - MVT LVT = EVT; + EVT VT = InVec.getValueType(); + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; if (InVec.getOpcode() == ISD::BIT_CONVERT) { - MVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType())) + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) BCNumEltsChanged = true; InVec = InVec.getOperand(0); - EVT = BCVT.getVectorElementType(); + ExtVT = BCVT.getVectorElementType(); NewLoad = true; } @@ -5272,7 +5351,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && - InVec.getOperand(0).getValueType() == EVT && + InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { LN0 = cast<LoadSDNode>(InVec.getOperand(0)); } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { @@ -5306,7 +5385,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check the resultant load doesn't need a higher alignment than the // original load. 
unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT()); + TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) return SDValue(); @@ -5317,7 +5396,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue NewPtr = LN0->getBasePtr(); if (Elt) { unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; - MVT PtrType = NewPtr.getValueType(); + EVT PtrType = NewPtr.getValueType(); if (TLI.isBigEndian()) PtrOff = VT.getSizeInBits() / 8 - PtrOff; NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, @@ -5334,8 +5413,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); - MVT VT = N->getValueType(0); - MVT EltType = VT.getVectorElementType(); + EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from @@ -5432,11 +5510,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); assert(N0.getValueType().getVectorNumElements() == NumElts && "Vector shuffle must be normalized in DAG"); @@ -5494,7 +5571,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. 
==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -5517,14 +5594,14 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { } // Let's see if the target supports this vector_shuffle. - MVT RVT = RHS.getValueType(); + EVT RVT = RHS.getValueType(); if (!TLI.isVectorClearMaskLegal(Indices, RVT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. - MVT EVT = RVT.getVectorElementType(); + EVT EltVT = RVT.getVectorElementType(); SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EVT)); + DAG.getConstant(0, EltVT)); SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), RVT, &ZeroOps[0], ZeroOps.size()); LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); @@ -5543,10 +5620,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // things. Simplifying them may result in a loss of legality. if (LegalOperations) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); - MVT EltType = VT.getVectorElementType(); + EVT EltType = VT.getVectorElementType(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Shuffle = XformToShuffleWithZero(N); @@ -5589,7 +5666,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) { - MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -5728,7 +5805,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // (x ? y : y) -> y. 
if (N2 == N3) return N2; - MVT VT = N2.getValueType(); + EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); @@ -5820,8 +5897,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 - MVT XType = N0.getValueType(); - MVT AType = N2.getValueType(); + EVT XType = N0.getValueType(); + EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. @@ -5900,7 +5977,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); @@ -5942,7 +6019,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy())); @@ -5957,7 +6034,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { if 
(ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); if (SubC->isNullValue() && XType.isInteger()) { SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, @@ -5976,11 +6053,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. -SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0, +SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, Level == Unrestricted, false, this); + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } @@ -6012,11 +6089,12 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -/// FindBaseOffset - Return true if base is known not to alias with anything -/// but itself. Provides base object and offset as results. -static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { +/// FindBaseOffset - Return true if base is a frame index, which is known not +// to alias with anything but itself. Provides base object and offset as results. +static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, + GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. - Base = Ptr; Offset = 0; + Base = Ptr; Offset = 0; GV = 0; CV = 0; // If it's an adding a simple constant then integrate the offset. if (Base.getOpcode() == ISD::ADD) { @@ -6025,36 +6103,73 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { Offset += C->getZExtValue(); } } + + // Return the underlying GlobalValue, and update the Offset. Return false + // for GlobalAddressSDNode since the same GlobalAddress may be represented + // by multiple nodes with different offsets. 
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) { + GV = G->getGlobal(); + Offset += G->getOffset(); + return false; + } + // Return the underlying Constant value, and update the Offset. Return false + // for ConstantSDNodes since the same constant pool entry may be represented + // by multiple nodes with different offsets. + if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { + CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() + : (void *)C->getConstVal(); + Offset += C->getOffset(); + return false; + } // If it's any of the following then it can't alias with anything but itself. - return isa<FrameIndexSDNode>(Base) || - isa<ConstantPoolSDNode>(Base) || - isa<GlobalAddressSDNode>(Base); + return isa<FrameIndexSDNode>(Base); } /// isAlias - Return true if there is any possibility that the two addresses /// overlap. bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, SDValue Ptr2, int64_t Size2, - const Value *SrcValue2, int SrcValueOffset2) const { + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; - bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1); - bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2); + GlobalValue *GV1, *GV2; + void *CV1, *CV2; + bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); + bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); - // If they have a same base address then... - if (Base1 == Base2) - // Check to see if the addresses overlap. + // If they have a same base address then check to see if they overlap. 
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); - // If we know both bases then they can't alias. - if (KnownBase1 && KnownBase2) return false; + // If we know what the bases are, and they aren't identical, then we know they + // cannot alias. + if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + return false; + // If we know required SrcValue1 and SrcValue2 have relatively large alignment + // compared to the size and offset of the access, we may be able to prove they + // do not alias. This check is conservative for now to catch cases created by + // splitting vector types. + if ((SrcValueAlign1 == SrcValueAlign2) && + (SrcValueOffset1 != SrcValueOffset2) && + (Size1 == Size2) && (SrcValueAlign1 > Size1)) { + int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; + int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; + + // There is no overlap between these relatively aligned accesses of similar + // size, return no alias. + if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) + return false; + } + if (CombinerGlobalAA) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); @@ -6074,20 +6189,24 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// node. Returns true if the operand was a load. 
bool DAGCombiner::FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, int &SrcValueOffset) const { + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign) const { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); Size = LD->getMemoryVT().getSizeInBits() >> 3; SrcValue = LD->getSrcValue(); SrcValueOffset = LD->getSrcValueOffset(); + SrcValueAlign = LD->getOriginalAlignment(); return true; } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); Size = ST->getMemoryVT().getSizeInBits() >> 3; SrcValue = ST->getSrcValue(); SrcValueOffset = ST->getSrcValueOffset(); + SrcValueAlign = ST->getOriginalAlignment(); } else { - assert(0 && "FindAliasInfo expected a memory operand"); + llvm_unreachable("FindAliasInfo expected a memory operand"); } return false; @@ -6098,28 +6217,45 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVector<SDValue, 8> &Aliases) { SmallVector<SDValue, 8> Chains; // List of chains to visit. - std::set<SDNode *> Visited; // Visited node set. + SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. SDValue Ptr; - int64_t Size = 0; - const Value *SrcValue = 0; - int SrcValueOffset = 0; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset); + int64_t Size; + const Value *SrcValue; + int SrcValueOffset; + unsigned SrcValueAlign; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, + SrcValueAlign); // Starting off. Chains.push_back(OriginalChain); - + unsigned Depth = 0; + // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.back(); Chains.pop_back(); + + // For TokenFactor nodes, look at each operand and only continue up the + // chain until we find two aliases. 
If we've seen two aliases, assume we'll + // find more and revert to original chain since the xform is unlikely to be + // profitable. + // + // FIXME: The depth check could be made to return the last non-aliasing + // chain we found before we hit a tokenfactor rather than the original + // chain. + if (Depth > 6 || Aliases.size() == 2) { + Aliases.clear(); + Aliases.push_back(OriginalChain); + break; + } - // Don't bother if we've been before. - if (Visited.find(Chain.getNode()) != Visited.end()) continue; - Visited.insert(Chain.getNode()); + // Don't bother if we've been before. + if (!Visited.insert(Chain.getNode())) + continue; switch (Chain.getOpcode()) { case ISD::EntryToken: @@ -6130,35 +6266,40 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::STORE: { // Get alias information for Chain. SDValue OpPtr; - int64_t OpSize = 0; - const Value *OpSrcValue = 0; - int OpSrcValueOffset = 0; + int64_t OpSize; + const Value *OpSrcValue; + int OpSrcValueOffset; + unsigned OpSrcValueAlign; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpSrcValue, OpSrcValueOffset); + OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, SrcValue, SrcValueOffset, - OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) { + isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, + OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign)) { Aliases.push_back(Chain); } else { // Look further up the chain. Chains.push_back(Chain.getOperand(0)); - // Clean up old chain. - AddToWorkList(Chain.getNode()); + ++Depth; } break; } case ISD::TokenFactor: - // We have to check each of the operands of the token factor, so we queue - // then up. Adding the operands to the queue (stack) in reverse order - // maintains the original order and increases the likelihood that getNode - // will find a matching token factor (CSE.) 
+ // We have to check each of the operands of the token factor for "small" + // token factors, so we queue them up. Adding the operands to the queue + // (stack) in reverse order maintains the original order and increases the + // likelihood that getNode will find a matching token factor (CSE.) + if (Chain.getNumOperands() > 16) { + Aliases.push_back(Chain); + break; + } for (unsigned n = Chain.getNumOperands(); n;) Chains.push_back(Chain.getOperand(--n)); - // Eliminate the token factor if we can. - AddToWorkList(Chain.getNode()); + ++Depth; break; default: @@ -6184,15 +6325,10 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // If a single operand then chain to it. We don't need to revisit it. return Aliases[0]; } - + // Construct a custom tailored token factor. - SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, - &Aliases[0], Aliases.size()); - - // Make sure the old chain gets cleaned up. - if (NewChain != OldChain) AddToWorkList(OldChain.getNode()); - - return NewChain; + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); } // SelectionDAG::Combine - This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index cd2d5ac..8e955af 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -57,7 +57,7 @@ using namespace llvm; unsigned FastISel::getRegForValue(Value *V) { - MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); + EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) return 0; @@ -65,11 +65,11 @@ unsigned FastISel::getRegForValue(Value *V) { // Ignore illegal types. We must do this before looking up the value // in ValueMap because Arguments are given virtual registers regardless // of whether FastISel can handle them. 
- MVT::SimpleValueType VT = RealVT.getSimpleVT(); + MVT VT = RealVT.getSimpleVT(); if (!TLI.isTypeLegal(VT)) { // Promote MVT::i1 to a legal type though, because it's common and easy. if (VT == MVT::i1) - VT = TLI.getTypeToTransformTo(VT).getSimpleVT(); + VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); else return 0; } @@ -92,13 +92,14 @@ unsigned FastISel::getRegForValue(Value *V) { } else if (isa<ConstantPointerNull>(V)) { // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. - Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType())); + Reg = + getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); if (!Reg) { const APFloat &Flt = CF->getValueAPF(); - MVT IntVT = TLI.getPointerTy(); + EVT IntVT = TLI.getPointerTy(); uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); @@ -108,7 +109,8 @@ unsigned FastISel::getRegForValue(Value *V) { if (isExact) { APInt IntVal(IntBitWidth, 2, x); - unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal)); + unsigned IntegerReg = + getRegForValue(ConstantInt::get(V->getContext(), IntVal)); if (IntegerReg != 0) Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); } @@ -174,13 +176,11 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { // If the index is smaller or larger than intptr_t, truncate or extend it. 
MVT PtrVT = TLI.getPointerTy(); - MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false); + EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), - ISD::SIGN_EXTEND, IdxN); + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN); else if (IdxVT.bitsGT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), - ISD::TRUNCATE, IdxN); + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN); return IdxN; } @@ -188,7 +188,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { /// which has an opcode which directly corresponds to the given ISD opcode. /// bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) { - MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true); + EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; @@ -203,7 +203,7 @@ bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) { if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || ISDOpcode == ISD::XOR)) - VT = TLI.getTypeToTransformTo(VT); + VT = TLI.getTypeToTransformTo(I->getContext(), VT); else return false; } @@ -260,7 +260,7 @@ bool FastISel::SelectGetElementPtr(User *I) { return false; const Type *Ty = I->getOperand(0)->getType(); - MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT(); + MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); OI != E; ++OI) { Value *Idx = *OI; @@ -335,7 +335,7 @@ bool FastISel::SelectCall(User *I) { if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW && DW->ShouldEmitDwarfDebug()) { unsigned ID = - DW->RecordRegionStart(cast<GlobalVariable>(RSI->getContext())); + DW->RecordRegionStart(RSI->getContext()); const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); BuildMI(MBB, DL, II).addImm(ID); } 
@@ -346,7 +346,7 @@ bool FastISel::SelectCall(User *I) { if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW && DW->ShouldEmitDwarfDebug()) { unsigned ID = 0; - DISubprogram Subprogram(cast<GlobalVariable>(REI->getContext())); + DISubprogram Subprogram(REI->getContext()); if (isInlinedFnEnd(*REI, MF.getFunction())) { // This is end of an inlined function. const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); @@ -359,7 +359,7 @@ bool FastISel::SelectCall(User *I) { BuildMI(MBB, DL, II).addImm(ID); } else { const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - ID = DW->RecordRegionEnd(cast<GlobalVariable>(REI->getContext())); + ID = DW->RecordRegionEnd(REI->getContext()); BuildMI(MBB, DL, II).addImm(ID); } } @@ -384,11 +384,10 @@ bool FastISel::SelectCall(User *I) { setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram())); - unsigned LabelID = DW->RecordInlinedFnStart(SP, - DICompileUnit(PrevLocTpl.CompileUnit), - PrevLocTpl.Line, - PrevLocTpl.Col); + DISubprogram SP(FSI->getSubprogram()); + unsigned LabelID = + DW->RecordInlinedFnStart(SP,DICompileUnit(PrevLocTpl.Scope), + PrevLocTpl.Line, PrevLocTpl.Col); const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); BuildMI(MBB, DL, II).addImm(LabelID); return true; @@ -398,7 +397,7 @@ bool FastISel::SelectCall(User *I) { MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); // llvm.dbg.func_start also defines beginning of function scope. 
- DW->RecordRegionStart(cast<GlobalVariable>(FSI->getSubprogram())); + DW->RecordRegionStart(FSI->getSubprogram()); return true; } case Intrinsic::dbg_declare: { @@ -407,7 +406,6 @@ bool FastISel::SelectCall(User *I) { || !DW->ShouldEmitDwarfDebug()) return true; - Value *Variable = DI->getVariable(); Value *Address = DI->getAddress(); if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); @@ -418,20 +416,15 @@ bool FastISel::SelectCall(User *I) { StaticAllocaMap.find(AI); if (SI == StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; - - // Determine the debug globalvariable. - GlobalValue *GV = cast<GlobalVariable>(Variable); - - // Build the DECLARE instruction. - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE); - MachineInstr *DeclareMI - = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV); - DIVariable DV(cast<GlobalVariable>(GV)); - DW->RecordVariableScope(DV, DeclareMI); + if (MMI) + MMI->setVariableDbgInfo(DI->getVariable(), FI); +#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->RecordVariable(DI->getVariable(), FI); +#endif return true; } case Intrinsic::eh_exception: { - MVT VT = TLI.getValueType(I->getType()); + EVT VT = TLI.getValueType(I->getType()); switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { default: break; case TargetLowering::Expand: { @@ -449,15 +442,11 @@ bool FastISel::SelectCall(User *I) { } break; } - case Intrinsic::eh_selector_i32: - case Intrinsic::eh_selector_i64: { - MVT VT = TLI.getValueType(I->getType()); + case Intrinsic::eh_selector: { + EVT VT = TLI.getValueType(I->getType()); switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { default: break; case TargetLowering::Expand: { - MVT VT = (IID == Intrinsic::eh_selector_i32 ? 
- MVT::i32 : MVT::i64); - if (MMI) { if (MBB->isLandingPad()) AddCatchInfo(*cast<CallInst>(I), MMI, MBB); @@ -471,12 +460,25 @@ bool FastISel::SelectCall(User *I) { } unsigned Reg = TLI.getExceptionSelectorRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Reg, RC, RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, + RC, RC); assert(InsertedCopy && "Can't copy address registers!"); InsertedCopy = InsertedCopy; + + // Cast the register to the type of the selector. + if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + UpdateValueMap(I, ResultReg); } else { unsigned ResultReg = @@ -493,8 +495,8 @@ bool FastISel::SelectCall(User *I) { } bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple()) @@ -524,14 +526,14 @@ bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { // If the operand is i1, arrange for the high bits in the register to be zero. if (SrcVT == MVT::i1) { - SrcVT = TLI.getTypeToTransformTo(SrcVT); + SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); if (!InputReg) return false; } // If the result is i1, truncate to the target's type for i1 first. 
if (DstVT == MVT::i1) - DstVT = TLI.getTypeToTransformTo(DstVT); + DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT); unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), @@ -555,8 +557,8 @@ bool FastISel::SelectBitCast(User *I) { } // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple() || @@ -616,6 +618,49 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { MBB->addSuccessor(MSucc); } +/// SelectFNeg - Emit an FNeg operation. +/// +bool +FastISel::SelectFNeg(User *I) { + unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); + if (OpReg == 0) return false; + + // If the target has ISD::FNEG, use it. + EVT VT = TLI.getValueType(I->getType()); + unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), + ISD::FNEG, OpReg); + if (ResultReg != 0) { + UpdateValueMap(I, ResultReg); + return true; + } + + // Bitcast the value to integer, twiddle the sign bit with xor, + // and then bitcast it back to floating-point. 
+ if (VT.getSizeInBits() > 64) return false; + EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); + if (!TLI.isTypeLegal(IntVT)) + return false; + + unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::BIT_CONVERT, OpReg); + if (IntReg == 0) + return false; + + unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg, + UINT64_C(1) << (VT.getSizeInBits()-1), + IntVT.getSimpleVT()); + if (IntResultReg == 0) + return false; + + ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), + ISD::BIT_CONVERT, IntResultReg); + if (ResultReg == 0) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + bool FastISel::SelectOperator(User *I, unsigned Opcode) { switch (Opcode) { @@ -626,6 +671,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { case Instruction::Sub: return SelectBinaryOp(I, ISD::SUB); case Instruction::FSub: + // FNeg is currently represented in LLVM IR as a special case of FSub. + if (BinaryOperator::isFNeg(I)) + return SelectFNeg(I); return SelectBinaryOp(I, ISD::FSUB); case Instruction::Mul: return SelectBinaryOp(I, ISD::MUL); @@ -709,8 +757,8 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { case Instruction::IntToPtr: // Deliberate fall-through. 
case Instruction::PtrToInt: { - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (DstVT.bitsGT(SrcVT)) return SelectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) @@ -758,45 +806,44 @@ FastISel::FastISel(MachineFunction &mf, FastISel::~FastISel() {} -unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_(MVT, MVT, ISD::NodeType) { return 0; } -unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_r(MVT, MVT, ISD::NodeType, unsigned /*Op0*/) { return 0; } -unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_rr(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, unsigned /*Op0*/) { return 0; } -unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType, - ISD::NodeType, uint64_t /*Imm*/) { +unsigned FastISel::FastEmit_i(MVT, MVT, ISD::NodeType, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_f(MVT, MVT, ISD::NodeType, ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_ri(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_rf(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_rri(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, unsigned /*Op1*/, uint64_t /*Imm*/) { @@ -807,9 +854,9 @@ unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType, /// to emit an instruction with an immediate operand using 
FastEmit_ri. /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. -unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode, +unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode, unsigned Op0, uint64_t Imm, - MVT::SimpleValueType ImmType) { + MVT ImmType) { // First check if immediate type is legal. If not, we can't use the ri form. unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm); if (ResultReg != 0) @@ -824,9 +871,9 @@ unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode, /// to emit an instruction with a floating-point immediate operand using /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and try FastEmit_rr instead. -unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode, +unsigned FastISel::FastEmit_rf_(MVT VT, ISD::NodeType Opcode, unsigned Op0, ConstantFP *FPImm, - MVT::SimpleValueType ImmType) { + MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); if (ResultReg != 0) @@ -842,7 +889,7 @@ unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode, // be replaced by code that creates a load from a constant-pool entry, // which will require some target-specific work. 
const APFloat &Flt = FPImm->getValueAPF(); - MVT IntVT = TLI.getPointerTy(); + EVT IntVT = TLI.getPointerTy(); uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); @@ -987,7 +1034,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, +unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, uint32_t Idx) { const TargetRegisterClass* RC = MRI.getRegClass(Op0); @@ -1008,6 +1055,6 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op /// with all but the least significant bit set to zero. -unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) { +unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) { return FastEmit_ri(VT, VT, ISD::AND, Op, 1); } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp new file mode 100644 index 0000000..d3ffb2a --- /dev/null +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -0,0 +1,693 @@ +//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instr-emitter" +#include "InstrEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional flag operands (which do +/// not go into the resulting MachineInstr). +unsigned InstrEmitter::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Flag) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. + return N; +} + +/// CountOperands - The inputs to target nodes have any actual inputs first, +/// followed by an optional chain operand, then an optional flag operand. +/// Compute the number of actual operands that will go into the resulting +/// MachineInstr. +unsigned InstrEmitter::CountOperands(SDNode *Node) { + unsigned N = Node->getNumOperands(); + while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) + --N; + if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) + --N; // Ignore chain if it exists. + return N; +} + +/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an +/// implicit physical register output. 
+void InstrEmitter:: +EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, + unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VRBase = 0; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Just use the input register directly! + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + return; + } + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + bool MatchReg = true; + const TargetRegisterClass *UseRC = NULL; + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + EVT VT = Node->getValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Flag) + continue; + Match = false; + if (User->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = 0; + if (i+II.getNumDefs() < II.getNumOperands()) + RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI); + if (!UseRC) + UseRC = RC; + else if (RC) { + const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC); + // If multiple uses expect disjoint register classes, we emit + // copies in 
AddRegisterOperand. + if (ComRC) + UseRC = ComRC; + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + EVT VT = Node->getValueType(ResNo); + const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI->getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. + VRBase = MRI->createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg, + DstRC, SrcRC); + + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfCopyToRegUse - If the only use of the specified result number of +/// node is a CopyToReg, return its destination register. Return 0 otherwise. 
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI); + if (II.OpInfo[i].isOptionalDef()) { + // Optional def must be a physical register. 
+ unsigned NumResults = CountResults(Node); + VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + if (!VRBase && !IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI->createVirtualRegister(RC); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned InstrEmitter::getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // does not include operand register class info. 
+ if (!VReg) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + VReg = MRI->createVirtualRegister(RC); + } + BuildMI(MBB, Op.getDebugLoc(), + TII->get(TargetInstrInfo::IMPLICIT_DEF), VReg); + return VReg; + } + + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const TargetInstrDesc &TID = MI->getDesc(); + bool isOptDef = IIOpNum < TID.getNumOperands() && + TID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it. 
+ if (II) { + const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + const TargetRegisterClass *DstRC = 0; + if (IIOpNum < II->getNumOperands()) + DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); + assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + "Don't have operand info for this instruction!"); + if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + VReg = NewVReg; + } + } + + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// assertions only. +void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode()) { + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateImm(C->getSExtValue())); + } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { + const ConstantFP *CFP = F->getConstantFPValue(); + MI->addOperand(MachineOperand::CreateFPImm(CFP)); + } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), + TGA->getTargetFlags())); + } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock())); + } else if (FrameIndexSDNode *FI = 
dyn_cast<FrameIndexSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateFI(FI->getIndex())); + } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(), + JT->getTargetFlags())); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + const Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + Align = TM->getTargetData()->getPrefTypeAlignment(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! + Align = TM->getTargetData()->getTypeAllocSize(Type); + } + } + + unsigned Idx; + MachineConstantPool *MCP = MF->getConstantPool(); + if (CP->isMachineConstantPoolEntry()) + Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); + MI->addOperand(MachineOperand::CreateCPI(Idx, Offset, + CP->getTargetFlags())); + } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateES(ES->getSymbol(), + ES->getTargetFlags())); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } +} + +/// getSuperRegisterRegClass - Returns the register class of a superreg A whose +/// "SubIdx"'th sub-register class is the specified register class and whose +/// type matches the specified type. 
+static const TargetRegisterClass* +getSuperRegisterRegClass(const TargetRegisterClass *TRC, + unsigned SubIdx, EVT VT) { + // Pick the register class of the superegister for this type + for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(), + E = TRC->superregclasses_end(); I != E; ++I) + if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC) + return *I; + assert(false && "Couldn't find the register class"); + return 0; +} + +/// EmitSubregNode - Generate machine code for subreg nodes. +/// +void InstrEmitter::EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap){ + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetInstrInfo::EXTRACT_SUBREG) { + unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::EXTRACT_SUBREG)); + + // Figure out the register class to create for the destreg. + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(VReg); + const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); + assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + + // Figure out the register class to create for the destreg. 
+ // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } + + // Add def, source, and subreg index + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + MBB->insert(InsertPos, MI); + } else if (Opc == TargetInstrInfo::INSERT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubReg = getVR(N1, VRBaseMap); + unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, + Node->getValueType(0)); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } + + // Create the insert_subreg or subreg_to_reg machine instruction. 
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + + // If creating a subreg_to_reg, then the first input operand + // is an implicit value immediate, otherwise it's a register + if (Opc == TargetInstrInfo::SUBREG_TO_REG) { + const ConstantSDNode *SD = cast<ConstantSDNode>(N0); + MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); + } else + AddOperand(MI, N0, 0, 0, VRBaseMap); + // Add the subregster being inserted + AddOperand(MI, N1, 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + MBB->insert(InsertPos, MI); + } else + llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. +/// COPY_TO_REGCLASS is just a normal copy, except that the destination +/// register is constrained to be in a particular register class. +/// +void +InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); + + // Create the new VReg in the destination class and emit a copy. + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && + "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n"); + (void) Emitted; + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + isNew = isNew; // Silence compiler warning. 
+ assert(isNew && "Node emitted out of order - early"); +} + +/// EmitNode - Generate machine code for an node and needed dependencies. +/// +void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + // If machine instruction + if (Node->isMachineOpcode()) { + unsigned Opc = Node->getMachineOpcode(); + + // Handle subreg insert/extract specially + if (Opc == TargetInstrInfo::EXTRACT_SUBREG || + Opc == TargetInstrInfo::INSERT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG) { + EmitSubregNode(Node, VRBaseMap); + return; + } + + // Handle COPY_TO_REGCLASS specially. + if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) { + EmitCopyToRegClassNode(Node, VRBaseMap); + return; + } + + if (Opc == TargetInstrInfo::IMPLICIT_DEF) + // We want a unique VR for each IMPLICIT_DEF use. + return; + + const TargetInstrDesc &II = TII->get(Opc); + unsigned NumResults = CountResults(Node); + unsigned NodeOperands = CountOperands(Node); + bool HasPhysRegOuts = (NumResults > II.getNumDefs()) && + II.getImplicitDefs() != 0; +#ifndef NDEBUG + unsigned NumMIOperands = NodeOperands + NumResults; + assert((II.getNumOperands() == NumMIOperands || + HasPhysRegOuts || II.isVariadic()) && + "#operands for dag node doesn't match .td file!"); +#endif + + // Create the new machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); + + // Add result register values for things that are defined by this + // instruction. + if (NumResults) + CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); + + // Emit all of the actual operands of this instruction, adding them to the + // instruction as appropriate. + bool HasOptPRefs = II.getNumDefs() > NumResults; + assert((!HasOptPRefs || !HasPhysRegOuts) && + "Unable to cope with optional defs and phys regs defs!"); + unsigned NumSkip = HasOptPRefs ? 
II.getNumDefs() - NumResults : 0; + for (unsigned i = NumSkip; i != NodeOperands; ++i) + AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + VRBaseMap); + + // Transfer all of the memory reference descriptions of this instruction. + MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), + cast<MachineSDNode>(Node)->memoperands_end()); + + if (II.usesCustomDAGSchedInsertionHook()) { + // Insert this instruction into the basic block using a target + // specific inserter which may returns a new basic block. + MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM); + InsertPos = MBB->end(); + } else { + MBB->insert(InsertPos, MI); + } + + // Additional results must be an physical register def. + if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (Node->hasAnyUseOfValue(i)) + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + } + } + return; + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(); +#endif + llvm_unreachable("This target-independent node should have been selected!"); + break; + case ISD::EntryToken: + llvm_unreachable("EntryToken should have been excluded from the schedule!"); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. + break; + + const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; + // Get the register classes of the src/dst. 
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + SrcTRC = MRI->getRegClass(SrcReg); + else + SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); + + if (TargetRegisterInfo::isVirtualRegister(DestReg)) + DstTRC = MRI->getRegClass(DestReg); + else + DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, + Node->getOperand(1).getValueType()); + + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg, + DstTRC, SrcTRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + // Create the inline asm machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::INLINEASM)); + + // Add the asm string as an external symbol operand. + const char *AsmStr = + cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol(); + MI->addOperand(MachineOperand::CreateES(AsmStr)); + + // Add all of the operand registers to the instruction. + for (unsigned i = 2; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + MI->addOperand(MachineOperand::CreateImm(Flags)); + ++i; // Skip the ID value. + + switch (Flags & 7) { + default: llvm_unreachable("Bad flags!"); + case 2: // Def of register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + break; + case 6: // Def of earlyclobber register. 
+ for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, + false, false, true)); + } + break; + case 1: // Use of register. + case 3: // Immediate. + case 4: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (; NumVals; --NumVals, ++i) + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap); + break; + } + } + MBB->insert(InsertPos, MI); + break; + } + } +} + +/// InstrEmitter - Construct an InstrEmitter and set it to start inserting +/// at the given position in the given block. +InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, + MachineBasicBlock::iterator insertpos) + : MF(mbb->getParent()), + MRI(&MF->getRegInfo()), + TM(&MF->getTarget()), + TII(TM->getInstrInfo()), + TRI(TM->getRegisterInfo()), + TLI(TM->getTargetLowering()), + MBB(mbb), InsertPos(insertpos) { +} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h new file mode 100644 index 0000000..bb4634d --- /dev/null +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -0,0 +1,119 @@ +//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. 
+// +//===----------------------------------------------------------------------===// + +#ifndef INSTREMITTER_H +#define INSTREMITTER_H + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + +class TargetInstrDesc; + +class InstrEmitter { + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetMachine *TM; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + + MachineBasicBlock *MBB; + MachineBasicBlock::iterator InsertPos; + + /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an + /// implicit physical register output. + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getDstOfCopyToRegUse - If the only use of the specified result number of + /// node is a CopyToReg, return its destination register. Return 0 otherwise. + unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const; + + void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getVR - Return the virtual register corresponding to the specified result + /// of the specified node. + unsigned getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// AddRegisterOperand - Add the specified register as an operand to the + /// specified machine instr. Insert register copies if the register is + /// not in the required register class. + void AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// AddOperand - Add the specified operand to the specified machine instr. II + /// specifies the instruction information for the node, and IIOpNum is the + /// operand number (in the II) that we are adding. 
IIOpNum and II are used for + /// assertions only. + void AddOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitSubregNode - Generate machine code for subreg nodes. + /// + void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. + /// COPY_TO_REGCLASS is just a normal copy, except that the destination + /// register is constrained to be in a particular register class. + /// + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap); + +public: + /// CountResults - The results of target nodes have register or immediate + /// operands first, then an optional chain, and optional flag operands + /// (which do not go into the machine instrs.) + static unsigned CountResults(SDNode *Node); + + /// CountOperands - The inputs to target nodes have any actual inputs first, + /// followed by an optional chain operand, then flag operands. Compute + /// the number of actual operands that will go into the resulting + /// MachineInstr. + static unsigned CountOperands(SDNode *Node); + + /// EmitNode - Generate machine code for an node and needed dependencies. + /// + void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + + /// getBlock - Return the current basic block. + MachineBasicBlock *getBlock() { return MBB; } + + /// getInsertPos - Return the current insertion position. + MachineBasicBlock::iterator getInsertPos() { return InsertPos; } + + /// InstrEmitter - Construct an InstrEmitter and set it to start inserting + /// at the given position in the given block. 
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos); +}; + +} + +#endif diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 1413d95..fc01b07 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -30,9 +30,12 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -98,13 +101,14 @@ public: /// getTypeAction - Return how we should legalize values of this type, either /// it is already legal or we need to expand it into multiple registers of /// smaller integer type, or we need to promote it to a larger type. - LegalizeAction getTypeAction(MVT VT) const { - return (LegalizeAction)ValueTypeActions.getTypeAction(VT); + LegalizeAction getTypeAction(EVT VT) const { + return + (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT); } /// isTypeLegal - Return true if this type is legal on this target. /// - bool isTypeLegal(MVT VT) const { + bool isTypeLegal(EVT VT) const { return getTypeAction(VT) == Legal; } @@ -131,14 +135,14 @@ private: /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. 
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> - SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl, + SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, SmallVectorImpl<int> &Mask) const; bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, SmallPtrSet<SDNode*, 32> &NodesLeadingTo); - void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -149,18 +153,18 @@ private: RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); - SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl); + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); SDValue ExpandDBG_STOPPOINT(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue ExpandFCOPYSIGN(SDNode *Node); - SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT, + SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, DebugLoc dl); - SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned, + SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, DebugLoc dl); - SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned, + SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, DebugLoc dl); SDValue ExpandBSWAP(SDValue Op, DebugLoc dl); @@ -179,10 +183,10 @@ private: /// whose vector element type is narrower than the original shuffle type. /// e.g. 
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue -SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl, +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, SmallVectorImpl<int> &Mask) const { - MVT EltVT = NVT.getVectorElementType(); + EVT EltVT = NVT.getVectorElementType(); unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -342,7 +346,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, // double. This shrinks FP constants and canonicalizes them for targets where // an FP extending load is the same cost as a normal load (such as on the x87 // fp stack or PPC FP unit). - MVT VT = CFP->getValueType(0); + EVT VT = CFP->getValueType(0); ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); if (!UseCP) { assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); @@ -350,16 +354,16 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, (VT == MVT::f64) ? MVT::i64 : MVT::i32); } - MVT OrigVT = VT; - MVT SVT = VT; + EVT OrigVT = VT; + EVT SVT = VT; while (SVT != MVT::f32) { - SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1); + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. 
TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { - const Type *SType = SVT.getTypeForMVT(); + const Type *SType = SVT.getTypeForEVT(*DAG.getContext()); LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); VT = SVT; Extend = true; @@ -384,13 +388,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); - MVT VT = Val.getValueType(); + EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); int SVOffset = ST->getSrcValueOffset(); DebugLoc dl = ST->getDebugLoc(); if (ST->getMemoryVT().isFloatingPoint() || ST->getMemoryVT().isVector()) { - MVT intVT = MVT::getIntegerVT(VT.getSizeInBits()); + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); if (TLI.isTypeLegal(intVT)) { // Expand to a bitconvert of the value to the integer type of the // same size, then a (misaligned) int store. @@ -401,9 +405,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, } else { // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. - MVT StoredVT = ST->getMemoryVT(); - MVT RegVT = - TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits())); + EVT StoredVT = ST->getMemoryVT(); + EVT RegVT = + TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); unsigned StoredBytes = StoredVT.getSizeInBits() / 8; unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; @@ -437,7 +441,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // The last store may be partial. Do a truncating store. On big-endian // machines this requires an extending load from the stack slot to ensure // that the bits are in the right place. 
- MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset)); // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, @@ -456,8 +460,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, !ST->getMemoryVT().isVector() && "Unaligned store of unknown type."); // Get the half-size VT - MVT NewStoredVT = - (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1); + EVT NewStoredVT = + (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT().SimpleTy - 1); int NumBits = NewStoredVT.getSizeInBits(); int IncrementSize = NumBits / 8; @@ -488,11 +492,11 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, int SVOffset = LD->getSrcValueOffset(); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - MVT VT = LD->getValueType(0); - MVT LoadedVT = LD->getMemoryVT(); + EVT VT = LD->getValueType(0); + EVT LoadedVT = LD->getMemoryVT(); DebugLoc dl = LD->getDebugLoc(); if (VT.isFloatingPoint() || VT.isVector()) { - MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits()); + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (TLI.isTypeLegal(intVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. @@ -508,7 +512,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } else { // Copy the value to a (aligned) stack slot using (unaligned) integer // loads and stores, then do a (aligned) load from the stack slot. - MVT RegVT = TLI.getRegisterType(intVT); + EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; @@ -538,7 +542,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // The last copy may be partial. Do an extending load. 
- MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset)); SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), @@ -568,8 +572,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Compute the new VT that is half the size of the old one. This is an // integer MVT. unsigned NumBits = LoadedVT.getSizeInBits(); - MVT NewLoadedVT; - NewLoadedVT = MVT::getIntegerVT(NumBits/2); + EVT NewLoadedVT; + NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); NumBits >>= 1; unsigned Alignment = LD->getAlignment(); @@ -629,10 +633,10 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // with a "move to register" or "extload into register" instruction, then // permute it into place, if the idx is a constant and if the idx is // supported by the target. - MVT VT = Tmp1.getValueType(); - MVT EltVT = VT.getVectorElementType(); - MVT IdxVT = Tmp3.getValueType(); - MVT PtrVT = TLI.getPointerTy(); + EVT VT = Tmp1.getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT IdxVT = Tmp3.getValueType(); + EVT PtrVT = TLI.getPointerTy(); SDValue StackPtr = DAG.CreateStackTemporary(VT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); @@ -663,7 +667,7 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { // SCALAR_TO_VECTOR requires that the type of the value being inserted // match the element type of the vector being created, except for // integers in which case the inserted value can be over width. 
- MVT EltVT = Vec.getValueType().getVectorElementType(); + EVT EltVT = Vec.getValueType().getVectorElementType(); if (Val.getValueType() == EltVT || (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, @@ -785,7 +789,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { - MVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); break; } @@ -795,7 +799,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : Node->getOpcode() == ISD::SETCC ? 2 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; - MVT OpVT = Node->getOperand(CompareOperand).getValueType(); + EVT OpVT = Node->getOperand(CompareOperand).getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); Action = TLI.getCondCodeAction(CCCode, OpVT); @@ -821,11 +825,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // special case should be done as part of making LegalizeDAG non-recursive. SimpleFinishLegalizing = false; break; - case ISD::CALL: - // FIXME: Legalization for calls requires custom-lowering the call before - // legalizing the operands! (I haven't looked into precisely why.) - SimpleFinishLegalizing = false; - break; case ISD::EXTRACT_ELEMENT: case ISD::FLT_ROUNDS_: case ISD::SADDO: @@ -847,7 +846,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: - case ISD::FORMAL_ARGUMENTS: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. 
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -885,7 +883,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - case ISD::RET: // Branches tweak the chain to include LastCALLSEQ_END Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], LastCALLSEQ_END); @@ -902,6 +899,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (!Ops[1].getValueType().isVector()) Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); break; + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: + case ISD::SHL_PARTS: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Ops[2].getValueType().isVector()) + Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2])); + break; } Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), @@ -946,44 +951,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { switch (Node->getOpcode()) { default: #ifndef NDEBUG - cerr << "NODE: "; Node->dump(&DAG); cerr << "\n"; + errs() << "NODE: "; + Node->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to legalize this operator!"); - abort(); - case ISD::CALL: - // The only option for this is to custom lower it. - Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG); - assert(Tmp3.getNode() && "Target didn't custom lower this node!"); - // A call within a calling sequence must be legalized to something - // other than the normal CALLSEQ_END. Violating this gets Legalize - // into an infinite loop. - assert ((!IsLegalizingCall || - Node->getOpcode() != ISD::CALL || - Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) && - "Nested CALLSEQ_START..CALLSEQ_END not supported."); - - // The number of incoming and outgoing values should match; unless the final - // outgoing value is a flag. 
- assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() || - (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 && - Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) == - MVT::Flag)) && - "Lowering call/formal_arguments produced unexpected # results!"); - - // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to - // remember that we legalized all of them, so it doesn't get relegalized. - for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) { - if (Tmp3.getNode()->getValueType(i) == MVT::Flag) - continue; - Tmp1 = LegalizeOp(Tmp3.getValue(i)); - if (Op.getResNo() == i) - Tmp2 = Tmp1; - AddLegalizedOperand(SDValue(Node, i), Tmp1); - } - return Tmp2; + llvm_unreachable("Do not know how to legalize this operator!"); + case ISD::BUILD_VECTOR: switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: Tmp3 = TLI.LowerOperation(Result, DAG); if (Tmp3.getNode()) { @@ -1094,22 +1070,22 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); Tmp3 = Result.getValue(0); Tmp4 = Result.getValue(1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + DAG, TLI); Tmp3 = Result.getOperand(0); Tmp4 = Result.getOperand(1); Tmp3 = LegalizeOp(Tmp3); @@ -1128,7 +1104,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Only promote a load of vector type to another. assert(VT.isVector() && "Cannot promote this load!"); // Change base type to a different vector type. - MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), @@ -1144,7 +1120,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { AddLegalizedOperand(SDValue(Node, 1), Tmp4); return Op.getResNo() ? Tmp4 : Tmp3; } else { - MVT SrcVT = LD->getMemoryVT(); + EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); @@ -1163,7 +1139,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. 
unsigned NewWidth = SrcVT.getStoreSizeInBits(); - MVT NVT = MVT::getIntegerVT(NewWidth); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); SDValue Ch; // The extra bits are guaranteed to be zero, since we stored them that @@ -1201,8 +1177,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Load size not an integral number of bytes!"); - MVT RoundVT = MVT::getIntegerVT(RoundWidth); - MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi, Ch; unsigned IncrementSize; @@ -1269,7 +1245,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Ch); } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH @@ -1287,12 +1263,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + DAG, TLI); Tmp1 = Result.getOperand(0); Tmp2 = Result.getOperand(1); Tmp1 = LegalizeOp(Tmp1); @@ -1303,10 +1279,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case TargetLowering::Expand: // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND - if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) { + // f128 = EXTLOAD {f32,f64} too + if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 || + Node->getValueType(0) == MVT::f128)) || + (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); Result = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Load); Tmp1 = LegalizeOp(Result); // Relegalize new nodes. @@ -1359,18 +1338,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, ST->getOffset()); - MVT VT = Tmp3.getValueType(); + EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: @@ -1391,14 +1370,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { Tmp3 = LegalizeOp(ST->getValue()); - MVT StVT = ST->getMemoryVT(); + EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); if (StWidth != StVT.getStoreSizeInBits()) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, NVT, isVolatile, Alignment); @@ -1412,8 +1391,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Store size not an integral number of bytes!"); - MVT RoundVT = MVT::getIntegerVT(RoundWidth); - MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi; unsigned IncrementSize; @@ -1460,16 +1439,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ST->getOffset()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: assert(0 && "This 
action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: @@ -1522,7 +1501,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + if (Op.getValueType().isVector()) + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + else + return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + NULL, 0, Vec.getValueType().getVectorElementType()); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1530,8 +1513,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // aligned object on the stack, store each element into it, then load // the result as a vector. // Create the stack frame object. 
- MVT VT = Node->getValueType(0); - MVT OpVT = Node->getOperand(0).getValueType(); + EVT VT = Node->getValueType(0); + EVT OpVT = Node->getOperand(0).getValueType(); DebugLoc dl = Node->getDebugLoc(); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); @@ -1574,7 +1557,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { "Ugly special-cased code!"); // Get the sign bit of the RHS. SDValue SignBit; - MVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32; + EVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32; if (isTypeLegal(IVT)) { SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); } else { @@ -1613,9 +1596,8 @@ SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) { bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other); const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node); - GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit()); - if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) { - DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit())); + MDNode *CU_Node = DSP->getCompileUnit(); + if (DW && (useDEBUG_LOC || useLABEL)) { unsigned Line = DSP->getLine(); unsigned Col = DSP->getColumn(); @@ -1627,9 +1609,9 @@ SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) { return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0), DAG.getConstant(Line, MVT::i32), DAG.getConstant(Col, MVT::i32), - DAG.getSrcValue(CU.getGV())); + DAG.getSrcValue(CU_Node)); } else { - unsigned ID = DW->RecordSourceLine(Line, Col, CU); + unsigned ID = DW->RecordSourceLine(Line, Col, CU_Node); return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID); } } @@ -1643,7 +1625,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" " not tell us which reg is the stack pointer!"); DebugLoc dl = Node->getDebugLoc(); - 
MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDValue Tmp1 = SDValue(Node, 0); SDValue Tmp2 = SDValue(Node, 1); SDValue Tmp3 = Node->getOperand(2); @@ -1676,14 +1658,14 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// condition code CC on the current target. This routine assumes LHS and rHS /// have already been legalized by LegalizeSetCCOperands. It expands SETCC with /// illegal condition code into AND / OR of multiple SETCC values. -void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT, +void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl) { - MVT OpVT = LHS.getValueType(); + EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: assert(0 && "Unknown condition code action!"); + default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1691,7 +1673,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: assert(0 && "Don't know how to expand this condition!"); abort(); + default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -1722,13 +1704,13 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT, /// a load from the stack slot to DestVT, extending it if needed. /// The resultant code need not be legal. SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, - MVT SlotVT, - MVT DestVT, + EVT SlotVT, + EVT DestVT, DebugLoc dl) { // Create the stack frame object. unsigned SrcAlign = TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType(). 
- getTypeForMVT()); + getTypeForEVT(*DAG.getContext())); SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); @@ -1739,7 +1721,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); unsigned DestAlign = - TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT()); + TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext())); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -1787,9 +1769,9 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned NumElems = Node->getNumOperands(); SDValue Value1, Value2; DebugLoc dl = Node->getDebugLoc(); - MVT VT = Node->getValueType(0); - MVT OpVT = Node->getOperand(0).getValueType(); - MVT EltVT = VT.getVectorElementType(); + EVT VT = Node->getValueType(0); + EVT OpVT = Node->getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); // If the only non-undef value is the low element, turn this into a // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X. 
@@ -1833,7 +1815,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); } else { assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); - const Type *OpNTy = OpVT.getTypeForMVT(); + const Type *OpNTy = OpVT.getTypeForEVT(*DAG.getContext()); CV.push_back(UndefValue::get(OpNTy)); } } @@ -1886,8 +1868,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { - MVT ArgVT = Node->getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForMVT(); + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; @@ -1897,10 +1879,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TLI.getPointerTy()); // Splice the libcall in wherever FindInputOutputChains tells us to. - const Type *RetTy = Node->getValueType(0).getTypeForMVT(); + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, CallingConv::C, false, Callee, Args, DAG, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. 
This will advance @@ -1916,8 +1900,8 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected request for libcall!"); + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -1932,8 +1916,8 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected request for libcall!"); + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; case MVT::i64: LC = Call_I64; break; @@ -1948,7 +1932,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, /// legal for the target. SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, - MVT DestVT, + EVT DestVT, DebugLoc dl) { if (Op0.getValueType() == MVT::i32) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2018,15 +2002,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. 
uint64_t FF; - switch (Op0.getValueType().getSimpleVT()) { - default: assert(0 && "Unsupported integer type!"); + switch (Op0.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) } if (TLI.isLittleEndian()) FF <<= 32; - Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF); + Constant *FudgeFactor = ConstantInt::get( + Type::getInt64Ty(*DAG.getContext()), FF); SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); @@ -2054,17 +2039,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, - MVT DestVT, + EVT DestVT, bool isSigned, DebugLoc dl) { // First step, figure out the appropriate *INT_TO_FP operation to use. - MVT NewInTy = LegalOp.getValueType(); + EVT NewInTy = LegalOp.getValueType(); unsigned OpToUse = 0; // Scan for the appropriate larger type to use. while (1) { - NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); assert(NewInTy.isInteger() && "Ran out of possibilities!"); // If the target supports SINT_TO_FP of this type, use it. @@ -2096,17 +2081,17 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. 
SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, - MVT DestVT, + EVT DestVT, bool isSigned, DebugLoc dl) { // First step, figure out the appropriate FP_TO*INT operation to use. - MVT NewOutTy = DestVT; + EVT NewOutTy = DestVT; unsigned OpToUse = 0; // Scan for the appropriate larger type to use. while (1) { - NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1); + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { @@ -2134,11 +2119,11 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, /// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. /// SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { - MVT VT = Op.getValueType(); - MVT SHVT = TLI.getShiftAmountTy(); + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; - switch (VT.getSimpleVT()) { - default: assert(0 && "Unhandled Expand type in BSWAP!"); abort(); + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2183,15 +2168,15 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: assert(0 && "Cannot expand this yet!"); + default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { static const uint64_t mask[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL }; - MVT VT = Op.getValueType(); - MVT ShVT = TLI.getShiftAmountTy(); + EVT VT = Op.getValueType(); + EVT ShVT = 
TLI.getShiftAmountTy(); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) @@ -2217,8 +2202,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // return popcount(~x); // // but see also: http://www.hackersdelight.org/HDcode/nlz.cc - MVT VT = Op.getValueType(); - MVT ShVT = TLI.getShiftAmountTy(); + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); @@ -2233,7 +2218,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // see also http://www.hackersdelight.org/HDcode/ntz.cc - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, @@ -2272,7 +2257,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(DAG.getConstant(1, Node->getValueType(0))); break; case ISD::EH_RETURN: - case ISD::DECLARE: case ISD::DBG_LABEL: case ISD::EH_LABEL: case ISD::PREFETCH: @@ -2291,21 +2275,22 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Node->getOperand(i)); break; case ISD::UNDEF: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); if (VT.isInteger()) Results.push_back(DAG.getConstant(0, VT)); else if (VT.isFloatingPoint()) Results.push_back(DAG.getConstantFP(0, VT)); else - assert(0 && "Unknown value type!"); + llvm_unreachable("Unknown value type!"); break; } case ISD::TRAP: { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = - TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy, + TLI.LowerCallTo(Node->getOperand(0), 
Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); @@ -2326,7 +2311,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::SIGN_EXTEND_INREG: { // NOTE: we could fall back on load/store here too for targets without // SAR. However, it is doubtful that any exist. - MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); unsigned BitsDiff = Node->getValueType(0).getSizeInBits() - ExtraVT.getSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy()); @@ -2343,7 +2328,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create // new ones, as reuse may inhibit scheduling. - MVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, Node->getValueType(0), dl); Results.push_back(Tmp1); @@ -2357,8 +2342,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; case ISD::FP_TO_UINT: { SDValue True, False; - MVT VT = Node->getOperand(0).getValueType(); - MVT NVT = Node->getValueType(0); + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); const uint64_t zero[] = {0, 0}; APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); APInt x = APInt::getSignBit(NVT.getSizeInBits()); @@ -2379,14 +2364,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::VAARG: { const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, 
Tmp1, Tmp2, V, 0); // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> - getTypeAllocSize(VT.getTypeForMVT()), + getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0); @@ -2434,8 +2419,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SmallVector<int, 8> Mask; cast<ShuffleVectorSDNode>(Node)->getMask(Mask); - MVT VT = Node->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = Node->getValueType(0); + EVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i != NumElems; ++i) { @@ -2458,7 +2443,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::EXTRACT_ELEMENT: { - MVT OpTy = Node->getOperand(0).getValueType(); + EVT OpTy = Node->getOperand(0).getValueType(); if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), @@ -2507,7 +2492,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; case ISD::FABS: { // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). 
- MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = DAG.getConstantFP(0.0, VT); Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), @@ -2622,7 +2607,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::SUB: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); @@ -2634,7 +2619,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::UREM: case ISD::SREM: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); bool isSigned = Node->getOpcode() == ISD::SREM; unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; @@ -2662,7 +2647,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::SDIV: { bool isSigned = Node->getOpcode() == ISD::SDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), @@ -2680,7 +2665,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::MULHS: { unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI; - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && "If this wasn't legal, it shouldn't have been created!"); @@ -2690,7 +2675,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::MUL: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); // See if multiply or divide can be lowered using two-result operations. 
// We just need the low half of the multiply; try both the signed @@ -2729,7 +2714,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - MVT OType = Node->getValueType(1); + EVT OType = Node->getValueType(1); SDValue Zero = DAG.getConstant(0, LHS.getValueType()); @@ -2770,7 +2755,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::UMULO: case ISD::SMULO: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -2786,8 +2771,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(MVT::getIntegerVT(VT.getSizeInBits() * 2))) { - MVT WideVT = MVT::getIntegerVT(VT.getSizeInBits() * 2); + } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -2800,7 +2785,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // type in some cases cases. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. 
- assert(0 && "Don't know how to expand this operation yet!"); + llvm_unreachable("Don't know how to expand this operation yet!"); } if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -2816,7 +2801,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::BUILD_PAIR: { - MVT PairTy = Node->getValueType(0); + EVT PairTy = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, @@ -2845,14 +2830,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); - MVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(); MachineFunction &MF = DAG.getMachineFunction(); unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize(); Index= DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(EntrySize, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); - MVT MemVT = MVT::getIntegerVT(EntrySize * 8); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, PseudoSourceValue::getJumpTable(), 0, MemVT); Addr = LD; @@ -2899,7 +2884,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. 
- MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); Results.push_back(Tmp1); @@ -2958,12 +2943,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } void SelectionDAGLegalize::PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results) { - MVT OVT = Node->getValueType(0); + EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || - Node->getOpcode() == ISD::SINT_TO_FP) { + Node->getOpcode() == ISD::SINT_TO_FP || + Node->getOpcode() == ISD::SETCC) { OVT = Node->getOperand(0).getValueType(); } - MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { @@ -2973,10 +2959,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); // Perform the larger operation. 
- Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, @@ -2987,7 +2973,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, DAG.getConstant(NVT.getSizeInBits() - OVT.getSizeInBits(), NVT)); } - Results.push_back(Tmp1); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; case ISD::BSWAP: { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); @@ -3012,16 +2998,26 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; case ISD::AND: case ISD::OR: - case ISD::XOR: - assert(OVT.isVector() && "Don't know how to promote scalar logic ops"); - // Bit convert each of the values to the new type. - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); - Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + case ISD::XOR: { + unsigned ExtOp, TruncOp; + if (OVT.isVector()) { + ExtOp = ISD::BIT_CONVERT; + TruncOp = ISD::BIT_CONVERT; + } else if (OVT.isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + llvm_report_error("Cannot promote logic operation"); + } + // Promote each of the values to the new type. + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + // Perform the larger operation, then convert back Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); - // Bit convert the result back the original type. 
- Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1)); + Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); break; - case ISD::SELECT: + } + case ISD::SELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector()) { ExtOp = ISD::BIT_CONVERT; @@ -3046,6 +3042,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, DAG.getIntPtrConstant(0)); Results.push_back(Tmp1); break; + } case ISD::VECTOR_SHUFFLE: { SmallVector<int, 8> Mask; cast<ShuffleVectorSDNode>(Node)->getMask(Mask); @@ -3061,31 +3058,14 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } case ISD::SETCC: { - // First step, figure out the appropriate operation to use. - // Allow SETCC to not be supported for all legal data types - // Mostly this targets FP - MVT NewInTy = Node->getOperand(0).getValueType(); - MVT OldVT = NewInTy; OldVT = OldVT; - - // Scan for the appropriate larger type to use. - while (1) { - NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); - - assert(NewInTy.isInteger() == OldVT.isInteger() && - "Fell off of the edge of the integer world"); - assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() && - "Fell off of the edge of the floating point world"); - - // If the target supports SETCC of this type, use it. - if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy)) - break; - } - if (NewInTy.isInteger()) - assert(0 && "Cannot promote Legal Integer SETCC yet"); - else { - Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp1); - Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp2); + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(2))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, Tmp2, Node->getOperand(2))); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c3c1bea..84e39b4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -20,10 +20,12 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// GetFPLibCall - Return the right libcall for the given floating point type. -static RTLIB::Libcall GetFPLibCall(MVT VT, +static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, @@ -41,18 +43,17 @@ static RTLIB::Libcall GetFPLibCall(MVT VT, //===----------------------------------------------------------------------===// void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + errs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SoftenFloatResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to soften the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to soften the result of this operator!"); case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; @@ -107,14 +108,14 @@ SDValue 
DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), - TLI.getTypeToTransformTo(N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), BitConvertToInteger(N->getOperand(0)), BitConvertToInteger(N->getOperand(1))); } SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), - TLI.getTypeToTransformTo(N->getValueType(0))); + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -125,7 +126,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); // Mask = ~(1 << (Size-1)) @@ -136,7 +137,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -148,7 +149,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, @@ -163,8 +164,8 @@ SDValue 
DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue RHS = BitConvertToInteger(N->getOperand(1)); DebugLoc dl = N->getDebugLoc(); - MVT LVT = LHS.getValueType(); - MVT RVT = RHS.getValueType(); + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); unsigned LSize = LVT.getSizeInBits(); unsigned RSize = RVT.getSizeInBits(); @@ -199,7 +200,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, @@ -210,7 +211,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -222,7 +223,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, @@ -233,7 +234,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, @@ -244,7 +245,7 @@ SDValue 
DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, @@ -255,7 +256,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, @@ -266,7 +267,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, @@ -277,7 +278,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::LOG10_F32, @@ -288,7 +289,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -300,7 +301,7 @@ 
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, @@ -311,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; @@ -324,7 +325,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); @@ -332,7 +333,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); @@ -340,7 +341,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -354,7 +355,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { assert(N->getOperand(1).getValueType() == MVT::i32 && "Unsupported power type!"); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, @@ -365,7 +366,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -377,7 +378,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, @@ -388,7 +389,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, @@ -399,7 +400,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { - MVT 
NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, @@ -410,7 +411,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -422,7 +423,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -434,8 +435,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); SDValue NewL; @@ -479,19 +480,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. 
- MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); SDValue NewVAARG; NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); - + // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); @@ -500,9 +501,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { bool Signed = N->getOpcode() == ISD::SINT_TO_FP; - MVT SVT = N->getOperand(0).getValueType(); - MVT RVT = N->getValueType(0); - MVT NVT = MVT(); + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + EVT NVT = EVT(); DebugLoc dl = N->getDebugLoc(); // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to @@ -521,7 +522,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl); + return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl); } @@ -530,18 +531,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SoftenFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to soften this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; @@ -574,7 +574,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, DebugLoc dl) { SDValue LHSInt = GetSoftenedFloat(NewLHS); SDValue RHSInt = GetSoftenedFloat(NewRHS); - MVT VT = NewLHS.getValueType(); + EVT VT = NewLHS.getValueType(); assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); @@ -637,7 +637,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, } } - MVT RetVT = MVT::i32; // FIXME: is this the correct return type? + EVT RetVT = MVT::i32; // FIXME: is this the correct return type? 
SDValue Ops[2] = { LHSInt, RHSInt }; NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); NewRHS = DAG.getConstant(0, RetVT); @@ -659,8 +659,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { - MVT SVT = N->getOperand(0).getValueType(); - MVT RVT = N->getValueType(0); + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); @@ -688,7 +688,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -696,7 +696,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -767,7 +767,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. 
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand float result: "; N->dump(&DAG); errs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -778,11 +778,10 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandFloatResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandFloatResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to expand the result of this operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; @@ -830,7 +829,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); @@ -982,7 +981,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); } @@ -1067,7 +1066,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, SDValue Ptr = LD->getBasePtr(); DebugLoc dl = N->getDebugLoc(); - MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + 
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); @@ -1090,10 +1089,10 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Src = N->getOperand(0); - MVT SrcVT = Src.getValueType(); + EVT SrcVT = Src.getValueType(); bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; DebugLoc dl = N->getDebugLoc(); @@ -1135,7 +1134,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; const uint64_t *Parts = 0; - switch (SrcVT.getSimpleVT()) { + switch (SrcVT.getSimpleVT().SimpleTy) { default: assert(false && "Unsupported UINT_TO_FP!"); case MVT::i32: @@ -1167,7 +1166,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, /// types of the node are known to be legal, but other operands of the node may /// need promotion or expansion as well as the specified one. 
bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand float operand: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) @@ -1178,11 +1177,10 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to expand this operator's operand!"); case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; @@ -1224,7 +1222,7 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); - MVT VT = NewLHS.getValueType(); + EVT VT = NewLHS.getValueType(); assert(VT == MVT::ppcf128 && "Unsupported setcc type!"); // FIXME: This generated code sucks. 
We want to generate @@ -1276,7 +1274,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on @@ -1297,7 +1295,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on @@ -1374,7 +1372,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType()); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0c826f6..8ac8063 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -20,6 +20,8 @@ #include "LegalizeTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -31,7 +33,7 @@ using namespace llvm; /// may also have invalid operands or may have other results that need /// expansion, we just know that (at least) one result needs promotion. 
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Promote integer result: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom expand this node. @@ -41,11 +43,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "PromoteIntegerResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "PromoteIntegerResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to promote this operator!"); - abort(); + llvm_unreachable("Do not know how to promote this operator!"); case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; @@ -161,10 +162,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); - MVT NInVT = TLI.getTypeToTransformTo(InVT); - MVT OutVT = N->getValueType(0); - MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + EVT InVT = InOp.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { @@ -201,7 +202,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { std::swap(Lo, Hi); InOp = DAG.getNode(ISD::ANY_EXTEND, dl, - MVT::getIntegerVT(NOutVT.getSizeInBits()), + EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()), JoinIntegers(Lo, Hi)); return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); } @@ -211,24 +212,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { return 
DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); } - // Otherwise, lower the bit-convert to a store/load from the stack. - // Create the stack frame object. Make sure it is aligned for both - // the source and destination types. - SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT); - int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); - - // Result is an extending load from the stack slot. - return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); } SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - MVT OVT = N->getValueType(0); - MVT NVT = Op.getValueType(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); @@ -240,18 +231,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), - TLI.getTypeToTransformTo(N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), JoinIntegers(N->getOperand(0), N->getOperand(1))); } SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // FIXME there is no actual debug info here DebugLoc dl = N->getDebugLoc(); // Zero extend things like i1, sign extend everything else. It shouldn't // matter in theory which one we pick, but this tends to give better code? unsigned Opc = VT.isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT), + SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(*DAG.getContext(), VT), SDValue(N, 0)); assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); return Result; @@ -263,7 +254,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && "can only promote integers"); - MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), CvtCode); @@ -273,8 +264,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); DebugLoc dl = N->getDebugLoc(); - MVT OVT = N->getValueType(0); - MVT NVT = Op.getValueType(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode(ISD::SUB, dl, NVT, Op, @@ -290,8 +281,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - MVT OVT = N->getValueType(0); - MVT NVT = Op.getValueType(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); // The count is the same in the promoted type except if the original // value was zero. 
This can be handled by setting the bit just off @@ -303,63 +294,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { - MVT OldVT = N->getValueType(0); - SDValue OldVec = N->getOperand(0); - if (getTypeAction(OldVec.getValueType()) == WidenVector) - OldVec = GetWidenedVector(N->getOperand(0)); - unsigned OldElts = OldVec.getValueType().getVectorNumElements(); DebugLoc dl = N->getDebugLoc(); - - if (OldElts == 1) { - assert(!isTypeLegal(OldVec.getValueType()) && - "Legal one-element vector of a type needing promotion!"); - // It is tempting to follow GetScalarizedVector by a call to - // GetPromotedInteger, but this would be wrong because the - // scalarized value may not yet have been processed. - return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), - GetScalarizedVector(OldVec)); - } - - // Convert to a vector half as long with an element type of twice the width, - // for example <4 x i16> -> <2 x i32>. - assert(!(OldElts & 1) && "Odd length vectors not supported!"); - MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits()); - assert(OldVT.isSimple() && NewVT.isSimple()); - - SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::getVectorVT(NewVT, OldElts / 2), - OldVec); - - // Extract the element at OldIdx / 2 from the new vector. - SDValue OldIdx = N->getOperand(1); - SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx, - DAG.getConstant(1, TLI.getPointerTy())); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx); - - // Select the appropriate half of the element: Lo if OldIdx was even, - // Hi if it was odd. - SDValue Lo = Elt; - SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt, - DAG.getConstant(OldVT.getSizeInBits(), - TLI.getPointerTy())); - if (TLI.isBigEndian()) - std::swap(Lo, Hi); - - // Extend to the promoted type. 
- SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx); - SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo); - return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), + N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NewOpc = N->getOpcode(); DebugLoc dl = N->getDebugLoc(); // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT // and SINT conversions are Custom, there is no way to tell which is preferable. - // We choose SINT because that's the right thing on PPC.) + // We choose SINT because that's the right thing on PPC.) if (N->getOpcode() == ISD::FP_TO_UINT && !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) @@ -376,7 +325,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { @@ -403,7 +352,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? 
ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); @@ -421,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1)); - MVT ValueVTs[] = { N->getValueType(0), NVT }; + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + EVT ValueVTs[] = { N->getValueType(0), NVT }; SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), DAG.getVTList(ValueVTs, 2), Ops, 2); @@ -442,8 +391,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { // sign extension of its truncation to the original type. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - MVT OVT = N->getOperand(0).getValueType(); - MVT NVT = LHS.getValueType(); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); DebugLoc dl = N->getDebugLoc(); // Do the arithmetic in the larger type. @@ -487,7 +436,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); assert(isTypeLegal(SVT) && "Illegal SetCC type!"); DebugLoc dl = N->getDebugLoc(); @@ -496,14 +445,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { N->getOperand(1), N->getOperand(2)); // Convert to the expected type. 
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { return DAG.getNode(ISD::SHL, N->getDebugLoc(), - TLI.getTypeToTransformTo(N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); } @@ -532,18 +481,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { // The input value must be properly zero extended. - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Res = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; switch (getTypeAction(N->getOperand(0).getValueType())) { - default: assert(0 && "Unknown type action!"); + default: llvm_unreachable("Unknown type action!"); case Legal: case ExpandInteger: Res = N->getOperand(0); @@ -565,8 +514,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { // zero extension of its truncation to the original type. SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - MVT OVT = N->getOperand(0).getValueType(); - MVT NVT = LHS.getValueType(); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); DebugLoc dl = N->getDebugLoc(); // Do the arithmetic in the larger type. 
@@ -594,17 +543,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - MVT RegVT = TLI.getRegisterType(VT); - unsigned NumRegs = TLI.getNumRegisters(VT); + EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); // The argument is passed as NumRegs registers of type RegVT. SmallVector<SDValue, 8> Parts(NumRegs); @@ -618,7 +567,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { std::reverse(Parts.begin(), Parts.end()); // Assemble the parts in the promoted type. - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); for (unsigned i = 1; i < NumRegs; ++i) { SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); @@ -650,7 +599,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. 
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Promote integer operand: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -659,11 +608,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "PromoteIntegerOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to promote this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to promote this operator's operand!"); case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; @@ -719,7 +667,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, // insert sign extends for ALL conditions, but zero extend is cheaper on // many machines (an AND instead of two shifts), so prefer it. switch (CCCode) { - default: assert(0 && "Unknown integer comparison!"); + default: llvm_unreachable("Unknown integer comparison!"); case ISD::SETEQ: case ISD::SETNE: case ISD::SETUGE: @@ -770,7 +718,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - MVT SVT = TLI.getSetCCResultType(MVT::Other); + EVT SVT = TLI.getSetCCResultType(MVT::Other); SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. 
@@ -780,7 +728,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { // Since the result type is legal, the operands must promote to it. - MVT OVT = N->getOperand(0).getValueType(); + EVT OVT = N->getOperand(0).getValueType(); SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); SDValue Hi = GetPromotedInteger(N->getOperand(1)); assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); @@ -795,7 +743,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // The vector type is legal but the element type is not. This implies // that the vector is a power-of-two in length and that the element // type does not have a strange size (eg: it is not i1). - MVT VecVT = N->getValueType(0); + EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); assert(!(NumElts & 1) && "Legal vector of one illegal element?"); @@ -871,7 +819,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); + EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); return DAG.UpdateNodeOperands(SDValue(N, 0), Cond, @@ -962,7 +910,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. 
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand integer result: "; N->dump(&DAG); errs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -973,11 +921,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandIntegerResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to expand the result of this operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; @@ -1043,10 +990,10 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); - MVT NVT = InL.getValueType(); + EVT NVT = InL.getValueType(); unsigned VTBits = N->getValueType(0).getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); - MVT ShTy = N->getOperand(1).getValueType(); + EVT ShTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::SHL) { if (Amt > VTBits) { @@ -1060,7 +1007,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Hi = InL; } else if (Amt == 1 && TLI.isOperationLegalOrCustom(ISD::ADDC, - TLI.getTypeToExpandTo(NVT))) { + TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { // Emit this X << 1 as X+X. 
SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); SDValue LoOps[2] = { InL, InL }; @@ -1130,8 +1077,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, bool DAGTypeLegalizer:: ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); - MVT ShTy = Amt.getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); unsigned ShBits = ShTy.getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); assert(isPowerOf2_32(NVTBits) && @@ -1158,7 +1105,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(~HighBitMask, ShTy)); switch (N->getOpcode()) { - default: assert(0 && "Unknown shift"); + default: llvm_unreachable("Unknown shift"); case ISD::SHL: Lo = DAG.getConstant(0, NVT); // Low part is zero. Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. @@ -1186,7 +1133,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Amt); unsigned Op1, Op2; switch (N->getOpcode()) { - default: assert(0 && "Unknown shift"); + default: llvm_unreachable("Unknown shift"); case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; case ISD::SRL: case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; @@ -1208,8 +1155,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { bool DAGTypeLegalizer:: ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); - MVT ShTy = Amt.getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); unsigned NVTBits = NVT.getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); @@ -1226,7 +1173,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Lo1, 
Hi1, Lo2, Hi2; switch (N->getOpcode()) { - default: assert(0 && "Unknown shift"); + default: llvm_unreachable("Unknown shift"); case ISD::SHL: // ShAmt < NVTBits Lo1 = DAG.getConstant(0, NVT); // Low part is zero. @@ -1283,7 +1230,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); - MVT NVT = LHSL.getValueType(); + EVT NVT = LHSL.getValueType(); SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; @@ -1295,7 +1242,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, bool hasCarry = TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? ISD::ADDC : ISD::SUBC, - TLI.getTypeToExpandTo(NVT)); + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (hasCarry) { SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); @@ -1384,7 +1331,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { @@ -1408,14 +1355,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); - MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); unsigned NVTBits = NVT.getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, - DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates 
the sign bit of Lo, make it explicit. @@ -1428,14 +1375,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); - MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); unsigned NVTBits = NVT.getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, - DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part must be zero, make it explicit. @@ -1453,7 +1400,7 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); @@ -1465,7 +1412,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, DebugLoc dl = N->getDebugLoc(); // ctlz (HiLo) -> Hi != 0 ? 
ctlz(Hi) : (ctlz(Lo)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); + EVT NVT = Lo.getValueType(); SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); @@ -1484,7 +1431,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, DebugLoc dl = N->getDebugLoc(); // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); + EVT NVT = Lo.getValueType(); Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); Hi = DAG.getConstant(0, NVT); @@ -1495,7 +1442,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, DebugLoc dl = N->getDebugLoc(); // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); + EVT NVT = Lo.getValueType(); SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); @@ -1512,7 +1459,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); @@ -1522,7 +1469,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); @@ -1538,8 +1485,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, assert(ISD::isUNINDEXEDLoad(N) && 
"Indexed load during type legalization!"); - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); @@ -1551,10 +1498,10 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, assert(NVT.isByteSized() && "Expanded type not byte sized!"); if (N->getMemoryVT().bitsLE(NVT)) { - MVT EVT = N->getMemoryVT(); + EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - EVT, isVolatile, Alignment); + MemVT, isVolatile, Alignment); // Remember the chain. Ch = Lo.getValue(1); @@ -1580,7 +1527,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); - MVT NEVT = MVT::getIntegerVT(ExcessBits); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; @@ -1597,14 +1544,15 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else { // Big-endian - high bits are at low addresses. Favor aligned loads at // the cost of some bit-fiddling. - MVT EVT = N->getMemoryVT(); - unsigned EBytes = EVT.getStoreSizeInBits()/8; + EVT MemVT = N->getMemoryVT(); + unsigned EBytes = MemVT.getStoreSize(); unsigned IncrementSize = NVT.getSizeInBits()/8; unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits), + EVT::getIntegerVT(*DAG.getContext(), + MemVT.getSizeInBits() - ExcessBits), isVolatile, Alignment); // Increment the pointer to the other half. @@ -1613,7 +1561,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Load the rest of the low bits. 
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, - MVT::getIntegerVT(ExcessBits), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1652,8 +1600,8 @@ void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); @@ -1742,7 +1690,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1762,7 +1710,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // If we can emit an efficient shift operation, do so now. Check to see if @@ -1788,7 +1736,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // Next check to see if the target supports this SHL_PARTS operation or if it // will custom expand it. 
- MVT NVT = TLI.getTypeToTransformTo(VT); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || Action == TargetLowering::Custom) { @@ -1797,7 +1745,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, GetExpandedInteger(N->getOperand(0), LHSL, LHSH); SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; - MVT VT = LHSL.getValueType(); + EVT VT = LHSL.getValueType(); Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); Hi = Lo.getValue(1); return; @@ -1838,7 +1786,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, else if (VT == MVT::i128) LC = RTLIB::SRA_I128; } - + if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); @@ -1846,12 +1794,12 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, } if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) - assert(0 && "Unsupported shift!"); + llvm_unreachable("Unsupported shift!"); } void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { @@ -1874,7 +1822,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); } } @@ -1882,7 +1830,7 @@ void DAGTypeLegalizer:: ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), 
Lo, Hi); - MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); if (EVT.bitsLE(Lo.getValueType())) { // sext_inreg the low part if needed. @@ -1900,13 +1848,13 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); } } void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1926,7 +1874,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); Hi = DAG.getNode(ISD::SRL, dl, @@ -1937,7 +1885,7 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1957,7 +1905,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1977,7 +1925,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = 
TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { @@ -1996,7 +1944,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SplitInteger(Res, Lo, Hi); unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); - Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits)); + Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); } } @@ -2010,7 +1958,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand integer operand: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -2019,11 +1967,10 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandIntegerOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to expand this operator's operand!"); case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; @@ -2070,7 +2017,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, GetExpandedInteger(NewLHS, LHSLo, LHSHi); GetExpandedInteger(NewRHS, RHSLo, RHSHi); - MVT VT = NewLHS.getValueType(); + EVT VT = NewLHS.getValueType(); if (CCCode == ISD::SETEQ || CCCode == 
ISD::SETNE) { if (RHSLo == RHSHi) { @@ -2105,7 +2052,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // FIXME: This generated code sucks. ISD::CondCode LowCC; switch (CCCode) { - default: assert(0 && "Unknown integer setcc!"); + default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETLT: case ISD::SETULT: LowCC = ISD::SETULT; break; case ISD::SETGT: @@ -2122,7 +2069,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) - TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL); + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL); SDValue Tmp1, Tmp2; Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); @@ -2228,7 +2175,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); - MVT DstVT = N->getValueType(0); + EVT DstVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); @@ -2242,8 +2189,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); assert(OpNo == 1 && "Can only expand the stored value so far"); - MVT VT = N->getOperand(1).getValueType(); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getOperand(1).getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); int SVOffset = N->getSrcValueOffset(); @@ -2267,7 +2214,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); - 
MVT NEVT = MVT::getIntegerVT(ExcessBits); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; @@ -2282,11 +2229,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // the cost of some bit-fiddling. GetExpandedInteger(N->getValue(), Lo, Hi); - MVT EVT = N->getMemoryVT(); - unsigned EBytes = EVT.getStoreSizeInBits()/8; + EVT ExtVT = N->getMemoryVT(); + unsigned EBytes = ExtVT.getStoreSize(); unsigned IncrementSize = NVT.getSizeInBits()/8; unsigned ExcessBits = (EBytes - IncrementSize)*8; - MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits); + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits); if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. @@ -2309,7 +2256,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset+IncrementSize, - MVT::getIntegerVT(ExcessBits), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2324,8 +2271,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); - MVT SrcVT = Op.getValueType(); - MVT DstVT = N->getValueType(0); + EVT SrcVT = Op.getValueType(); + EVT DstVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ @@ -2360,7 +2307,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. 
- SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)), + SDValue FudgePtr = DAG.getConstantPool( + ConstantInt::get(*DAG.getContext(), FF.zext(64)), TLI.getPointerTy()); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 3135a44..5992f5d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -15,9 +15,11 @@ #include "LegalizeTypes.h" #include "llvm/CallingConv.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt<bool> @@ -113,43 +115,43 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (I->getNodeId() != Processed) { if (Mapped != 0) { - cerr << "Unprocessed value in a map!"; + errs() << "Unprocessed value in a map!"; Failed = true; } } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { if (Mapped > 1) { - cerr << "Value with legal type was transformed!"; + errs() << "Value with legal type was transformed!"; Failed = true; } } else { if (Mapped == 0) { - cerr << "Processed value not in any map!"; + errs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { - cerr << "Value in multiple maps!"; + errs() << "Value in multiple maps!"; Failed = true; } } if (Failed) { if (Mapped & 1) - cerr << " ReplacedValues"; + errs() << " ReplacedValues"; if (Mapped & 2) - cerr << " PromotedIntegers"; + errs() << " PromotedIntegers"; if (Mapped & 4) - cerr << " SoftenedFloats"; + errs() << " SoftenedFloats"; if (Mapped & 8) - cerr << " ScalarizedVectors"; + errs() << " ScalarizedVectors"; if (Mapped & 16) - cerr << " ExpandedIntegers"; + errs() << " ExpandedIntegers"; if (Mapped & 32) - cerr << " ExpandedFloats"; + errs() << " 
ExpandedFloats"; if (Mapped & 64) - cerr << " SplitVectors"; + errs() << " SplitVectors"; if (Mapped & 128) - cerr << " WidenedVectors"; - cerr << "\n"; - abort(); + errs() << " WidenedVectors"; + errs() << "\n"; + llvm_unreachable(0); } } } @@ -210,7 +212,7 @@ bool DAGTypeLegalizer::run() { // Scan the values produced by the node, checking to see if any result // types are illegal. for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { - MVT ResultVT = N->getValueType(i); + EVT ResultVT = N->getValueType(i); switch (getTypeAction(ResultVT)) { default: assert(false && "Unknown action!"); @@ -263,7 +265,7 @@ ScanOperands: if (IgnoreNodeResults(N->getOperand(i).getNode())) continue; - MVT OpVT = N->getOperand(i).getValueType(); + EVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { default: assert(false && "Unknown action!"); @@ -336,7 +338,7 @@ ScanOperands: } if (i == NumOperands) { - DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Legally typed node: "; N->dump(&DAG); errs() << "\n"); } } NodeDone: @@ -405,7 +407,7 @@ NodeDone: if (!IgnoreNodeResults(I)) for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) if (!isTypeLegal(I->getValueType(i))) { - cerr << "Result type " << i << " illegal!\n"; + errs() << "Result type " << i << " illegal!\n"; Failed = true; } @@ -413,25 +415,25 @@ NodeDone: for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(I->getOperand(i).getNode()) && !isTypeLegal(I->getOperand(i).getValueType())) { - cerr << "Operand type " << i << " illegal!\n"; + errs() << "Operand type " << i << " illegal!\n"; Failed = true; } if (I->getNodeId() != Processed) { if (I->getNodeId() == NewNode) - cerr << "New node not analyzed?\n"; + errs() << "New node not analyzed?\n"; else if (I->getNodeId() == Unanalyzed) - cerr << "Unanalyzed node not noticed?\n"; + errs() << "Unanalyzed node not noticed?\n"; else if (I->getNodeId() > 0) 
- cerr << "Operand not processed?\n"; + errs() << "Operand not processed?\n"; else if (I->getNodeId() == ReadyToProcess) - cerr << "Not added to worklist?\n"; + errs() << "Not added to worklist?\n"; Failed = true; } if (Failed) { - I->dump(&DAG); cerr << "\n"; - abort(); + I->dump(&DAG); errs() << "\n"; + llvm_unreachable(0); } } #endif @@ -479,8 +481,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. - for (unsigned j = 0; j < i; ++j) - NewOps.push_back(N->getOperand(j)); + NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); NewOps.push_back(Op); } } @@ -732,6 +733,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted integer"); AnalyzeNewValue(Result); SDValue &OpEntry = PromotedIntegers[Op]; @@ -740,6 +743,8 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; @@ -748,6 +753,8 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + "Invalid type for scalarized vector"); AnalyzeNewValue(Result); SDValue &OpEntry = ScalarizedVectors[Op]; @@ -767,6 +774,9 @@ void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi) { + assert(Lo.getValueType() == 
TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded integer"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); @@ -790,6 +800,9 @@ void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi) { + assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded float"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); @@ -813,6 +826,12 @@ void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi) { + assert(Lo.getValueType().getVectorElementType() == + Op.getValueType().getVectorElementType() && + 2*Lo.getValueType().getVectorNumElements() == + Op.getValueType().getVectorNumElements() && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for split vector"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. 
AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); @@ -825,6 +844,8 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for widened vector"); AnalyzeNewValue(Result); SDValue &OpEntry = WidenedVectors[Op]; @@ -841,7 +862,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), - MVT::getIntegerVT(BitWidth), Op); + EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } /// BitConvertVectorToIntegerVector - Convert to a vector of integers of the @@ -849,14 +870,14 @@ SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { assert(Op.getValueType().isVector() && "Only applies to vectors!"); unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); - MVT EltNVT = MVT::getIntegerVT(EltWidth); + EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), - MVT::getVectorVT(EltNVT, NumElts), Op); + EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); } SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, - MVT DestVT) { + EVT DestVT) { DebugLoc dl = Op.getDebugLoc(); // Create the stack frame object. Make sure it is aligned for both // the source and destination types. @@ -875,7 +896,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, /// The last parameter being TRUE means we are dealing with a /// node with illegal result types. The second parameter denotes the type of /// illegal ResNo in that case. 
-bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) { +bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // See if the target wants to custom lower this node. if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) return false; @@ -900,21 +921,14 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) { /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type /// which is split into two not necessarily identical pieces. -void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) { +void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { + // Currently all types are split in half. if (!InVT.isVector()) { - LoVT = HiVT = TLI.getTypeToTransformTo(InVT); + LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); } else { - MVT NewEltVT = InVT.getVectorElementType(); unsigned NumElements = InVT.getVectorNumElements(); - if ((NumElements & (NumElements-1)) == 0) { // Simple power of two vector. - NumElements >>= 1; - LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements); - } else { // Non-power-of-two vectors. 
- unsigned NewNumElts_Lo = 1 << Log2_32(NumElements); - unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo; - LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo); - HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi); - } + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2); } } @@ -923,14 +937,14 @@ void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) { void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi) { DebugLoc dl = Pair.getDebugLoc(); - MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, DAG.getIntPtrConstant(1)); } -SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT, +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { DebugLoc dl = Index.getDebugLoc(); // Make sure the index type is big enough to compute in. @@ -952,9 +966,9 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { // Arbitrarily use dlHi for result DebugLoc DebugLoc dlHi = Hi.getDebugLoc(); DebugLoc dlLo = Lo.getDebugLoc(); - MVT LVT = Lo.getValueType(); - MVT HVT = Hi.getValueType(); - MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits()); + EVT LVT = Lo.getValueType(); + EVT HVT = Hi.getValueType(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits()); Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); @@ -986,7 +1000,7 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, /// MakeLibCall - Generate a libcall taking the given operands as arguments and /// returning a result of type RetVT. 
-SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, +SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl) { TargetLowering::ArgListTy Args; @@ -995,7 +1009,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, TargetLowering::ArgListEntry Entry; for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; - Entry.Ty = Entry.Node.getValueType().getTypeForMVT(); + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); @@ -1003,17 +1017,19 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - const Type *RetTy = RetVT.getTypeForMVT(); + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, CallingConv::C, false, Callee, Args, DAG, dl); + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); return CallInfo.first; } /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. -SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { DebugLoc dl = Bool.getDebugLoc(); ISD::NodeType ExtendCode; switch (TLI.getBooleanContents()) { @@ -1039,7 +1055,7 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT /// bits in Hi. 
void DAGTypeLegalizer::SplitInteger(SDValue Op, - MVT LoVT, MVT HiVT, + EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi) { DebugLoc dl = Op.getDebugLoc(); assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == @@ -1054,7 +1070,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, /// type half the size of Op's. void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { - MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Op.getValueType().getSizeInBits()/2); SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 02b0732..859c656 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -64,7 +64,7 @@ private: SoftenFloat, // Convert this float type to a same size integer type. ExpandFloat, // Split this float type into two of half the size. ScalarizeVector, // Replace this one-element vector with its element type. - SplitVector, // This vector type should be split into smaller vectors. + SplitVector, // Split this vector type into two of half the size. WidenVector // This vector type should be widened into a larger vector. }; @@ -74,8 +74,8 @@ private: TargetLowering::ValueTypeActionImpl ValueTypeActions; /// getTypeAction - Return how we should legalize values of this type. 
- LegalizeAction getTypeAction(MVT VT) const { - switch (ValueTypeActions.getTypeAction(VT)) { + LegalizeAction getTypeAction(EVT VT) const { + switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) { default: assert(false && "Unknown legalize action!"); case TargetLowering::Legal: @@ -96,7 +96,7 @@ private: if (VT.isInteger()) return ExpandInteger; else if (VT.getSizeInBits() == - TLI.getTypeToTransformTo(VT).getSizeInBits()) + TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) return SoftenFloat; else return ExpandFloat; @@ -109,8 +109,9 @@ private: } /// isTypeLegal - Return true if this type is legal on this target. - bool isTypeLegal(MVT VT) const { - return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + bool isTypeLegal(EVT VT) const { + return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) == + TargetLowering::Legal); } /// IgnoreNodeResults - Pretend all of this node's results are legal. @@ -185,19 +186,19 @@ private: // Common routines. 
SDValue BitConvertToInteger(SDValue Op); SDValue BitConvertVectorToIntegerVector(SDValue Op); - SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT); - bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult); - SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index); + SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT); + bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult); + SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl); - SDValue PromoteTargetBoolean(SDValue Bool, MVT VT); + SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); void ReplaceValueWith(SDValue From, SDValue To); void ReplaceValueWithHelper(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); - void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT, + void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi); //===--------------------------------------------------------------------===// @@ -224,7 +225,7 @@ private: /// SExtPromotedInteger - Get a promoted operand and sign extend it to the /// final size. SDValue SExtPromotedInteger(SDValue Op) { - MVT OldVT = Op.getValueType(); + EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, @@ -234,7 +235,7 @@ private: /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the /// final size. 
SDValue ZExtPromotedInteger(SDValue Op) { - MVT OldVT = Op.getValueType(); + EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); return DAG.getZeroExtendInReg(Op, dl, OldVT); @@ -506,7 +507,6 @@ private: // Vector Result Scalarization: <1 x ty> -> ty. void ScalarizeVectorResult(SDNode *N, unsigned OpNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); - SDValue ScalarizeVecRes_ShiftOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); @@ -518,6 +518,7 @@ private: SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_SETCC(SDNode *N); SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); SDValue ScalarizeVecRes_VSETCC(SDNode *N); @@ -533,8 +534,8 @@ private: // Vector Splitting Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// - /// GetSplitVector - Given a processed vector Op which was split into smaller - /// vectors, this method returns the smaller vectors. The first elements of + /// GetSplitVector - Given a processed vector Op which was split into vectors + /// of half the size, this method returns the halves. The first elements of /// Op coincide with the elements of Lo; the remaining elements of Op coincide /// with the elements of Hi: Op is what you would get by concatenating Lo and /// Hi. 
For example, if Op is a v8i32 that was split into two v4i32's, then @@ -558,10 +559,10 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi); // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. bool SplitVectorOperand(SDNode *N, unsigned OpNo); @@ -641,7 +642,7 @@ private: SDValue BasePtr, const Value *SV, int SVOffset, unsigned Alignment, bool isVolatile, unsigned LdWidth, - MVT ResType, DebugLoc dl); + EVT ResType, DebugLoc dl); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory @@ -664,7 +665,7 @@ private: /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. - SDValue ModifyToType(SDValue InOp, MVT WidenVT); + SDValue ModifyToType(SDValue InOp, EVT WidenVT); //===--------------------------------------------------------------------===// @@ -686,7 +687,7 @@ private: /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type /// which is split (or expanded) into two not necessarily identical pieces. - void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT); + void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT); /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 6e5adee..0eafe62 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -11,9 +11,11 @@ // The routines here perform legalization when the details of the type (such as // whether it is an integer or a float) do not matter. // Expansion is the act of changing a computation in an illegal type to be a -// computation in two identical registers of a smaller type. +// computation in two identical registers of a smaller type. The Lo/Hi part +// is required to be stored first in memory on little/big-endian machines. // Splitting is the act of changing a computation in an illegal type to be a // computation in two not necessarily identical registers of a smaller type. +// There are no requirements on how the type is represented in memory. // //===----------------------------------------------------------------------===// @@ -32,10 +34,10 @@ using namespace llvm; void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT OutVT = N->getValueType(0); - MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); DebugLoc dl = N->getDebugLoc(); // Handle some special cases efficiently. @@ -59,16 +61,12 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case SplitVector: - // Convert the split parts of the input if it was split in two. 
GetSplitVector(InOp, Lo, Hi); - if (Lo.getValueType() == Hi.getValueType()) { - if (TLI.isBigEndian()) - std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); - return; - } - break; + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; case ScalarizeVector: // Convert the element instead. SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); @@ -78,7 +76,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, case WidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); InOp = GetWidenedVector(InOp); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); @@ -95,7 +93,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, if (InVT.isVector() && OutVT.isInteger()) { // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand // is legal but the result is not. - MVT NVT = MVT::getVectorVT(NOutVT, 2); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); @@ -106,7 +104,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, if (TLI.isBigEndian()) std::swap(Lo, Hi); - + return; } } @@ -117,7 +115,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. 
unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT()); + TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); @@ -169,11 +167,11 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Convert to a vector of the expanded element type, for example // <3 x i64> -> <6 x i32>. - MVT OldVT = N->getValueType(0); - MVT NewVT = TLI.getTypeToTransformTo(OldVT); + EVT OldVT = N->getValueType(0); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::getVectorVT(NewVT, 2*OldElts), + EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), OldVec); // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. @@ -200,7 +198,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DebugLoc dl = N->getDebugLoc(); LoadSDNode *LD = cast<LoadSDNode>(N); - MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); int SVOffset = LD->getSrcValueOffset(); @@ -235,7 +233,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); @@ -265,8 +263,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). 
For example, on // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. - MVT OVT = N->getOperand(0).getValueType(); - MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2); if (isTypeLegal(NVT)) { SDValue Parts[2]; @@ -286,10 +284,10 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { // The vector type is legal but the element type needs expansion. - MVT VecVT = N->getValueType(0); + EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - MVT OldVT = N->getOperand(0).getValueType(); - MVT NewVT = TLI.getTypeToTransformTo(OldVT); + EVT OldVT = N->getOperand(0).getValueType(); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); DebugLoc dl = N->getDebugLoc(); assert(OldVT == VecVT.getVectorElementType() && @@ -310,7 +308,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { } SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::getVectorVT(NewVT, NewElts.size()), + EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()), &NewElts[0], NewElts.size()); // Convert the new vector to the old vector type. @@ -325,20 +323,20 @@ SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // The vector type is legal but the element type needs expansion. 
- MVT VecVT = N->getValueType(0); + EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); DebugLoc dl = N->getDebugLoc(); SDValue Val = N->getOperand(1); - MVT OldEVT = Val.getValueType(); - MVT NewEVT = TLI.getTypeToTransformTo(OldEVT); + EVT OldEVT = Val.getValueType(); + EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT); assert(OldEVT == VecVT.getVectorElementType() && "Inserted element type doesn't match vector element type!"); // Bitconvert to a vector of twice the length with elements of the expanded // type, insert the expanded vector elements, and then convert back. - MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, N->getOperand(0)); @@ -360,7 +358,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); unsigned NumElts = VT.getVectorNumElements(); @@ -378,7 +376,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *St = cast<StoreSDNode>(N); - MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType()); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); int SVOffset = St->getSrcValueOffset(); @@ -464,7 +462,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); Lo = DAG.getUNDEF(LoVT); 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 335c73c..ca19430 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -20,8 +20,8 @@ // type i8 which must be promoted. // // This does not legalize vector manipulations like ISD::BUILD_VECTOR, -// or operations that happen to take a vector which are custom-lowered like -// ISD::CALL; the legalization for such operations never produces nodes +// or operations that happen to take a vector which are custom-lowered; +// the legalization for such operations never produces nodes // with illegal types, so it's okay to put off legalizing them until // SelectionDAG::Legalize runs. // @@ -129,7 +129,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (!HasVectorValue) return TranslateLegalizeResults(Op, Result); - MVT QueryType; + EVT QueryType; switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Result); @@ -231,10 +231,10 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { // Vector "promotion" is basically just bitcasting and doing the operation // in a different type. For example, x86 promotes ISD::AND on v2i32 to // v1i64. 
- MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); @@ -260,11 +260,11 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { } SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); - MVT TmpEltVT = LHS.getValueType().getVectorElementType(); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { @@ -287,11 +287,11 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { /// the operation be expanded. "Unroll" the vector, splitting out the scalars /// and operating on each element individually. SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); unsigned NE = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 8> Scalars; @@ -299,10 +299,10 @@ SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) { for (unsigned i = 0; i != NE; ++i) { for (unsigned j = 0; j != Op.getNumOperands(); ++j) { SDValue Operand = Op.getOperand(j); - MVT OperandVT = Operand.getValueType(); + EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. 
- MVT OperandEltVT = OperandVT.getVectorElementType(); + EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 68967cc..a03f825 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -15,14 +15,16 @@ // eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 // types. // Splitting is the act of changing a computation in an invalid vector type to -// be a computation in multiple vectors of a smaller type. For example, -// implementing <128 x f32> operations in terms of two <64 x f32> operations. +// be a computation in two vectors of half the size. For example, implementing +// <128 x f32> operations in terms of two <64 x f32> operations. // //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -30,18 +32,19 @@ using namespace llvm; //===----------------------------------------------------------------------===// void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Scalarize node result " << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ScalarizeVectorResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to scalarize the result 
of this operator!"); - abort(); + llvm_unreachable("Do not know how to scalarize the result of this operator!"); case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; case ISD::BUILD_VECTOR: R = N->getOperand(0); break; @@ -53,6 +56,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; @@ -72,9 +76,14 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCEIL: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + R = ScalarizeVecRes_UnaryOp(N); + break; case ISD::ADD: case ISD::AND: @@ -91,11 +100,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SUB: case ISD::UDIV: case ISD::UREM: - case ISD::XOR: R = ScalarizeVecRes_BinOp(N); break; - + case ISD::XOR: case ISD::SHL: case ISD::SRA: - case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break; + case ISD::SRL: + R = ScalarizeVecRes_BinOp(N); + break; } // If R is null, the sub-method took care of registering the result. 
@@ -110,21 +120,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) { - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), - LHS.getValueType(), LHS, ShiftAmt); -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { - MVT NewVT = N->getValueType(0).getVectorElementType(); + EVT NewVT = N->getValueType(0).getVectorElementType(); return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), NewVT, N->getOperand(0)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { - MVT NewVT = N->getValueType(0).getVectorElementType(); + EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op0 = GetScalarizedVector(N->getOperand(0)); return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), Op0, DAG.getValueType(NewVT), @@ -150,7 +153,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { // The value to insert may have a wider type than the vector element type, // so be sure to truncate it to the element type if necessary. SDValue Op = N->getOperand(1); - MVT EltVT = N->getValueType(0).getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); if (Op.getValueType() != EltVT) // FIXME: Can this happen for floating point types? Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); @@ -167,7 +170,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
@@ -177,7 +180,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. - MVT DestVT = N->getValueType(0).getVectorElementType(); + EVT DestVT = N->getValueType(0).getVectorElementType(); SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); } @@ -185,7 +188,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { // If the operand is wider than the vector element type then it is implicitly // truncated. Make that explicit here. - MVT EltVT = N->getValueType(0).getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); SDValue InOp = N->getOperand(0); if (InOp.getValueType() != EltVT) return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); @@ -207,6 +210,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { N->getOperand(4)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + DebugLoc DL = N->getDebugLoc(); + + // Turn it into a scalar SETCC. 
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); } @@ -223,12 +235,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); - MVT NVT = N->getValueType(0).getVectorElementType(); - MVT SVT = TLI.getSetCCResultType(LHS.getValueType()); - DebugLoc dl = N->getDebugLoc(); + EVT NVT = N->getValueType(0).getVectorElementType(); + EVT SVT = TLI.getSetCCResultType(LHS.getValueType()); + DebugLoc DL = N->getDebugLoc(); // Turn it into a scalar SETCC. - SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2)); + SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2)); // VSETCC always returns a sign-extended value, while SETCC may not. The // SETCC result type may not match the vector element type. Correct these. @@ -237,19 +249,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { // Ensure the SETCC result is sign-extended. if (TLI.getBooleanContents() != TargetLowering::ZeroOrNegativeOneBooleanContent) - Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res, + Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res, DAG.getValueType(MVT::i1)); // Truncate to the final type. - return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); - } else { - // The SETCC result type is smaller than the vector element type. - // If the SetCC result is not sign-extended, chop it down to MVT::i1. - if (TLI.getBooleanContents() != - TargetLowering::ZeroOrNegativeOneBooleanContent) - Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res); - // Sign extend to the final type. 
- return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res); + return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res); } + + // The SETCC result type is smaller than the vector element type. + // If the SetCC result is not sign-extended, chop it down to MVT::i1. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res); + // Sign extend to the final type. + return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res); } @@ -258,31 +270,32 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Scalarize node operand " << OpNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); if (Res.getNode() == 0) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ScalarizeVectorOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to scalarize this operator's operand!"); - abort(); - + llvm_unreachable("Do not know how to scalarize this operator's operand!"); case ISD::BIT_CONVERT: - Res = ScalarizeVecOp_BIT_CONVERT(N); break; - + Res = ScalarizeVecOp_BIT_CONVERT(N); + break; case ISD::CONCAT_VECTORS: - Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; - + Res = ScalarizeVecOp_CONCAT_VECTORS(N); + break; case ISD::EXTRACT_VECTOR_ELT: - Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; - + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); + break; case ISD::STORE: - Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; } } @@ -323,7 +336,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { /// be 
scalarized, it must be <1 x ty>, so just return the element, ignoring the /// index. SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { - return GetScalarizedVector(N->getOperand(0)); + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != N->getValueType(0)) + Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), + Res); + return Res; } /// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be @@ -343,7 +360,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->getOriginalAlignment()); } @@ -357,17 +374,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ /// legalization, we just know that (at least) one result needs vector /// splitting. void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Split node result: "; + N->dump(&DAG); + errs() << "\n"); SDValue Lo, Hi; switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SplitVectorResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to split the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to split the result of this operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; @@ -382,10 +401,16 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: 
SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; - case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break; + case ISD::LOAD: + SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); + break; + case ISD::SETCC: + case ISD::VSETCC: + SplitVecRes_SETCC(N, Lo, Hi); + break; case ISD::VECTOR_SHUFFLE: - SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; - case ISD::VSETCC: SplitVecRes_VSETCC(N, Lo, Hi); break; + SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); + break; case ISD::CTTZ: case ISD::CTLZ: @@ -403,8 +428,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + SplitVecRes_UnaryOp(N, Lo, Hi); + break; case ISD::ADD: case ISD::SUB: @@ -424,7 +454,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SRL: case ISD::UREM: case ISD::SREM: - case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break; + case ISD::FREM: + SplitVecRes_BinOp(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -448,12 +480,12 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi) { // We know the result is a vector. The input may be either a vector or a // scalar value. - MVT LoVT, HiVT; + EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); DebugLoc dl = N->getDebugLoc(); SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); // Handle some special cases efficiently. switch (getTypeAction(InVT)) { @@ -488,8 +520,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, } // In the general case, convert the input to an integer and split it by hand. 
- MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits()); - MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits()); + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); if (TLI.isBigEndian()) std::swap(LoIntVT, HiIntVT); @@ -503,7 +535,7 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); unsigned LoNumElts = LoVT.getVectorNumElements(); @@ -525,7 +557,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, return; } - MVT LoVT, HiVT; + EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); @@ -537,7 +569,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); @@ -550,12 +582,11 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, // Split the input. 
SDValue VLo, VHi; - MVT InVT = N->getOperand(0).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); switch (getTypeAction(InVT)) { - default: assert(0 && "Unexpected type action!"); + default: llvm_unreachable("Unexpected type action!"); case Legal: { - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); @@ -570,9 +601,8 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, // If the result needs to be split and the input needs to be widened, // the two types must have different lengths. Use the widened result // and extract from it to do the split. - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); @@ -595,14 +625,11 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - MVT IdxVT = Idx.getValueType(); + EVT IdxVT = Idx.getValueType(); DebugLoc dl = N->getDebugLoc(); - MVT LoVT, HiVT; + EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - // The indices are not guaranteed to be a multiple of the new vector - // size unless the original vector type was split in two. 
- assert(LoVT == HiVT && "Non power-of-two vectors not supported!"); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, @@ -639,8 +666,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, } // Spill the vector to the stack. - MVT VecVT = Vec.getValueType(); - MVT EltVT = VecVT.getVectorElementType(); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); @@ -648,7 +675,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // so use a truncating store. SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT()); + TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext())); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); // Load the Lo part from the stack slot. 
@@ -666,7 +693,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); @@ -676,7 +703,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = LD->getDebugLoc(); GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); @@ -686,11 +713,11 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); const Value *SV = LD->getSrcValue(); int SVOffset = LD->getSrcValueOffset(); - MVT MemoryVT = LD->getMemoryVT(); - unsigned Alignment = LD->getAlignment(); + EVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); - MVT LoMemVT, HiMemVT; + EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, @@ -700,7 +727,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; - Alignment = MinAlign(Alignment, IncrementSize); Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, SV, SVOffset, HiMemVT, isVolatile, Alignment); @@ -714,20 +740,43 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + DebugLoc DL = N->getDebugLoc(); + 
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. + EVT InVT = N->getOperand(0).getValueType(); + SDValue LL, LH, RL, RH; + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(0)); + RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); +} + void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi) { // Get the dest types - they may not match the input types, e.g. int_to_fp. - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); // Split the input. - MVT InVT = N->getOperand(0).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); switch (getTypeAction(InVT)) { - default: assert(0 && "Unexpected type action!"); + default: llvm_unreachable("Unexpected type action!"); case Legal: { - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); @@ -742,9 +791,8 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // If the result needs to be split and the input needs to be widened, // the two types must have different lengths. 
Use the widened result // and extract from it to do the split. - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); @@ -765,10 +813,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, DebugLoc dl = N->getDebugLoc(); GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); - MVT NewVT = Inputs[0].getValueType(); + EVT NewVT = Inputs[0].getValueType(); unsigned NewElts = NewVT.getVectorNumElements(); - assert(NewVT == Inputs[1].getValueType() && - "Non power-of-two vectors not supported!"); // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the @@ -825,7 +871,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } if (useBuildVector) { - MVT EltVT = NewVT.getVectorElementType(); + EVT EltVT = NewVT.getVectorElementType(); SmallVector<SDValue, 16> SVOps; // Extract the input elements by hand. 
@@ -868,20 +914,6 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } } -void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, - SDValue &Hi) { - MVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - - SDValue LL, LH, RL, RH; - GetSplitVector(N->getOperand(0), LL, LH); - GetSplitVector(N->getOperand(1), RL, RH); - - Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2)); -} - //===----------------------------------------------------------------------===// // Operand Vector Splitting @@ -892,24 +924,27 @@ void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, /// result types of the node are known to be legal, but other operands of the /// node may need legalization as well as the specified one. bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Split node operand: "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); if (Res.getNode() == 0) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SplitVectorOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to split this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to split this operator's operand!"); case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; - case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), - OpNo); break; + case ISD::STORE: + Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; case ISD::CTTZ: case ISD::CTLZ: @@ -917,8 +952,13 @@ bool 
DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = SplitVecOp_UnaryOp(N); + break; } } @@ -939,15 +979,13 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. - MVT ResVT = N->getValueType(0); + EVT ResVT = N->getValueType(0); SDValue Lo, Hi; DebugLoc dl = N->getDebugLoc(); GetSplitVector(N->getOperand(0), Lo, Hi); - assert(Lo.getValueType() == Hi.getValueType() && - "Returns legal non-power-of-two vector type?"); - MVT InVT = Lo.getValueType(); + EVT InVT = Lo.getValueType(); - MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(), + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); @@ -975,7 +1013,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. For now, assume the index // is a constant. 
- MVT SubVT = N->getValueType(0); + EVT SubVT = N->getValueType(0); SDValue Idx = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); SDValue Lo, Hi; @@ -997,7 +1035,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - MVT VecVT = Vec.getValueType(); + EVT VecVT = Vec.getValueType(); if (isa<ConstantSDNode>(Idx)) { uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); @@ -1010,14 +1048,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (IdxVal < LoElts) return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); - else - return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, - DAG.getConstant(IdxVal - LoElts, - Idx.getValueType())); + return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + DAG.getConstant(IdxVal - LoElts, + Idx.getValueType())); } // Store the vector to the stack. - MVT EltVT = VecVT.getVectorElementType(); + EVT EltVT = VecVT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); @@ -1026,7 +1063,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. 
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0); + return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + SV, 0, EltVT); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1038,13 +1076,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); int SVOffset = N->getSrcValueOffset(); - MVT MemoryVT = N->getMemoryVT(); - unsigned Alignment = N->getAlignment(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); - MVT LoMemVT, HiMemVT; + EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; @@ -1059,15 +1097,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; if (isTruncating) - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, - N->getSrcValue(), SVOffset+IncrementSize, - HiMemVT, - isVol, MinAlign(Alignment, IncrementSize)); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, + HiMemVT, isVol, Alignment); else - Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, - isVol, MinAlign(Alignment, IncrementSize)); + Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, + isVol, Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1078,18 +1115,19 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { //===----------------------------------------------------------------------===// void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Widen node result " << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "WidenVectorResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to widen the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to widen the result of this operator!"); case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; @@ -1102,9 +1140,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; - case ISD::VECTOR_SHUFFLE: - Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; - 
case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); + break; + case ISD::VSETCC: + Res = WidenVecRes_VSETCC(N); + break; case ISD::ADD: case ISD::AND: @@ -1126,21 +1167,27 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::UDIV: case ISD::UREM: case ISD::SUB: - case ISD::XOR: Res = WidenVecRes_Binary(N); break; + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; case ISD::SHL: case ISD::SRA: - case ISD::SRL: Res = WidenVecRes_Shift(N); break; + case ISD::SRL: + Res = WidenVecRes_Shift(N); + break; - case ISD::ANY_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break; + case ISD::ANY_EXTEND: + Res = WidenVecRes_Convert(N); + break; case ISD::CTLZ: case ISD::CTPOP: @@ -1149,7 +1196,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOS: case ISD::FNEG: case ISD::FSIN: - case ISD::FSQRT: Res = WidenVecRes_Unary(N); break; + case ISD::FSQRT: + Res = WidenVecRes_Unary(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -1159,7 +1208,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. 
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); @@ -1169,12 +1218,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); DebugLoc dl = N->getDebugLoc(); - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); - MVT InVT = InOp.getValueType(); - MVT InEltVT = InVT.getVectorElementType(); - MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); @@ -1216,7 +1265,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. 
SmallVector<SDValue, 16> Ops(WidenNumElts); - MVT EltVT = WidenVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; for (i=0; i < MinElts; ++i) @@ -1232,16 +1281,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue ShOp = N->getOperand(1); - MVT ShVT = ShOp.getValueType(); + EVT ShVT = ShOp.getValueType(); if (getTypeAction(ShVT) == WidenVector) { ShOp = GetWidenedVector(ShOp); ShVT = ShOp.getValueType(); } - MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(), + EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(), WidenVT.getVectorNumElements()); if (ShVT != ShWidenVT) ShOp = ModifyToType(ShOp, ShWidenVT); @@ -1251,16 +1300,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { // Unary op widening. - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); } SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); - MVT VT = N->getValueType(0); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT InVT = InOp.getValueType(); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { @@ -1300,13 +1349,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { // Determine new input vector type. 
The new input vector type will use // the same element type (if its a vector) or use the input type as a // vector. It is the same size as the type to widen to. - MVT NewInVT; + EVT NewInVT; unsigned NewNumElts = WidenSize / InSize; if (InVT.isVector()) { - MVT InEltVT = InVT.getVectorElementType(); - NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits()); + EVT InEltVT = InVT.getVectorElementType(); + NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); } else { - NewInVT = MVT::getVectorVT(InVT, NewNumElts); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } if (TLI.isTypeLegal(NewInVT)) { @@ -1332,28 +1381,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { } } - // This should occur rarely. Lower the bit-convert to a store/load - // from the stack. Create the stack frame object. Make sure it is aligned - // for both the source and destination types. - SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT); - int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); - - // Result is a load from the stack slot. - return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0); + return CreateStackStoreLoad(InOp, WidenVT); } SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); // Build a vector with undefined for the new nodes. 
- MVT VT = N->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end()); @@ -1365,8 +1403,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { - MVT InVT = N->getOperand(0).getValueType(); - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT InVT = N->getOperand(0).getValueType(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); unsigned WidenNumElts = WidenVT.getVectorNumElements(); unsigned NumOperands = N->getNumOperands(); @@ -1387,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { } } else { InputWidened = true; - if (WidenVT == TLI.getTypeToTransformTo(InVT)) { + if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { // The inputs and the result are widen to the same value. unsigned i; for (i=1; i < NumOperands; ++i) @@ -1406,7 +1444,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { MaskOps[i] = i; MaskOps[i+WidenNumElts/2] = i+WidenNumElts; } - return DAG.getVectorShuffle(WidenVT, dl, + return DAG.getVectorShuffle(WidenVT, dl, GetWidenedVector(N->getOperand(0)), GetWidenedVector(N->getOperand(1)), &MaskOps[0]); @@ -1415,7 +1453,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { } // Fall back to use extracts and build vector. 
- MVT EltVT = WidenVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); unsigned NumInElts = InVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Idx = 0; @@ -1439,12 +1477,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue RndOp = N->getOperand(3); SDValue SatOp = N->getOperand(4); - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); - MVT InVT = InOp.getValueType(); - MVT InEltVT = InVT.getVectorElementType(); - MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); SDValue DTyOp = DAG.getValueType(WidenVT); SDValue STyOp = DAG.getValueType(InWidenVT); @@ -1491,7 +1529,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. 
SmallVector<SDValue, 16> Ops(WidenNumElts); - MVT EltVT = WidenVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); DTyOp = DAG.getValueType(EltVT); STyOp = DAG.getValueType(InEltVT); @@ -1512,8 +1550,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - MVT VT = N->getValueType(0); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp = N->getOperand(0); SDValue Idx = N->getOperand(1); @@ -1522,7 +1560,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { if (getTypeAction(InOp.getValueType()) == WidenVector) InOp = GetWidenedVector(InOp); - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); if (CIdx) { @@ -1540,8 +1578,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. 
SmallVector<SDValue, 16> Ops(WidenNumElts); - MVT EltVT = VT.getVectorElementType(); - MVT IdxVT = Idx.getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT IdxVT = Idx.getValueType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; if (CIdx) { @@ -1573,8 +1611,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); - MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0)); - MVT LdVT = LD->getMemoryVT(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); DebugLoc dl = N->getDebugLoc(); assert(LdVT.isVector() && WidenVT.isVector()); @@ -1593,8 +1631,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { // For extension loads, we can not play the tricks of chopping legal // vector types and bit cast it to the right type. Instead, we unroll // the load and build a vector. - MVT EltVT = WidenVT.getVectorElementType(); - MVT LdEltVT = LdVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); + EVT LdEltVT = LdVT.getVectorElementType(); unsigned NumElts = LdVT.getVectorNumElements(); // Load each element and widen @@ -1638,26 +1676,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { // Modified the chain - switch anything that used the old chain to use // the new one. 
- ReplaceValueWith(SDValue(N, 1), Chain); + ReplaceValueWith(SDValue(N, 1), NewChain); return Result; } SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(), WidenVT, N->getOperand(0)); } SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue Cond1 = N->getOperand(0); - MVT CondVT = Cond1.getValueType(); + EVT CondVT = Cond1.getValueType(); if (CondVT.isVector()) { - MVT CondEltVT = CondVT.getVectorElementType(); - MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts); + EVT CondEltVT = CondVT.getVectorElementType(); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts); if (getTypeAction(CondVT) == WidenVector) Cond1 = GetWidenedVector(Cond1); @@ -1681,15 +1719,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getUNDEF(WidenVT); } SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned NumElts = VT.getVectorNumElements(); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1711,13 +1749,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { - MVT WidenVT = 
TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp1 = N->getOperand(0); - MVT InVT = InOp1.getValueType(); + EVT InVT = InOp1.getValueType(); assert(InVT.isVector() && "can not widen non vector type"); - MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts); + EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -1735,18 +1773,19 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { // Widen Vector Operand //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Widen node operand " << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "WidenVectorOperand op #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "WidenVectorOperand op #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to widen this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to widen this operator's operand!"); case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; @@ -1757,8 +1796,13 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = WidenVecOp_Convert(N); + break; } // If Res is null, the 
sub-method took care of registering the result. @@ -1781,15 +1825,15 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert // into some scalar code and create a nasty build vector. - MVT VT = N->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); if (getTypeAction(InOp.getValueType()) == WidenVector) InOp = GetWidenedVector(InOp); - MVT InVT = InOp.getValueType(); - MVT InEltVT = InVT.getVectorElementType(); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); unsigned Opcode = N->getOpcode(); SmallVector<SDValue, 16> Ops(NumElts); @@ -1802,9 +1846,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT InWidenVT = InOp.getValueType(); + EVT InWidenVT = InOp.getValueType(); DebugLoc dl = N->getDebugLoc(); // Check if we can convert between two legal vector types and extract. @@ -1812,7 +1856,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { unsigned Size = VT.getSizeInBits(); if (InWidenSize % Size == 0 && !VT.isVector()) { unsigned NewNumElts = InWidenSize / Size; - MVT NewVT = MVT::getVectorVT(VT, NewNumElts); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, @@ -1820,31 +1864,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { } } - // Lower the bit-convert to a store/load from the stack. Create the stack - // frame object. 
Make sure it is aligned for both the source and destination - // types. - SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT); - int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); - - // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0); + return CreateStackStoreLoad(InOp, VT); } SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { // If the input vector is not legal, it is likely that we will not find a // legal vector of the same size. Replace the concatenate vector with a // nasty build vector. - MVT VT = N->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); unsigned NumElts = VT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(NumElts); - MVT InVT = N->getOperand(0).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); unsigned NumInElts = InVT.getVectorNumElements(); unsigned Idx = 0; @@ -1862,9 +1895,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT EltVT = InOp.getValueType().getVectorElementType(); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), - EltVT, InOp, N->getOperand(1)); + N->getValueType(0), InOp, N->getOperand(1)); } SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { @@ -1880,8 +1912,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = N->getDebugLoc(); - MVT StVT = ST->getMemoryVT(); - MVT ValVT = ValOp.getValueType(); + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ValOp.getValueType(); // It must be true that we the widen vector type is bigger than where // we 
need to store. assert(StVT.isVector() && ValOp.getValueType().isVector()); @@ -1892,8 +1924,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // For truncating stores, we can not play the tricks of chopping legal // vector types and bit cast it to the right type. Instead, we unroll // the store. - MVT StEltVT = StVT.getVectorElementType(); - MVT ValEltVT = ValVT.getVectorElementType(); + EVT StEltVT = StVT.getVectorElementType(); + EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, @@ -1938,9 +1970,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // VecVT: Vector value type whose size we must match. // Returns NewVecVT and NewEltVT - the vector type and its associated // element type. -static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width, - MVT VecVT, - MVT& NewEltVT, MVT& NewVecVT) { +static void FindAssocWidenVecType(SelectionDAG& DAG, + const TargetLowering &TLI, unsigned Width, + EVT VecVT, + EVT& NewEltVT, EVT& NewVecVT) { unsigned EltWidth = Width + 1; if (TLI.isTypeLegal(VecVT)) { // We start with the preferred with, making it a power of 2 and find a @@ -1950,9 +1983,9 @@ static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width, do { assert(EltWidth > 0); EltWidth = 1 << Log2_32(EltWidth - 1); - NewEltVT = MVT::getIntegerVT(EltWidth); + NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = MVT::getVectorVT(NewEltVT, NumElts); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); } while (!TLI.isTypeLegal(NewVecVT) || VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); } else { @@ -1965,9 +1998,9 @@ static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width, do { assert(EltWidth > 0); EltWidth = 1 << Log2_32(EltWidth - 1); - 
NewEltVT = MVT::getIntegerVT(EltWidth); + NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = MVT::getVectorVT(NewEltVT, NumElts); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); } while (!TLI.isTypeLegal(NewEltVT) || VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); } @@ -1981,7 +2014,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, unsigned Alignment, bool isVolatile, unsigned LdWidth, - MVT ResType, + EVT ResType, DebugLoc dl) { // The strategy assumes that we can efficiently load powers of two widths. // The routines chops the vector into the largest power of 2 load and @@ -1992,9 +2025,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // the load is nonvolatile, we an use a wider load for the value. // Find the vector type that can load from. - MVT NewEltVT, NewVecVT; + EVT NewEltVT, NewVecVT; unsigned NewEltVTWidth; - FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); NewEltVTWidth = NewEltVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset, @@ -2021,7 +2054,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Our current type we are using is too large, use a smaller size by // using a smaller power of 2 unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); NewEltVTWidth = NewEltVT.getSizeInBits(); // Readjust position and vector position based on new load type Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); @@ -2056,10 +2089,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // want to store. This avoids requiring a stack convert. 
// Find a width of the element type we can store with - MVT WidenVT = ValOp.getValueType(); - MVT NewEltVT, NewVecVT; + EVT WidenVT = ValOp.getValueType(); + EVT NewEltVT, NewVecVT; - FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); unsigned NewEltVTWidth = NewEltVT.getSizeInBits(); SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); @@ -2088,7 +2121,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // Our current type we are using is too large, use a smaller size by // using a smaller power of 2 unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); NewEltVTWidth = NewEltVT.getSizeInBits(); // Readjust position and vector position based on new load type Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); @@ -2106,10 +2139,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. -SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) { +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { // Note that InOp might have been widened so it might already have // the right width or it might need be narrowed. - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match"); DebugLoc dl = InOp.getDebugLoc(); @@ -2137,7 +2170,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) { // Fall back to extract and build. 
SmallVector<SDValue, 16> Ops(WidenNumElts); - MVT EltVT = NVT.getVectorElementType(); + EVT EltVT = NVT.getVectorElementType(); unsigned MinNumElts = std::min(WidenNumElts, InNumElts); unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index af73b28..e0f93d8 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -24,6 +24,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumUnfolds, "Number of nodes unfolded"); @@ -108,14 +110,14 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGFast::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. - BuildSchedGraph(); + BuildSchedGraph(NULL); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -132,17 +134,17 @@ void ScheduleDAGFast::Schedule() { /// the AvailableQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); - --PredSU->NumSuccsLeft; - + #ifndef NDEBUG - if (PredSU->NumSuccsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (PredSU->NumSuccsLeft == 0) { + errs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --PredSU->NumSuccsLeft; + // If all the node's successors are scheduled, this node is ready // to be scheduled. 
Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { @@ -174,7 +176,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); @@ -214,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT == MVT::Flag) return NULL; else if (VT == MVT::Other) @@ -222,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - MVT VT = Op.getNode()->getValueType(Op.getResNo()); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Flag) return NULL; } @@ -232,7 +234,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -342,7 +344,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = Clone(SU); // New SUnit has the exact same predecessors. 
@@ -419,7 +421,7 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? -static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); @@ -533,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); @@ -549,16 +551,16 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. 
SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DOUT << "Adding an edge from SU # " << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); NewDef = Copies.back(); } - DOUT << "Adding an edge from SU # " << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -568,8 +570,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { } if (!CurSU) { - assert(false && "Unable to resolve live physical register dependencies!"); - abort(); + llvm_unreachable("Unable to resolve live physical register dependencies!"); } } @@ -587,41 +588,11 @@ void ScheduleDAGFast::ListScheduleBottomUp() { ++CurCycle; } - // Reverse the order if it is bottom up. + // Reverse the order since it is bottom up. std::reverse(Sequence.begin(), Sequence.end()); - - + #ifndef NDEBUG - // Verify that all SUnits were scheduled. - bool AnyNotSched = false; - unsigned DeadNodes = 0; - unsigned Noops = 0; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - if (!SUnits[i].isScheduled) { - if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { - ++DeadNodes; - continue; - } - if (!AnyNotSched) - cerr << "*** List scheduling failed! ***\n"; - SUnits[i].dump(this); - cerr << "has not been scheduled!\n"; - AnyNotSched = true; - } - if (SUnits[i].NumSuccsLeft != 0) { - if (!AnyNotSched) - cerr << "*** List scheduling failed! 
***\n"; - SUnits[i].dump(this); - cerr << "has successors left!\n"; - AnyNotSched = true; - } - } - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; - assert(!AnyNotSched); - assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && - "The number of nodes scheduled doesn't match the expected number!"); + VerifySchedule(/*isBottomUp=*/true); #endif } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index c432534..c8d2158 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -29,6 +29,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/Statistic.h" #include <climits> @@ -86,10 +88,10 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGList::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); // Build the scheduling graph. - BuildSchedGraph(); + BuildSchedGraph(NULL); AvailableQueue->initNodes(SUnits); @@ -106,17 +108,17 @@ void ScheduleDAGList::Schedule() { /// the PendingQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { SUnit *SuccSU = D.getSUnit(); - --SuccSU->NumPredsLeft; - + #ifndef NDEBUG - if (SuccSU->NumPredsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (SuccSU->NumPredsLeft == 0) { + errs() << "*** Scheduling failed! 
***\n"; SuccSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --SuccSU->NumPredsLeft; + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); // If all the node's predecessors are scheduled, this node is ready @@ -140,7 +142,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); @@ -232,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. - DOUT << "*** Advancing cycle, no work to do\n"; + DEBUG(errs() << "*** Advancing cycle, no work to do\n"); HazardRec->AdvanceCycle(); ++NumStalls; ++CurCycle; @@ -240,7 +242,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. 
- DOUT << "*** Emitting noop\n"; + DEBUG(errs() << "*** Emitting noop\n"); HazardRec->EmitNoop(); Sequence.push_back(0); // NULL here means noop ++NumNoops; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index c97e2a8..cec24e6 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -25,10 +25,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; @@ -163,14 +165,14 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. - BuildSchedGraph(); + BuildSchedGraph(NULL); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -195,17 +197,17 @@ void ScheduleDAGRRList::Schedule() { /// the AvailableQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); - --PredSU->NumSuccsLeft; - + #ifndef NDEBUG - if (PredSU->NumSuccsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (PredSU->NumSuccsLeft == 0) { + errs() << "*** Scheduling failed! 
***\n"; PredSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --PredSU->NumSuccsLeft; + // If all the node's successors are scheduled, this node is ready // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { @@ -237,7 +239,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); @@ -276,13 +278,14 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { AvailableQueue->remove(PredSU); } + assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++PredSU->NumSuccsLeft; } /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// its predecessor states to reflect the change. 
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { - DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; + DEBUG(errs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(SU->dump(this)); AvailableQueue->UnscheduledNode(SU); @@ -351,7 +354,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT == MVT::Flag) return NULL; else if (VT == MVT::Other) @@ -359,7 +362,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - MVT VT = Op.getNode()->getValueType(Op.getResNo()); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Flag) return NULL; } @@ -369,7 +372,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -488,7 +491,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. @@ -570,7 +573,7 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? 
-static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); @@ -753,7 +756,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); @@ -769,8 +772,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DOUT << "Adding an edge from SU #" << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, @@ -778,8 +781,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { NewDef = Copies.back(); } - DOUT << "Adding an edge from SU #" << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -822,17 +825,17 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { /// the AvailableQueue if the count reaches zero. Also update its cycle bound. 
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); - --SuccSU->NumPredsLeft; - + #ifndef NDEBUG - if (SuccSU->NumPredsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (SuccSU->NumPredsLeft == 0) { + errs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --SuccSU->NumPredsLeft; + // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { @@ -856,7 +859,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); @@ -1215,7 +1218,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!SUImpDefs) return false; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT == MVT::Flag || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) @@ -1328,9 +1331,9 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. 
- DOUT << "Prescheduling SU # " << SU->NodeNum - << " next to PredSU # " << PredSU->NodeNum - << " to guide scheduling in the presence of multiple uses\n"; + DEBUG(errs() << "Prescheduling SU # " << SU->NodeNum + << " next to PredSU # " << PredSU->NodeNum + << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { SDep Edge = PredSU->Succs[i]; assert(!Edge.isAssignedRegDep()); @@ -1418,8 +1421,8 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { - DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum - << " to SU #" << SuccSU->NodeNum << "\n"; + DEBUG(errs() << "Adding a pseudo-two-addr edge from SU # " + << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 7aa15bc..d53de34 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -14,10 +14,12 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -152,6 +154,11 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { } void ScheduleDAGSDNodes::AddSchedEdges() { + const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + + // Check to see if the scheduler cares about latencies. 
+ bool UnitLatencies = ForceUnitLatencies(); + // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; @@ -175,7 +182,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; - unsigned NumUsed = CountResults(N); + unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) --NumUsed; // Skip over unused values at the end. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) @@ -189,7 +196,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { assert(OpSU && "Node has no SUnit!"); if (OpSU == SU) continue; // In the same group. - MVT OpVT = N->getOperand(i).getValueType(); + EVT OpVT = N->getOperand(i).getValueType(); assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; @@ -206,8 +213,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // dependency. This may change in the future though. if (Cost >= 0) PhysReg = 0; - SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpSU->Latency, PhysReg)); + + const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpSU->Latency, PhysReg); + if (!isChain && !UnitLatencies) { + ComputeOperandLatency(OpSU, SU, (SDep &)dep); + ST.adjustSchedDependency(OpSU, SU, (SDep &)dep); + } + + SU->addPred(dep); } } } @@ -217,7 +231,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. -void ScheduleDAGSDNodes::BuildSchedGraph() { +void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Populate the SUnits array. BuildSchedUnits(); // Compute all the scheduling dependencies between nodes. 
@@ -230,65 +244,68 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { // Compute the latency for the node. We use the sum of the latencies for // all nodes flagged together into this SUnit. SU->Latency = 0; - bool SawMachineOpcode = false; for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) if (N->isMachineOpcode()) { - SawMachineOpcode = true; - SU->Latency += - InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass()); + SU->Latency += InstrItins. + getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); } } -/// CountResults - The results of target nodes have register or immediate -/// operands first, then an optional chain, and optional flag operands (which do -/// not go into the resulting MachineInstr). -unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) { - unsigned N = Node->getNumValues(); - while (N && Node->getValueType(N - 1) == MVT::Flag) - --N; - if (N && Node->getValueType(N - 1) == MVT::Other) - --N; // Skip over chain result. - return N; -} - -/// CountOperands - The inputs to target nodes have any actual inputs first, -/// followed by special operands that describe memory references, then an -/// optional chain operand, then an optional flag operand. Compute the number -/// of actual operands that will go into the resulting MachineInstr. -unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) { - unsigned N = ComputeMemOperandsEnd(Node); - while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode())) - --N; // Ignore MEMOPERAND nodes - return N; -} - -/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode -/// operand -unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) { - unsigned N = Node->getNumOperands(); - while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) - --N; - if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) - --N; // Ignore chain if it exists. 
- return N; -} - - void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { if (!SU->getNode()) { - cerr << "PHYS REG COPY\n"; + errs() << "PHYS REG COPY\n"; return; } SU->getNode()->dump(DAG); - cerr << "\n"; + errs() << "\n"; SmallVector<SDNode *, 4> FlaggedNodes; for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) FlaggedNodes.push_back(N); while (!FlaggedNodes.empty()) { - cerr << " "; + errs() << " "; FlaggedNodes.back()->dump(DAG); - cerr << "\n"; + errs() << "\n"; FlaggedNodes.pop_back(); } } + +/// EmitSchedule - Emit the machine code in scheduled order. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SUnit*, unsigned> CopyVRBaseMap; + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. 
+ EmitPhysRegCopy(SU, CopyVRBaseMap); + continue; + } + + SmallVector<SDNode *, 4> FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; + N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap, EM); + FlaggedNodes.pop_back(); + } + Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap, EM); + } + + BB = Emitter.getBlock(); + InsertPos = Emitter.getInsertPos(); + return BB; +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2a278b7..c9c36f7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -58,7 +58,6 @@ namespace llvm { if (isa<ConstantPoolSDNode>(Node)) return true; if (isa<JumpTableSDNode>(Node)) return true; if (isa<ExternalSymbolSDNode>(Node)) return true; - if (isa<MemOperandSDNode>(Node)) return true; if (Node->getOpcode() == ISD::EntryToken) return true; return false; } @@ -87,35 +86,14 @@ namespace llvm { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - virtual void BuildSchedGraph(); + virtual void BuildSchedGraph(AliasAnalysis *AA); /// ComputeLatency - Compute node latency. /// virtual void ComputeLatency(SUnit *SU); - /// CountResults - The results of target nodes have register or immediate - /// operands first, then an optional chain, and optional flag operands - /// (which do not go into the machine instrs.) - static unsigned CountResults(SDNode *Node); - - /// CountOperands - The inputs to target nodes have any actual inputs first, - /// followed by special operands that describe memory references, then an - /// optional chain operand, then flag operands. 
Compute the number of - /// actual operands that will go into the resulting MachineInstr. - static unsigned CountOperands(SDNode *Node); - - /// ComputeMemOperandsEnd - Find the index one past the last - /// MemOperandSDNode operand - static unsigned ComputeMemOperandsEnd(SDNode *Node); - - /// EmitNode - Generate machine code for an node and needed dependencies. - /// VRBaseMap contains, for each already emitted node, the first virtual - /// register number for the results of the node. - /// - void EmitNode(SDNode *Node, bool IsClone, bool HasClone, - DenseMap<SDValue, unsigned> &VRBaseMap); - - virtual MachineBasicBlock *EmitSchedule(); + virtual MachineBasicBlock * + EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. @@ -129,47 +107,6 @@ namespace llvm { virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; private: - /// EmitSubregNode - Generate machine code for subreg nodes. - /// - void EmitSubregNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap); - - /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS - /// nodes. - /// - void EmitCopyToRegClassNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap); - - /// getVR - Return the virtual register corresponding to the specified result - /// of the specified node. - unsigned getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap); - - /// getDstOfCopyToRegUse - If the only use of the specified result number of - /// node is a CopyToReg, return its destination register. Return 0 otherwise. - unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const; - - void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap); - - /// AddRegisterOperand - Add the specified register as an operand to the - /// specified machine instr. 
Insert register copies if the register is - /// not in the required register class. - void AddRegisterOperand(MachineInstr *MI, SDValue Op, - unsigned IIOpNum, const TargetInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap); - - /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an - /// implicit physical register output. - void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - bool IsCloned, unsigned SrcReg, - DenseMap<SDValue, unsigned> &VRBaseMap); - - void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, bool IsClone, - bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap); - /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); void AddSchedEdges(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c8f4b52..542bf64 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Constants.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" @@ -31,6 +32,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -46,14 +48,14 @@ using namespace llvm; /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. 
-static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) { +static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { SDVTList Res = {VTs, NumVTs}; return Res; } -static const fltSemantics *MVTToAPFloatSemantics(MVT VT) { - switch (VT.getSimpleVT()) { - default: assert(0 && "Unknown FP format"); +static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP format"); case MVT::f32: return &APFloat::IEEEsingle; case MVT::f64: return &APFloat::IEEEdouble; case MVT::f80: return &APFloat::x87DoubleExtended; @@ -76,7 +78,7 @@ bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { return getValueAPF().bitwiseIsEqual(V); } -bool ConstantFPSDNode::isValueValidForType(MVT VT, +bool ConstantFPSDNode::isValueValidForType(EVT VT, const APFloat& Val) { assert(VT.isFloatingPoint() && "Can only convert between FP types"); @@ -88,7 +90,7 @@ bool ConstantFPSDNode::isValueValidForType(MVT VT, // convert modifies in place, so make a copy. APFloat Val2 = APFloat(Val); bool losesInfo; - (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &losesInfo); return !losesInfo; } @@ -243,7 +245,7 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { /// if the operation does not depend on the sign of the input (setne and seteq). 
static int isSignedOp(ISD::CondCode Opcode) { switch (Opcode) { - default: assert(0 && "Illegal integer setcc operation!"); + default: llvm_unreachable("Illegal integer setcc operation!"); case ISD::SETEQ: case ISD::SETNE: return 0; case ISD::SETLT: @@ -363,11 +365,8 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { switch (N->getOpcode()) { case ISD::TargetExternalSymbol: case ISD::ExternalSymbol: - assert(0 && "Should only be used on nodes with operands"); + llvm_unreachable("Should only be used on nodes with operands"); default: break; // Normal nodes don't need extra info. - case ISD::ARG_FLAGS: - ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits()); - break; case ISD::TargetConstant: case ISD::Constant: ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); @@ -403,11 +402,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::SRCVALUE: ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); break; - case ISD::MEMOPERAND: { - const MachineMemOperand &MO = cast<MemOperandSDNode>(N)->MO; - MO.Profile(ID); - break; - } case ISD::FrameIndex: case ISD::TargetFrameIndex: ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); @@ -429,12 +423,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getTargetFlags()); break; } - case ISD::CALL: { - const CallSDNode *Call = cast<CallSDNode>(N); - ID.AddInteger(Call->getCallingConv()); - ID.AddInteger(Call->isVarArg()); - break; - } case ISD::LOAD: { const LoadSDNode *LD = cast<LoadSDNode>(N); ID.AddInteger(LD->getMemoryVT().getRawBits()); @@ -466,7 +454,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } case ISD::VECTOR_SHUFFLE: { const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); - for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); + for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); i != e; ++i) ID.AddInteger(SVN->getMaskElt(i)); break; @@ 
-488,20 +476,18 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { } /// encodeMemSDNodeFlags - Generic routine for computing a value for use in -/// the CSE map that carries alignment, volatility, indexing mode, and +/// the CSE map that carries volatility, indexing mode, and /// extension/truncation information. /// static inline unsigned -encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, - bool isVolatile, unsigned Alignment) { +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && "AM may not require more than 3 bits!"); return ConvType | (AM << 2) | - (isVolatile << 5) | - ((Log2_32(Alignment) + 1) << 6); + (isVolatile << 5); } //===----------------------------------------------------------------------===// @@ -519,7 +505,6 @@ static bool doNotCSE(SDNode *N) { case ISD::DBG_LABEL: case ISD::DBG_STOPPOINT: case ISD::EH_LABEL: - case ISD::DECLARE: return true; // Never CSE these nodes. } @@ -626,7 +611,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { bool Erased = false; switch (N->getOpcode()) { case ISD::EntryToken: - assert(0 && "EntryToken should not be in CSEMaps!"); + llvm_unreachable("EntryToken should not be in CSEMaps!"); return false; case ISD::HANDLENODE: return false; // noop. 
case ISD::CONDCODE: @@ -646,12 +631,12 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { break; } case ISD::VALUETYPE: { - MVT VT = cast<VTSDNode>(N)->getVT(); + EVT VT = cast<VTSDNode>(N)->getVT(); if (VT.isExtended()) { Erased = ExtendedValueTypeNodes.erase(VT); } else { - Erased = ValueTypeNodes[VT.getSimpleVT()] != 0; - ValueTypeNodes[VT.getSimpleVT()] = 0; + Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0; + ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0; } break; } @@ -667,8 +652,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && !N->isMachineOpcode() && !doNotCSE(N)) { N->dump(this); - cerr << "\n"; - assert(0 && "Node is not in map!"); + errs() << "\n"; + llvm_unreachable("Node is not in map!"); } #endif return Erased; @@ -762,7 +747,7 @@ void SelectionDAG::VerifyNode(SDNode *N) { default: break; case ISD::BUILD_PAIR: { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(N->getNumValues() == 1 && "Too many results!"); assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && "Wrong return type!"); @@ -780,7 +765,7 @@ void SelectionDAG::VerifyNode(SDNode *N) { assert(N->getValueType(0).isVector() && "Wrong return type!"); assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && "Wrong number of operands!"); - MVT EltVT = N->getValueType(0).getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) assert((I->getValueType() == EltVT || (EltVT.isInteger() && I->getValueType().isInteger() && @@ -791,13 +776,13 @@ void SelectionDAG::VerifyNode(SDNode *N) { } } -/// getMVTAlignment - Compute the default alignment value for the +/// getEVTAlignment - Compute the default alignment value for the /// given type. 
/// -unsigned SelectionDAG::getMVTAlignment(MVT VT) const { +unsigned SelectionDAG::getEVTAlignment(EVT VT) const { const Type *Ty = VT == MVT::iPTR ? - PointerType::get(Type::Int8Ty, 0) : - VT.getTypeForMVT(); + PointerType::get(Type::getInt8Ty(*getContext()), 0) : + VT.getTypeForEVT(*getContext()); return TLI.getTargetData()->getABITypeAlignment(Ty); } @@ -815,6 +800,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, MF = &mf; MMI = mmi; DW = dw; + Context = &mf.getFunction()->getContext(); } SelectionDAG::~SelectionDAG() { @@ -846,7 +832,19 @@ void SelectionDAG::clear() { Root = getEntryNode(); } -SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) { +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::SIGN_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ZERO_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { if (Op.getValueType() == VT) return Op; APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(), VT.getSizeInBits()); @@ -856,29 +854,29 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// -SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) { - MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; +SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } -SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) { - MVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); } -SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) { - return getConstant(*ConstantInt::get(Val), VT, isT); +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { + return getConstant(*ConstantInt::get(*Context, Val), VT, isT); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { assert(VT.isInteger() && "Cannot create FP integer constant!"); - MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; assert(Val.getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -913,14 +911,14 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { } -SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) { - return getConstantFP(*ConstantFP::get(V), VT, isTarget); +SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); } -SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){ +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); - MVT EltVT = + EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; // Do the map lookup using the actual bit pattern for the floating point @@ -953,8 +951,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){ return Result; } -SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) { - MVT EltVT = +SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; if (EltVT==MVT::f32) return getConstantFP(APFloat((float)Val), VT, isTarget); @@ -963,14 +961,15 @@ SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) { } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, - MVT VT, int64_t Offset, + EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); - + // Truncate (with sign-extension) the offset value to the pointer size. - unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + EVT PTy = TLI.getPointerTy(); + unsigned BitWidth = PTy.getSizeInBits(); if (BitWidth < 64) Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); @@ -1002,7 +1001,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, return SDValue(N, 0); } -SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) { +SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? 
ISD::TargetFrameIndex : ISD::FrameIndex; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1017,7 +1016,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) { return SDValue(N, 0); } -SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget, +SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); @@ -1036,9 +1035,9 @@ SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget, return SDValue(N, 0); } -SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT, +SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT, unsigned Alignment, int Offset, - bool isTarget, + bool isTarget, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); @@ -1062,7 +1061,7 @@ SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT, } -SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT, +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, unsigned char TargetFlags) { @@ -1101,26 +1100,13 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { return SDValue(N, 0); } -SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) { - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0); - ID.AddInteger(Flags.getRawBits()); - void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<ARG_FLAGSSDNode>(); - new (N) ARG_FLAGSSDNode(Flags); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); -} - -SDValue SelectionDAG::getValueType(MVT VT) { - if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size()) - ValueTypeNodes.resize(VT.getSimpleVT()+1); +SDValue SelectionDAG::getValueType(EVT 
VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= + ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); SDNode *&N = VT.isExtended() ? - ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()]; + ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; if (N) return SDValue(N, 0); N = NodeAllocator.Allocate<VTSDNode>(); @@ -1129,7 +1115,7 @@ SDValue SelectionDAG::getValueType(MVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) { +SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); @@ -1138,7 +1124,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT, +SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags) { SDNode *&N = TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, @@ -1177,19 +1163,19 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { } } -SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, +SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2, const int *Mask) { assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); - assert(VT.isVector() && N1.getValueType().isVector() && + assert(VT.isVector() && N1.getValueType().isVector() && "Vector Shuffle VTs must be a vectors"); assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && "Vector Shuffle VTs must have same element type"); // Canonicalize shuffle undef, undef -> undef if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) - return N1; + return getUNDEF(VT); - // Validate that all indices in Mask are within the range of the elements + // Validate that all indices in 
Mask are within the range of the elements // input to the shuffle. unsigned NElts = VT.getVectorNumElements(); SmallVector<int, 8> MaskVec; @@ -1197,18 +1183,18 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, assert(Mask[i] < (int)(NElts * 2) && "Index out of range"); MaskVec.push_back(Mask[i]); } - + // Canonicalize shuffle v, v -> v, undef if (N1 == N2) { N2 = getUNDEF(VT); for (unsigned i = 0; i != NElts; ++i) if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts; } - + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. if (N1.getOpcode() == ISD::UNDEF) commuteShuffle(N1, N2, MaskVec); - + // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef bool AllLHS = true, AllRHS = true; @@ -1231,7 +1217,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, N1 = getUNDEF(VT); commuteShuffle(N1, N2, MaskVec); } - + // If Identity shuffle, or all shuffle in to undef, return that node. bool AllUndef = true; bool Identity = true; @@ -1239,7 +1225,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; if (MaskVec[i] >= 0) AllUndef = false; } - if (Identity) + if (Identity && NElts == N1.getValueType().getVectorNumElements()) return N1; if (AllUndef) return getUNDEF(VT); @@ -1249,17 +1235,17 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); for (unsigned i = 0; i != NElts; ++i) ID.AddInteger(MaskVec[i]); - + void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - + // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. This memory will be "leaked" when // the node is deallocated, but recovered when the NodeAllocator is released. 
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); - + ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>(); new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); @@ -1267,7 +1253,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, return SDValue(N, 0); } -SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, +SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code) { @@ -1289,7 +1275,7 @@ SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, return SDValue(N, 0); } -SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) { +SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); ID.AddInteger(RegNo); @@ -1305,7 +1291,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) { SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root, unsigned Line, unsigned Col, - Value *CU) { + MDNode *CU) { SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>(); new (N) DbgStopPointSDNode(Root, Line, Col, CU); N->setDebugLoc(DL); @@ -1349,32 +1335,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { return SDValue(N, 0); } -SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) { -#ifndef NDEBUG - const Value *v = MO.getValue(); - assert((!v || isa<PointerType>(v->getType())) && - "SrcValue is not a pointer?"); -#endif - - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0); - MO.Profile(ID); - - void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return SDValue(E, 0); - - SDNode *N = NodeAllocator.Allocate<MemOperandSDNode>(); - new (N) MemOperandSDNode(MO); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); -} - /// getShiftAmountOperand - Return the specified value 
casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { - MVT OpTy = Op.getValueType(); + EVT OpTy = Op.getValueType(); MVT ShTy = TLI.getShiftAmountTy(); if (OpTy == ShTy || OpTy.isVector()) return Op; @@ -1384,10 +1348,10 @@ SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { /// CreateStackTemporary - Create a stack temporary, suitable for holding the /// specified value type. -SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) { +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); - unsigned ByteSize = VT.getStoreSizeInBits()/8; - const Type *Ty = VT.getTypeForMVT(); + unsigned ByteSize = VT.getStoreSize(); + const Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); @@ -1397,11 +1361,11 @@ SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) { /// CreateStackTemporary - Create a stack temporary suitable for holding /// either of the specified value types. 
-SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) { +SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { unsigned Bytes = std::max(VT1.getStoreSizeInBits(), VT2.getStoreSizeInBits())/8; - const Type *Ty1 = VT1.getTypeForMVT(); - const Type *Ty2 = VT2.getTypeForMVT(); + const Type *Ty1 = VT1.getTypeForEVT(*getContext()); + const Type *Ty2 = VT2.getTypeForEVT(*getContext()); const TargetData *TD = TLI.getTargetData(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1411,7 +1375,7 @@ SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) { return getFrameIndex(FrameIdx, TLI.getPointerTy()); } -SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, DebugLoc dl) { // These setcc operations always fold. switch (Cond) { @@ -1441,7 +1405,7 @@ SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, const APInt &C1 = N1C->getAPIntValue(); switch (Cond) { - default: assert(0 && "Unknown integer setcc!"); + default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: return getConstant(C1 == C2, VT); case ISD::SETNE: return getConstant(C1 != C2, VT); case ISD::SETULT: return getConstant(C1.ult(C2), VT); @@ -1516,6 +1480,10 @@ SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + // This predicate is not safe for vector operations. 
+ if (Op.getValueType().isVector()) + return false; + unsigned BitWidth = Op.getValueSizeInBits(); return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); } @@ -1743,7 +1711,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } return; case ISD::SIGN_EXTEND_INREG: { - MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); unsigned EBits = EVT.getSizeInBits(); // Sign extension. Compute the demanded bits in the result that are not @@ -1788,14 +1756,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::LOAD: { if (ISD::isZEXTLoad(Op.getNode())) { LoadSDNode *LD = cast<LoadSDNode>(Op); - MVT VT = LD->getMemoryVT(); + EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; } return; } case ISD::ZERO_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; APInt InMask = Mask; @@ -1809,7 +1777,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::SIGN_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; @@ -1850,7 +1818,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::ANY_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InMask = Mask; InMask.trunc(InBits); @@ -1862,7 +1830,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::TRUNCATE: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = 
Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InMask = Mask; InMask.zext(InBits); @@ -1875,7 +1843,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, break; } case ISD::AssertZext: { - MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, KnownOne, Depth+1); @@ -1981,7 +1949,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: - TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this); + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, + Depth); } return; } @@ -1993,7 +1962,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getSizeInBits(); unsigned Tmp, Tmp2; @@ -2212,6 +2181,19 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); } +bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { + // If we're told that NaNs won't happen, assume they won't. + if (FiniteOnlyFPMath()) + return true; + + // If the value is a constant, we can obviously see if it is a NaN or not. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) + return !C->getValueAPF().isNaN(); + + // TODO: Recognize more cases here. 
+ + return false; +} bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); @@ -2228,7 +2210,7 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { /// element of the result of the vector shuffle. SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, unsigned i) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); if (N->getMaskElt(i) < 0) return getUNDEF(VT.getVectorElementType()); @@ -2239,7 +2221,7 @@ SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, if (V.getOpcode() == ISD::BIT_CONVERT) { V = V.getOperand(0); - MVT VVT = V.getValueType(); + EVT VVT = V.getValueType(); if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) return SDValue(); } @@ -2256,7 +2238,7 @@ SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, /// getNode - Gets or creates the specified node. /// -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) { +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; @@ -2274,7 +2256,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) { } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - MVT VT, SDValue Operand) { + EVT VT, SDValue Operand) { // Constant fold unary operations with an integer constant operand. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { const APInt &Val = C->getAPIntValue(); @@ -2332,7 +2314,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. 
- (void)V.convert(*MVTToAPFloatSemantics(VT), + (void)V.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -2366,7 +2348,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::MERGE_VALUES: case ISD::CONCAT_VECTORS: return Operand; // Factor, merge or concat of one node? No need. - case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node"); + case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); case ISD::FP_EXTEND: assert(VT.isFloatingPoint() && Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); @@ -2487,7 +2469,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, - MVT VT, + EVT VT, ConstantSDNode *Cst1, ConstantSDNode *Cst2) { const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); @@ -2522,7 +2504,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, return SDValue(); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); @@ -2624,7 +2606,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return N1; break; case ISD::FP_ROUND_INREG: { - MVT EVT = cast<VTSDNode>(N2)->getVT(); + EVT EVT = cast<VTSDNode>(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg round!"); assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && "Cannot FP_ROUND_INREG integer types"); @@ -2641,7 +2623,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, break; case ISD::AssertSext: case ISD::AssertZext: { - MVT EVT = cast<VTSDNode>(N2)->getVT(); + EVT EVT = cast<VTSDNode>(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && 
EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); @@ -2650,7 +2632,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, break; } case ISD::SIGN_EXTEND_INREG: { - MVT EVT = cast<VTSDNode>(N2)->getVT(); + EVT EVT = cast<VTSDNode>(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); @@ -2688,13 +2670,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, // expanding large vector constants. if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { SDValue Elt = N1.getOperand(N2C->getZExtValue()); - if (Elt.getValueType() != VT) { + EVT VEltTy = N1.getValueType().getVectorElementType(); + if (Elt.getValueType() != VEltTy) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. - assert(VT.isInteger() && Elt.getValueType().isInteger() && - VT.bitsLE(Elt.getValueType()) && - "Bad type for BUILD_VECTOR operand"); - Elt = getNode(ISD::TRUNCATE, DL, VT, Elt); + Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt); + } + if (VT != VEltTy) { + // If the vector element type is not legal, the EXTRACT_VECTOR_ELT + // result is implicitly extended. + Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt); } return Elt; } @@ -2895,7 +2880,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. 
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); @@ -2938,7 +2923,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, } break; case ISD::VECTOR_SHUFFLE: - assert(0 && "should use getVectorShuffle constructor!"); + llvm_unreachable("should use getVectorShuffle constructor!"); break; case ISD::BIT_CONVERT: // Fold bit_convert nodes from a type to themselves. @@ -2971,23 +2956,46 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VT, Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; return getNode(Opcode, DL, VT, Ops, 5); } +/// getStackArgumentTokenFactor - Compute a TokenFactor to force all +/// the incoming stack arguments to be loaded from the stack. +SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { + SmallVector<SDValue, 8> ArgChains; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. + ArgChains.push_back(Chain); + + // Add a chain value for each stack argument. + for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), + UE = getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) + if (FI->getIndex() < 0) + ArgChains.push_back(SDValue(L, 1)); + + // Build a tokenfactor for all the chains. 
+ return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + /// getMemsetValue - Vectorized representation of the memset value /// operand. -static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG, +static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, DebugLoc dl) { unsigned NumBits = VT.isVector() ? VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits(); @@ -3021,9 +3029,9 @@ static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG, /// getMemsetStringVal - Similar to getMemsetValue. Except this is only /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. -static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, - const TargetLowering &TLI, - std::string &Str, unsigned Offset) { +static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, + const TargetLowering &TLI, + std::string &Str, unsigned Offset) { // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) @@ -3031,7 +3039,8 @@ static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? 
MVT::i32 : MVT::i64; return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts))); + DAG.getConstant(0, + EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts))); } assert(!VT.isVector() && "Can't handle vector type here!"); @@ -3051,7 +3060,7 @@ static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, /// static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SelectionDAG &DAG) { - MVT VT = Base.getValueType(); + EVT VT = Base.getValueType(); return DAG.getNode(ISD::ADD, Base.getDebugLoc(), VT, Base, DAG.getConstant(Offset, VT)); } @@ -3083,7 +3092,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { /// to replace the memset / memcpy is below the threshold. It also returns the /// types of the sequence of memory ops to perform memset / memcpy. static -bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, +bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, SDValue Dst, SDValue Src, unsigned Limit, uint64_t Size, unsigned &Align, std::string &Str, bool &isSrcStr, @@ -3091,11 +3100,11 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, const TargetLowering &TLI) { isSrcStr = isMemSrcFromString(Src, Str); bool isSrcConst = isa<ConstantSDNode>(Src); - bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(); - MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); + EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); + bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT); if (VT != MVT::iAny) { - unsigned NewAlign = (unsigned) - TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT()); + const Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); // If source is a string constant, this will require an unaligned load. 
if (NewAlign > Align && (isSrcConst || AllowUnalign)) { if (Dst.getOpcode() != ISD::FrameIndex) { @@ -3120,7 +3129,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, } if (VT == MVT::iAny) { - if (AllowUnalign) { + if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) { VT = MVT::i64; } else { switch (Align & 7) { @@ -3133,7 +3142,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, MVT LVT = MVT::i64; while (!TLI.isTypeLegal(LVT)) - LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1); + LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); assert(LVT.isInteger()); if (VT.bitsGT(LVT)) @@ -3148,12 +3157,12 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, if (VT.isVector()) { VT = MVT::i64; while (!TLI.isTypeLegal(VT)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); VTSize = VT.getSizeInBits() / 8; } else { // This can result in a type that is not legal on the target, e.g. // 1 or 2 bytes on PPC. - VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); VTSize >>= 1; } } @@ -3177,7 +3186,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // Expand memcpy to a series of load and store ops if the size operand falls // below a certain threshold. 
- std::vector<MVT> MemOps; + std::vector<EVT> MemOps; uint64_t Limit = -1ULL; if (!AlwaysInline) Limit = TLI.getMaxStoresPerMemcpy(); @@ -3193,8 +3202,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SmallVector<SDValue, 8> OutChains; unsigned NumMemOps = MemOps.size(); uint64_t SrcOff = 0, DstOff = 0; - for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + for (unsigned i = 0; i != NumMemOps; ++i) { + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; @@ -3214,7 +3223,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // thing to do is generate a LoadExt/StoreTrunc pair. These simplify // to Load/Store if NVT==VT. // FIXME does the case above also need this? - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), @@ -3242,7 +3251,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // Expand memmove to a series of load and store ops if the size operand falls // below a certain threshold. 
- std::vector<MVT> MemOps; + std::vector<EVT> MemOps; uint64_t Limit = -1ULL; if (!AlwaysInline) Limit = TLI.getMaxStoresPerMemmove(); @@ -3260,7 +3269,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SmallVector<SDValue, 8> OutChains; unsigned NumMemOps = MemOps.size(); for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; @@ -3275,7 +3284,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, &LoadChains[0], LoadChains.size()); OutChains.clear(); for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; @@ -3299,7 +3308,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, // Expand memset to a series of load/store ops if the size operand // falls below a certain threshold. - std::vector<MVT> MemOps; + std::vector<EVT> MemOps; std::string Str; bool CopyFromStr; if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(), @@ -3311,7 +3320,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, unsigned NumMemOps = MemOps.size(); for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value = getMemsetValue(Src, VT, DAG, dl); SDValue Store = DAG.getStore(Chain, dl, Value, @@ -3368,15 +3377,18 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, 0, CallingConv::C, false, - getExternalSymbol("memcpy", TLI.getPointerTy()), + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), + TLI.getPointerTy()), Args, *this, dl); return CallResult.second; } @@ -3414,15 +3426,18 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, 0, CallingConv::C, false, - getExternalSymbol("memmove", TLI.getPointerTy()), + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), + TLI.getPointerTy()), Args, *this, dl); return CallResult.second; } @@ -3456,7 +3471,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. 
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(); + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -3466,31 +3481,61 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); else Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); - Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true; + Entry.Node = Src; + Entry.Ty = Type::getInt32Ty(*getContext()); + Entry.isSExt = true; Args.push_back(Entry); - Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false; + Entry.Node = Size; + Entry.Ty = IntPtrTy; + Entry.isSExt = false; Args.push_back(Entry); // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, 0, CallingConv::C, false, - getExternalSymbol("memset", TLI.getPointerTy()), + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMSET), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), + TLI.getPointerTy()), Args, *this, dl); return CallResult.second; } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, const Value* PtrVal, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + // Check if the memory reference references a frame index + if (!PtrVal) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // 
For now, atomics are considered to be volatile always. + Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrVal, Flags, 0, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Cmp, + SDValue Swp, MachineMemOperand *MMO) { assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); - MVT VT = Cmp.getValueType(); - - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(MemVT); + EVT VT = Cmp.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); FoldingSetNodeID ID; @@ -3498,21 +3543,48 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; AddNodeIDNode(ID, Opcode, VTs, Ops, 4); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, - Chain, Ptr, Cmp, Swp, PtrVal, Alignment); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value* PtrVal, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + // Check if the memory reference references a frame index + if (!PtrVal) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + PtrVal = 
PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // For now, atomics are considered to be volatile always. + Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrVal, Flags, 0, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + MachineMemOperand *MMO) { assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || @@ -3526,10 +3598,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, Opcode == ISD::ATOMIC_SWAP) && "Invalid Atomic Op"); - MVT VT = Val.getValueType(); - - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(MemVT); + EVT VT = Val.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); FoldingSetNodeID ID; @@ -3537,11 +3606,12 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Ops[] = {Chain, Ptr, Val}; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, - Chain, Ptr, Val, PtrVal, Alignment); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3554,7 +3624,7 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, if (NumOps == 1) return Ops[0]; - SmallVector<MVT, 4> VTs; + SmallVector<EVT, 4> VTs; VTs.reserve(NumOps); for (unsigned i = 0; i < NumOps; ++i) 
VTs.push_back(Ops[i].getValueType()); @@ -3564,9 +3634,9 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, - const MVT *VTs, unsigned NumVTs, + const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, - MVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, const Value *srcValue, int SVOff, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, @@ -3577,81 +3647,104 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, - MVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, const Value *srcValue, int SVOff, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { + if (Align == 0) // Ensure that codegen never sees alignment 0 + Align = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = 0; + if (WriteMem) + Flags |= MachineMemOperand::MOStore; + if (ReadMem) + Flags |= MachineMemOperand::MOLoad; + if (Vol) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(srcValue, Flags, SVOff, + MemVT.getStoreSize(), Align); + + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachineMemOperand *MMO) { + assert((Opcode == ISD::INTRINSIC_VOID || + Opcode == ISD::INTRINSIC_W_CHAIN || + (Opcode <= INT_MAX && + (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && + "Opcode is not a memory-accessing opcode!"); + // Memoize the node unless it returns a flag. 
MemIntrinsicSDNode *N; if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, - srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); CSEMap.InsertNode(N, IP); } else { N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, - srcValue, SVOff, Align, Vol, ReadMem, WriteMem); - } - AllNodes.push_back(N); - return SDValue(N, 0); -} - -SDValue -SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, - bool IsTailCall, bool IsInreg, SDVTList VTs, - const SDValue *Operands, unsigned NumOperands, - unsigned NumFixedArgs) { - // Do not include isTailCall in the folding set profile. - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands); - ID.AddInteger(CallingConv); - ID.AddInteger(IsVarArgs); - void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - // Instead of including isTailCall in the folding set, we just - // set the flag of the existing node. 
- if (!IsTailCall) - cast<CallSDNode>(E)->setNotTailCall(); - return SDValue(E, 0); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } - SDNode *N = NodeAllocator.Allocate<CallSDNode>(); - new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg, - VTs, Operands, NumOperands, NumFixedArgs); - CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, MVT VT, SDValue Chain, + ISD::LoadExtType ExtType, EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, - const Value *SV, int SVOffset, MVT EVT, + const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(VT); + Alignment = getEVTAlignment(VT); + + // Check if the memory reference references a frame index + if (!SV) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + SV = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(SV, Flags, SVOffset, + MemVT.getStoreSize(), Alignment); + return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO); +} - if (VT == EVT) { +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, + ISD::LoadExtType ExtType, EVT VT, SDValue Chain, + SDValue Ptr, SDValue Offset, EVT MemVT, + MachineMemOperand *MMO) { + if (VT == MemVT) { ExtType = ISD::NON_EXTLOAD; } else if (ExtType == ISD::NON_EXTLOAD) { - assert(VT == EVT && "Non-extending load from different memory type!"); + assert(VT == MemVT && "Non-extending load from different memory type!"); } else { // Extending load. 
if (VT.isVector()) - assert(EVT.getVectorNumElements() == VT.getVectorNumElements() && + assert(MemVT.getVectorNumElements() == VT.getVectorNumElements() && "Invalid vector extload!"); else - assert(EVT.bitsLT(VT) && + assert(MemVT.bitsLT(VT) && "Should only be an extending load, not truncating!"); assert((ExtType == ISD::EXTLOAD || VT.isInteger()) && "Cannot sign/zero extend a FP/Vector load!"); - assert(VT.isInteger() == EVT.isInteger() && + assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); } @@ -3664,20 +3757,21 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, SDValue Ops[] = { Chain, Ptr, Offset }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); - ID.AddInteger(EVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment)); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile())); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<LoadSDNode>(); - new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset, - Alignment, isVolatile); + new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl, +SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, bool isVolatile, unsigned Alignment) { @@ -3686,14 +3780,14 @@ SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl, SV, SVOffset, VT, isVolatile, Alignment); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, const Value *SV, - int SVOffset, MVT EVT, + int SVOffset, EVT 
MemVT, bool isVolatile, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, - SV, SVOffset, EVT, isVolatile, Alignment); + SV, SVOffset, MemVT, isVolatile, Alignment); } SDValue @@ -3711,25 +3805,43 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, bool isVolatile, unsigned Alignment) { - MVT VT = Val.getValueType(); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(VT); + Alignment = getEVTAlignment(Val.getValueType()); + // Check if the memory reference references a frame index + if (!SV) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + SV = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(SV, Flags, SVOffset, + Val.getValueType().getStoreSize(), Alignment); + + return getStore(Chain, dl, Val, Ptr, MMO); +} + +SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, MachineMemOperand *MMO) { + EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); SDValue Ops[] = { Chain, Val, Ptr, Undef }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, - isVolatile, Alignment)); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile())); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode *N = 
NodeAllocator.Allocate<StoreSDNode>(); - new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, - VT, SV, SVOffset, Alignment, isVolatile); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3737,19 +3849,39 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, - int SVOffset, MVT SVT, + int SVOffset, EVT SVT, bool isVolatile, unsigned Alignment) { - MVT VT = Val.getValueType(); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(SVT); + + // Check if the memory reference references a frame index + if (!SV) + if (const FrameIndexSDNode *FI = + dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) + SV = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); + + return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); +} + +SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, EVT SVT, + MachineMemOperand *MMO) { + EVT VT = Val.getValueType(); if (VT == SVT) - return getStore(Chain, dl, Val, Ptr, SV, SVOffset, isVolatile, Alignment); + return getStore(Chain, dl, Val, Ptr, MMO); assert(VT.bitsGT(SVT) && "Not a truncation?"); assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(VT); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -3757,14 +3889,14 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, 
ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, - isVolatile, Alignment)); + ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile())); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); - new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, - SVT, SV, SVOffset, Alignment, isVolatile); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3788,21 +3920,20 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); new (N) StoreSDNode(Ops, dl, VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), - ST->getSrcValue(), ST->getSrcValueOffset(), - ST->getAlignment(), ST->isVolatile()); + ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl, +SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue SV) { SDValue Ops[] = { Chain, Ptr, SV }; return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, const SDUse *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -3818,7 +3949,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return getNode(Opcode, DL, VT, &NewOps[0], NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, const SDValue *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ 
-3876,14 +4007,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - const std::vector<MVT> &ResultTys, + const std::vector<EVT> &ResultTys, const SDValue *Ops, unsigned NumOps) { return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), Ops, NumOps); } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - const MVT *VTs, unsigned NumVTs, + const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps) { if (NumVTs == 1) return getNode(Opcode, DL, VTs[0], Ops, NumOps); @@ -3895,11 +4026,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); +#if 0 switch (Opcode) { // FIXME: figure out how to safely handle things like // int foo(int x) { return 1 << (x & 255); } // int bar() { return foo(256); } -#if 0 case ISD::SRA_PARTS: case ISD::SRL_PARTS: case ISD::SHL_PARTS: @@ -3915,8 +4046,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); } break; -#endif } +#endif // Memoize the node unless it returns a flag. 
SDNode *N; @@ -3998,17 +4129,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return getNode(Opcode, DL, VTList, Ops, 5); } -SDVTList SelectionDAG::getVTList(MVT VT) { +SDVTList SelectionDAG::getVTList(EVT VT) { return makeVTList(SDNode::getValueTypeList(VT), 1); } -SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), E = VTList.rend(); I != E; ++I) if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) return *I; - MVT *Array = Allocator.Allocate<MVT>(2); + EVT *Array = Allocator.Allocate<EVT>(2); Array[0] = VT1; Array[1] = VT2; SDVTList Result = makeVTList(Array, 2); @@ -4016,14 +4147,14 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { return Result; } -SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), E = VTList.rend(); I != E; ++I) if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && I->VTs[2] == VT3) return *I; - MVT *Array = Allocator.Allocate<MVT>(3); + EVT *Array = Allocator.Allocate<EVT>(3); Array[0] = VT1; Array[1] = VT2; Array[2] = VT3; @@ -4032,14 +4163,14 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { return Result; } -SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), E = VTList.rend(); I != E; ++I) if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && I->VTs[2] == VT3 && I->VTs[3] == VT4) return *I; - MVT *Array = Allocator.Allocate<MVT>(3); + EVT *Array = Allocator.Allocate<EVT>(3); Array[0] = VT1; Array[1] = VT2; Array[2] = VT3; @@ -4049,9 +4180,9 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { return Result; } -SDVTList 
SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) { +SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { switch (NumVTs) { - case 0: assert(0 && "Cannot have nodes without results!"); + case 0: llvm_unreachable("Cannot have nodes without results!"); case 1: return getVTList(VTs[0]); case 2: return getVTList(VTs[0], VTs[1]); case 3: return getVTList(VTs[0], VTs[1], VTs[2]); @@ -4073,7 +4204,7 @@ SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) { return *I; } - MVT *Array = Allocator.Allocate<MVT>(NumVTs); + EVT *Array = Allocator.Allocate<EVT>(NumVTs); std::copy(VTs, VTs+NumVTs, Array); SDVTList Result = makeVTList(Array, NumVTs); VTList.push_back(Result); @@ -4215,20 +4346,20 @@ void SDNode::DropOperands() { /// machine opcode. /// SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT) { + EVT VT) { SDVTList VTs = getVTList(VT); return SelectNodeTo(N, MachineOpc, VTs, 0, 0); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, SDValue Op1) { + EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; @@ -4236,7 +4367,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; @@ -4244,41 +4375,41 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, const SDValue *Ops, + EVT VT, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode 
*SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, const SDValue *Ops, + EVT VT1, EVT VT2, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2) { + EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, MVT VT3, + EVT VT1, EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, MVT VT3, MVT VT4, + EVT VT1, EVT VT2, EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; @@ -4286,7 +4417,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; @@ -4294,7 +4425,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); @@ -4303,7 +4434,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, MVT VT3, + EVT VT1, EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, 
SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -4318,20 +4449,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT) { + EVT VT) { SDVTList VTs = getVTList(VT); return MorphNodeTo(N, Opc, VTs, 0, 0); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, SDValue Op1) { + EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return MorphNodeTo(N, Opc, VTs, Ops, 1); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; @@ -4339,7 +4470,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; @@ -4347,34 +4478,34 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, const SDValue *Ops, + EVT VT, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT); return MorphNodeTo(N, Opc, VTs, Ops, NumOps); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, const SDValue *Ops, + EVT VT1, EVT VT2, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2); return MorphNodeTo(N, Opc, VTs, Ops, NumOps); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2) { + EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, MVT VT3, + EVT VT1, EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3); return MorphNodeTo(N, Opc, VTs, Ops, NumOps); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, 
unsigned Opc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; @@ -4382,7 +4513,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; @@ -4390,7 +4521,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); @@ -4441,29 +4572,35 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, DeadNodeSet.insert(Used); } - // If NumOps is larger than the # of operands we currently have, reallocate - // the operand list. - if (NumOps > N->NumOperands) { - if (N->OperandsNeedDelete) - delete[] N->OperandList; - - if (N->isMachineOpcode()) { - // We're creating a final node that will live unmorphed for the - // remainder of the current SelectionDAG iteration, so we can allocate - // the operands directly out of a pool with no recycling metadata. - N->OperandList = OperandAllocator.Allocate<SDUse>(NumOps); - N->OperandsNeedDelete = false; - } else { - N->OperandList = new SDUse[NumOps]; + if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) { + // Initialize the memory references information. + MN->setMemRefs(0, 0); + // If NumOps is larger than the # of operands we can have in a + // MachineSDNode, reallocate the operand list. + if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { + if (MN->OperandsNeedDelete) + delete[] MN->OperandList; + if (NumOps > array_lengthof(MN->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. 
+ MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), + Ops, NumOps); + else + MN->InitOperands(MN->LocalOperands, Ops, NumOps); + MN->OperandsNeedDelete = false; + } else + MN->InitOperands(MN->OperandList, Ops, NumOps); + } else { + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. + if (NumOps > N->NumOperands) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + N->InitOperands(new SDUse[NumOps], Ops, NumOps); N->OperandsNeedDelete = true; - } - } - - // Assign the new operands. - N->NumOperands = NumOps; - for (unsigned i = 0, e = NumOps; i != e; ++i) { - N->OperandList[i].setUser(N); - N->OperandList[i].setInitial(Ops[i]); + } else + MN->InitOperands(MN->OperandList, Ops, NumOps); } // Delete any nodes that are still dead after adding the uses for the @@ -4481,108 +4618,189 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } -/// getTargetNode - These are used for target selectors to create a new node -/// with specified return type(s), target opcode, and operands. +/// getMachineNode - These are used for target selectors to create a new node +/// with specified return type(s), MachineInstr opcode, and operands. /// -/// Note that getTargetNode returns the resultant node. If there is already a +/// Note that getMachineNode returns the resultant node. If there is already a /// node of the specified opcode and operands, it returns that node instead of /// the current one. 
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) { - return getNode(~Opcode, dl, VT).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, 0, 0); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1) { - return getNode(~Opcode, dl, VT, Op1).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1, SDValue Op2) { - return getNode(~Opcode, dl, VT, Op1, Op2).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1, SDValue Op2, - SDValue Op3) { - return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - const SDValue *Ops, unsigned NumOps) { - return getNode(~Opcode, dl, VT, Ops, NumOps).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2) { 
+MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - SDValue Op; - return getNode(~Opcode, dl, VTs, &Op, 0).getNode(); + return getMachineNode(Opcode, dl, VTs, 0, 0); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); - return getNode(~Opcode, dl, VTs, &Op1, 1).getNode(); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1, - SDValue Op2) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1, - SDValue Op2, SDValue Op3) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; - return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, - const SDValue *Ops, unsigned NumOps) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, + const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2); - return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode 
*SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, MVT VT3, - SDValue Op1, SDValue Op2) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2 }; - return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, MVT VT3, - SDValue Op1, SDValue Op2, - SDValue Op3) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, MVT VT3, - const SDValue *Ops, unsigned NumOps) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, MVT VT3, MVT VT4, - const SDValue *Ops, unsigned NumOps) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, + const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - const std::vector<MVT> &ResultTys, - const SDValue *Ops, unsigned NumOps) { - return 
getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + const std::vector<EVT> &ResultTys, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, + const SDValue *Ops, unsigned NumOps) { + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag; + MachineSDNode *N; + void *IP; + + if (DoCSE) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); + IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return cast<MachineSDNode>(E); + } + + // Allocate a new MachineSDNode. + N = NodeAllocator.Allocate<MachineSDNode>(); + new (N) MachineSDNode(~Opcode, DL, VTs); + + // Initialize the operands list. + if (NumOps > array_lengthof(N->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), + Ops, NumOps); + else + N->InitOperands(N->LocalOperands, Ops, NumOps); + N->OperandsNeedDelete = false; + + if (DoCSE) + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return N; +} + +/// getTargetExtractSubreg - A convenience function for creating +/// TargetInstrInfo::EXTRACT_SUBREG nodes. +SDValue +SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Subreg = getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, DL, + VT, Operand, SRIdxVal); + return SDValue(Subreg, 0); +} + +/// getTargetInsertSubreg - A convenience function for creating +/// TargetInstrInfo::INSERT_SUBREG nodes. 
+SDValue +SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand, SDValue Subreg) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Result = getMachineNode(TargetInstrInfo::INSERT_SUBREG, DL, + VT, Operand, Subreg, SRIdxVal); + return SDValue(Result, 0); } /// getNodeIfExists - Get the specified node if it's already available, or @@ -4937,64 +5155,28 @@ HandleSDNode::~HandleSDNode() { } GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, - MVT VT, int64_t o, unsigned char TF) + EVT VT, int64_t o, unsigned char TF) : SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = const_cast<GlobalValue*>(GA); } -MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt, - const Value *srcValue, int SVO, - unsigned alignment, bool vol) - : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); - assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); - assert(getAlignment() == alignment && "Alignment representation error!"); - assert(isVolatile() == vol && "Volatile representation error!"); +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, + MachineMemOperand *mmo) + : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, - const SDValue *Ops, - unsigned NumOps, MVT memvt, const Value *srcValue, - int SVO, unsigned alignment, bool vol) + const SDValue *Ops, unsigned NumOps, EVT memvt, + MachineMemOperand *mmo) : SDNode(Opc, dl, VTs, Ops, NumOps), - MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { - SubclassData = 
encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); - assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); - assert(getAlignment() == alignment && "Alignment representation error!"); - assert(isVolatile() == vol && "Volatile representation error!"); -} - -/// getMemOperand - Return a MachineMemOperand object describing the memory -/// reference performed by this memory reference. -MachineMemOperand MemSDNode::getMemOperand() const { - int Flags = 0; - if (isa<LoadSDNode>(this)) - Flags = MachineMemOperand::MOLoad; - else if (isa<StoreSDNode>(this)) - Flags = MachineMemOperand::MOStore; - else if (isa<AtomicSDNode>(this)) { - Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - } - else { - const MemIntrinsicSDNode* MemIntrinNode = dyn_cast<MemIntrinsicSDNode>(this); - assert(MemIntrinNode && "Unknown MemSDNode opcode!"); - if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad; - if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore; - } - - int Size = (getMemoryVT().getSizeInBits() + 7) >> 3; - if (isVolatile()) Flags |= MachineMemOperand::MOVolatile; - - // Check if the memory reference references a frame index - const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(getBasePtr().getNode()); - if (!getSrcValue() && FI) - return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()), - Flags, 0, Size, getAlignment()); - else - return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(), - Size, getAlignment()); + MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } /// Profile - Gather unique data for the node. 
@@ -5003,19 +5185,30 @@ void SDNode::Profile(FoldingSetNodeID &ID) const { AddNodeIDNode(ID, this); } -static ManagedStatic<std::set<MVT, MVT::compareRawBits> > EVTs; -static MVT VTs[MVT::LAST_VALUETYPE]; +namespace { + struct EVTArray { + std::vector<EVT> VTs; + + EVTArray() { + VTs.reserve(MVT::LAST_VALUETYPE); + for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) + VTs.push_back(MVT((MVT::SimpleValueType)i)); + } + }; +} + +static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs; +static ManagedStatic<EVTArray> SimpleVTArray; static ManagedStatic<sys::SmartMutex<true> > VTMutex; /// getValueTypeList - Return a pointer to the specified value type. /// -const MVT *SDNode::getValueTypeList(MVT VT) { - sys::SmartScopedLock<true> Lock(&*VTMutex); +const EVT *SDNode::getValueTypeList(EVT VT) { if (VT.isExtended()) { + sys::SmartScopedLock<true> Lock(*VTMutex); return &(*EVTs->insert(VT).first); } else { - VTs[VT.getSimpleVT()] = VT; - return &VTs[VT.getSimpleVT()]; + return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; } } @@ -5186,14 +5379,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; - case ISD::MEMOPERAND: return "MemOperand"; case ISD::EntryToken: return "EntryToken"; case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; case ISD::AssertZext: return "AssertZext"; case ISD::BasicBlock: return "BasicBlock"; - case ISD::ARG_FLAGS: return "ArgFlags"; case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; @@ -5208,6 +5399,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; case ISD::EHSELECTION: return "EHSELECTION"; case 
ISD::EH_RETURN: return "EH_RETURN"; case ISD::ConstantPool: return "ConstantPool"; @@ -5239,10 +5431,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INLINEASM: return "inlineasm"; case ISD::DBG_LABEL: return "dbg_label"; case ISD::EH_LABEL: return "eh_label"; - case ISD::DECLARE: return "declare"; case ISD::HANDLENODE: return "handlenode"; - case ISD::FORMAL_ARGUMENTS: return "formal_arguments"; - case ISD::CALL: return "call"; // Unary operators case ISD::FABS: return "fabs"; @@ -5332,7 +5521,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CONVERT_RNDSAT: { switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { - default: assert(0 && "Unknown cvt code!"); + default: llvm_unreachable("Unknown cvt code!"); case ISD::CVT_FF: return "cvt_ff"; case ISD::CVT_FS: return "cvt_fs"; case ISD::CVT_FU: return "cvt_fu"; @@ -5351,7 +5540,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::BR_JT: return "br_jt"; case ISD::BRCOND: return "brcond"; case ISD::BR_CC: return "br_cc"; - case ISD::RET: return "ret"; case ISD::CALLSEQ_START: return "callseq_start"; case ISD::CALLSEQ_END: return "callseq_end"; @@ -5384,7 +5572,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CONDCODE: switch (cast<CondCodeSDNode>(this)->get()) { - default: assert(0 && "Unknown setcc condition!"); + default: llvm_unreachable("Unknown setcc condition!"); case ISD::SETOEQ: return "setoeq"; case ISD::SETOGT: return "setogt"; case ISD::SETOGE: return "setoge"; @@ -5463,14 +5651,26 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { if (getValueType(i) == MVT::Other) OS << "ch"; else - OS << getValueType(i).getMVTString(); + OS << getValueType(i).getEVTString(); } OS << " = " << getOperationName(G); } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { - if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { - const 
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(this); + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { OS << "<"; for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { int Idx = SVN->getMaskElt(i); @@ -5481,9 +5681,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << Idx; } OS << ">"; - } - - if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { OS << '<' << CSDN->getAPIntValue() << '>'; } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) @@ -5505,13 +5703,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " + " << offset; else OS << " " << offset; - if (unsigned char TF = GADN->getTargetFlags()) + if (unsigned int TF = GADN->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { OS << "<" << FIDN->getIndex() << ">"; } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { OS << "<" << JTDN->getIndex() << ">"; - if (unsigned char TF = JTDN->getTargetFlags()) + if (unsigned int TF = JTDN->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ int offset = CP->getOffset(); @@ -5523,7 +5721,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " + " << offset; else OS << " " << offset; - if (unsigned char TF = CP->getTargetFlags()) + if (unsigned int TF = CP->getTargetFlags()) 
OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { OS << "<"; @@ -5541,80 +5739,47 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { OS << "'" << ES->getSymbol() << "'"; - if (unsigned char TF = ES->getTargetFlags()) + if (unsigned int TF = ES->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { if (M->getValue()) OS << "<" << M->getValue() << ">"; else OS << "<null>"; - } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(this)) { - if (M->MO.getValue()) - OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">"; - else - OS << "<null:" << M->MO.getOffset() << ">"; - } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(this)) { - OS << N->getArgFlags().getArgFlagsString(); } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { - OS << ":" << N->getVT().getMVTString(); + OS << ":" << N->getVT().getEVTString(); } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { - const Value *SrcValue = LD->getSrcValue(); - int SrcOffset = LD->getSrcValueOffset(); - OS << " <"; - if (SrcValue) - OS << SrcValue; - else - OS << "null"; - OS << ":" << SrcOffset << ">"; + OS << " <" << *LD->getMemOperand(); bool doExt = true; switch (LD->getExtensionType()) { default: doExt = false; break; - case ISD::EXTLOAD: OS << " <anyext "; break; - case ISD::SEXTLOAD: OS << " <sext "; break; - case ISD::ZEXTLOAD: OS << " <zext "; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; } if (doExt) - OS << LD->getMemoryVT().getMVTString() << ">"; + OS << " from " << LD->getMemoryVT().getEVTString(); const char *AM = getIndexedModeName(LD->getAddressingMode()); if (*AM) - OS << " " << AM; - if (LD->isVolatile()) - OS << " <volatile>"; - 
OS << " alignment=" << LD->getAlignment(); + OS << ", " << AM; + + OS << ">"; } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { - const Value *SrcValue = ST->getSrcValue(); - int SrcOffset = ST->getSrcValueOffset(); - OS << " <"; - if (SrcValue) - OS << SrcValue; - else - OS << "null"; - OS << ":" << SrcOffset << ">"; + OS << " <" << *ST->getMemOperand(); if (ST->isTruncatingStore()) - OS << " <trunc " << ST->getMemoryVT().getMVTString() << ">"; + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); const char *AM = getIndexedModeName(ST->getAddressingMode()); if (*AM) - OS << " " << AM; - if (ST->isVolatile()) - OS << " <volatile>"; - OS << " alignment=" << ST->getAlignment(); - } else if (const AtomicSDNode* AT = dyn_cast<AtomicSDNode>(this)) { - const Value *SrcValue = AT->getSrcValue(); - int SrcOffset = AT->getSrcValueOffset(); - OS << " <"; - if (SrcValue) - OS << SrcValue; - else - OS << "null"; - OS << ":" << SrcOffset << ">"; - if (AT->isVolatile()) - OS << " <volatile>"; - OS << " alignment=" << AT->getAlignment(); + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << " <" << *M->getMemOperand() << ">"; } } @@ -5635,16 +5800,17 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { if (N->getOperand(i).getNode()->hasOneUse()) DumpNodes(N->getOperand(i).getNode(), indent+2, G); else - cerr << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + errs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; - cerr << "\n" << std::string(indent, ' '); + errs() << "\n"; + errs().indent(indent); N->dump(G); } void SelectionDAG::dump() const { - cerr << "SelectionDAG has " << AllNodes.size() << " nodes:"; + errs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { @@ -5655,7 +5821,7 
@@ void SelectionDAG::dump() const { if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - cerr << "\n\n"; + errs() << "\n\n"; } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { @@ -5699,6 +5865,11 @@ void SDNode::dumpr() const { DumpNodesr(errs(), this, 0, 0, once); } +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(errs(), this, 0, G, once); +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { @@ -5717,7 +5888,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits) { - MVT VT = getValueType(0); + EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); if (MinSplatBits > sz) @@ -5767,7 +5938,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, SplatValue = HighValue | LowValue; SplatUndef = HighUndef & LowUndef; - + sz = HalfSize; } @@ -5775,14 +5946,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, return true; } -bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) { +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { // Find the first non-undef value in the shuffle mask. unsigned i, e; for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) /* search */; assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); - + // Make sure all remaining elements are either undef or the same as the first // non-undef value. 
for (int Idx = Mask[i]; i != e; ++i) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 260911e..9017e43 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Constants.h" +#include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -49,6 +50,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -104,14 +106,14 @@ static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, } /// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// MVTs that represent all the individual underlying +/// EVTs that represent all the individual underlying /// non-aggregate types that comprise it. /// /// If Offsets is non-null, it points to a vector to be filled in /// with the in-memory offsets of each of the individual values. /// static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl<MVT> &ValueVTs, + SmallVectorImpl<EVT> &ValueVTs, SmallVectorImpl<uint64_t> *Offsets = 0, uint64_t StartingOffset = 0) { // Given a struct type, recursively traverse the elements. @@ -135,9 +137,9 @@ static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, return; } // Interpret void as zero return values. - if (Ty == Type::VoidTy) + if (Ty == Type::getVoidTy(Ty->getContext())) return; - // Base case: we can get an MVT for this LLVM IR type. + // Base case: we can get an EVT for this LLVM IR type. 
ValueVTs.push_back(TLI.getValueType(Ty)); if (Offsets) Offsets->push_back(StartingOffset); @@ -161,7 +163,7 @@ namespace llvm { /// ValueVTs - The value types of the values, which may not be legal, and /// may need be promoted or synthesized from one or more registers. /// - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; /// RegVTs - The value types of the registers. This is the same size as /// ValueVTs and it records, for each value, what the type of the assigned @@ -172,7 +174,7 @@ namespace llvm { /// getRegisterType member function, however when with physical registers /// it is necessary to have a separate record of the types. /// - SmallVector<MVT, 4> RegVTs; + SmallVector<EVT, 4> RegVTs; /// Regs - This list holds the registers assigned to the values. /// Each legal or promoted value requires one register, and each @@ -184,21 +186,21 @@ namespace llvm { RegsForValue(const TargetLowering &tli, const SmallVector<unsigned, 4> ®s, - MVT regvt, MVT valuevt) + EVT regvt, EVT valuevt) : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue(const TargetLowering &tli, const SmallVector<unsigned, 4> ®s, - const SmallVector<MVT, 4> ®vts, - const SmallVector<MVT, 4> &valuevts) + const SmallVector<EVT, 4> ®vts, + const SmallVector<EVT, 4> &valuevts) : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - RegsForValue(const TargetLowering &tli, + RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, const Type *Ty) : TLI(&tli) { ComputeValueVTs(tli, Ty, ValueVTs); for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(ValueVT); - MVT RegisterVT = TLI->getRegisterType(ValueVT); + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT); + EVT RegisterVT = TLI->getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); 
RegVTs.push_back(RegisterVT); @@ -352,11 +354,11 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - MVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(VT); + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); @@ -366,7 +368,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, } } -unsigned FunctionLoweringInfo::MakeReg(MVT VT) { +unsigned FunctionLoweringInfo::MakeReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } @@ -378,15 +380,15 @@ unsigned FunctionLoweringInfo::MakeReg(MVT VT) { /// will assign registers for each member or element. /// unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, V->getType(), ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT ValueVT = ValueVTs[Value]; - MVT RegisterVT = TLI.getRegisterType(ValueVT); + EVT ValueVT = ValueVTs[Value]; + EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); - unsigned NumRegs = TLI.getNumRegisters(ValueVT); + unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { unsigned R = MakeReg(RegisterVT); if (!FirstReg) FirstReg = R; @@ -402,7 +404,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { /// (ISD::AssertSext). 
static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, const SDValue *Parts, - unsigned NumParts, MVT PartVT, MVT ValueVT, + unsigned NumParts, EVT PartVT, EVT ValueVT, ISD::NodeType AssertOp = ISD::DELETED_NODE) { assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -418,11 +420,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned RoundParts = NumParts & (NumParts - 1) ? 1 << Log2_32(NumParts) : NumParts; unsigned RoundBits = PartBits * RoundParts; - MVT RoundVT = RoundBits == ValueBits ? - ValueVT : MVT::getIntegerVT(RoundBits); + EVT RoundVT = RoundBits == ValueBits ? + ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); SDValue Lo, Hi; - MVT HalfVT = MVT::getIntegerVT(RoundBits/2); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT); @@ -439,7 +441,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; - MVT OddVT = MVT::getIntegerVT(OddParts * PartBits); + EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, dl, Parts+RoundParts, OddParts, PartVT, OddVT); @@ -447,7 +449,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, Lo = Val; if (TLI.isBigEndian()) std::swap(Lo, Hi); - MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits); + EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), @@ -457,11 +459,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, } } else if (ValueVT.isVector()) { // Handle a multi-element vector. 
- MVT IntermediateVT, RegisterVT; + EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; unsigned NumRegs = - TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, - RegisterVT); + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); @@ -494,11 +496,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, ValueVT, &Ops[0], NumIntermediates); } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) - assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) && + assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && "Unexpected split"); SDValue Lo, Hi; - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]); if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); @@ -506,7 +508,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); - MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits()); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); } } @@ -555,7 +557,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); - assert(0 && "Unknown mismatch!"); + llvm_unreachable("Unknown mismatch!"); return SDValue(); } @@ -563,11 
+565,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, - SDValue *Parts, unsigned NumParts, MVT PartVT, + SDValue *Parts, unsigned NumParts, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT PtrVT = TLI.getPointerTy(); - MVT ValueVT = Val.getValueType(); + EVT PtrVT = TLI.getPointerTy(); + EVT ValueVT = Val.getValueType(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); @@ -588,10 +590,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); } else if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = MVT::getIntegerVT(NumParts * PartBits); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); } else { - assert(0 && "Unknown mismatch!"); + llvm_unreachable("Unknown mismatch!"); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. @@ -600,10 +602,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. 
if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = MVT::getIntegerVT(NumParts * PartBits); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } else { - assert(0 && "Unknown mismatch!"); + llvm_unreachable("Unknown mismatch!"); } } @@ -634,19 +636,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); NumParts = RoundParts; - ValueVT = MVT::getIntegerVT(NumParts * PartBits); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::getIntegerVT(ValueVT.getSizeInBits()), + EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { for (unsigned i = 0; i < NumParts; i += StepSize) { unsigned ThisBits = StepSize * PartBits / 2; - MVT ThisVT = MVT::getIntegerVT (ThisBits); + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); SDValue &Part0 = Parts[i]; SDValue &Part1 = Parts[i+StepSize/2]; @@ -692,11 +694,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, } // Handle a multi-element vector. 
- MVT IntermediateVT, RegisterVT; + EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = TLI - .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, - RegisterVT); + unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, + IntermediateVT, NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); @@ -750,8 +751,10 @@ void SelectionDAGLowering::clear() { NodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); + EdgeMapping.clear(); DAG.clear(); CurDebugLoc = DebugLoc::getUnknownLoc(); + HasTailCall = false; } /// getRoot - Return the current virtual root of the Selection DAG, @@ -817,8 +820,7 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { - default: assert(0 && "Unknown instruction type encountered!"); - abort(); + default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE:return visit##OPCODE((CLASS&)I); @@ -831,7 +833,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { if (N.getNode()) return N; if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { - MVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI.getValueType(V->getType(), true); if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) return N = DAG.getConstant(*CI, VT); @@ -860,6 +862,10 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); OI != OE; ++OI) { SDNode *Val = getValue(*OI).getNode(); + // If the operand is an empty aggregate, there are no values. 
+ if (!Val) continue; + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Constants.push_back(SDValue(Val, i)); } @@ -871,14 +877,14 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && "Unknown struct or array constant!"); - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct SmallVector<SDValue, 4> Constants(NumElts); for (unsigned i = 0; i != NumElts; ++i) { - MVT EltVT = ValueVTs[i]; + EVT EltVT = ValueVTs[i]; if (isa<UndefValue>(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) @@ -900,7 +906,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { Ops.push_back(getValue(CP->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - MVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -927,30 +933,24 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { unsigned InReg = FuncInfo.ValueMap[V]; assert(InReg && "Value not in map!"); - RegsForValue RFV(TLI, InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); } void SelectionDAGLowering::visitRet(ReturnInst &I) { - if (I.getNumOperands() == 0) { - DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), - MVT::Other, getControlRoot())); - return; - } - - SmallVector<SDValue, 8> NewValues; - NewValues.push_back(getControlRoot()); + SDValue Chain = getControlRoot(); + SmallVector<ISD::OutputArg, 8> Outs; for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - SmallVector<MVT, 4> 
ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) continue; SDValue RetOp = getValue(I.getOperand(i)); for (unsigned j = 0, f = NumValues; j != f; ++j) { - MVT VT = ValueVTs[j]; + EVT VT = ValueVTs[j]; ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -965,13 +965,13 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) { // conventions. The frontend should mark functions whose return values // require promoting with signext or zeroext attributes. if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - MVT MinVT = TLI.getRegisterType(MVT::i32); + EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); if (VT.bitsLT(MinVT)) VT = MinVT; } - unsigned NumParts = TLI.getNumRegisters(VT); - MVT PartVT = TLI.getRegisterType(VT); + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -981,14 +981,30 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) { ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); if (F->paramHasAttr(0, Attribute::InReg)) Flags.setInReg(); - for (unsigned i = 0; i < NumParts; ++i) { - NewValues.push_back(Parts[i]); - NewValues.push_back(DAG.getArgFlags(Flags)); - } + + // Propagate extension type if any + if (F->paramHasAttr(0, Attribute::SExt)) + Flags.setSExt(); + else if (F->paramHasAttr(0, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); } } - DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other, - &NewValues[0], NewValues.size())); + + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CallingConv::ID CallConv = + DAG.getMachineFunction().getFunction()->getCallingConv(); + Chain = TLI.LowerReturn(Chain, CallConv, 
isVarArg, + Outs, getCurDebugLoc(), DAG); + + // Verify that the target's LowerReturn behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerReturn didn't return a valid chain!"); + + // Update the DAG with the new chain value resulting from return lowering. + DAG.setRoot(Chain); } /// CopyToExportRegsIfNeeded - If the given value has virtual registers @@ -1073,7 +1089,7 @@ static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; default: - assert(0 && "Invalid FCmp predicate opcode!"); + llvm_unreachable("Invalid FCmp predicate opcode!"); FOC = FPC = ISD::SETFALSE; break; } @@ -1099,7 +1115,7 @@ static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { case ICmpInst::ICMP_SGT: return ISD::SETGT; case ICmpInst::ICMP_UGT: return ISD::SETUGT; default: - assert(0 && "Invalid ICmp predicate opcode!"); + llvm_unreachable("Invalid ICmp predicate opcode!"); return ISD::SETNE; } } @@ -1131,7 +1147,7 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, Condition = getFCmpCondCode(FC->getPredicate()); } else { Condition = ISD::SETEQ; // silence warning. - assert(0 && "Unknown compare instruction"); + llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), @@ -1142,7 +1158,7 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, } // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(), + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), NULL, TBB, FBB, CurBB); SwitchCases.push_back(CB); } @@ -1229,7 +1245,7 @@ void SelectionDAGLowering::visitBr(BranchInst &I) { // Figure out which block is immediately after the current one. 
MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (I.isUnconditional()) { @@ -1290,14 +1306,14 @@ void SelectionDAGLowering::visitBr(BranchInst &I) { // Okay, we decided not to do this, remove any inserted MBB's and clear // SwitchCases. for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) - CurMBB->getParent()->erase(SwitchCases[i].ThisBB); + FuncInfo.MF->erase(SwitchCases[i].ThisBB); SwitchCases.clear(); } } // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(), + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), NULL, Succ0MBB, Succ1MBB, CurMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1315,9 +1331,11 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { if (CB.CmpMHS == NULL) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. 
- if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ) + if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && + CB.CC == ISD::SETEQ) Cond = CondLHS; - else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) { + else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && + CB.CC == ISD::SETEQ) { SDValue True = DAG.getConstant(1, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else @@ -1329,7 +1347,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); - MVT VT = CmpOp.getValueType(); + EVT VT = CmpOp.getValueType(); if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), @@ -1350,7 +1368,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; // If the lhs block is the next block, invert the condition so that we can @@ -1385,7 +1403,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { void SelectionDAGLowering::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - MVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1402,7 +1420,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. 
SDValue SwitchOp = getValue(JTH.SValue); - MVT VT = SwitchOp.getValueType(); + EVT VT = SwitchOp.getValueType(); SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, DAG.getConstant(JTH.First, VT)); @@ -1411,12 +1429,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. - if (VT.bitsGT(TLI.getPointerTy())) - SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), SUB); - else - SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), SUB); + SwitchOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy()); unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), @@ -1435,7 +1448,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, // This is used to avoid emitting unnecessary branches to the next block. 
MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), @@ -1454,7 +1467,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); - MVT VT = SwitchOp.getValueType(); + EVT VT = SwitchOp.getValueType(); SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, DAG.getConstant(B.First, VT)); @@ -1464,13 +1477,7 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { SUB, DAG.getConstant(B.Range, VT), ISD::SETUGT); - SDValue ShiftOp; - if (VT.bitsGT(TLI.getPointerTy())) - ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), SUB); - else - ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), SUB); + SDValue ShiftOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy()); B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), @@ -1480,7 +1487,7 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; MachineBasicBlock* MBB = B.Cases[0].ThisBB; @@ -1531,7 +1538,7 @@ void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB, // This is used to avoid emitting unnecessary branches to the next block. 
MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (NextMBB == NextBlock) @@ -1584,13 +1591,13 @@ bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR, // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; // TODO: If any two of the cases has the same destination, and if one value @@ -1698,14 +1705,11 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; - - if (++BBI != CurMBB->getParent()->end()) - NextBlock = BBI; + ++BBI; const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); @@ -1771,14 +1775,11 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, MachineBasicBlock* Default) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. 
- MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; - - if (++BBI != CurMBB->getParent()->end()) - NextBlock = BBI; + ++BBI; Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); @@ -1898,14 +1899,15 @@ bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, Value* SV, MachineBasicBlock* Default){ - unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits(); + EVT PTy = TLI.getPointerTy(); + unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) @@ -2069,7 +2071,6 @@ size_t SelectionDAGLowering::Clusterify(CaseVector& Cases, void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -2174,24 +2175,26 @@ void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { if (!isa<VectorType>(I.getType()) && Op2.getValueType() != TLI.getShiftAmountTy()) { // If the operand is smaller than the shift count type, promote it. - if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType())) + EVT PTy = TLI.getPointerTy(); + EVT STy = TLI.getShiftAmountTy(); + if (STy.bitsGT(Op2.getValueType())) Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), TLI.getShiftAmountTy(), Op2); // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. 
This is a common case and it exposes the truncate to // optimization early. - else if (TLI.getShiftAmountTy().getSizeInBits() >= + else if (STy.getSizeInBits() >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getShiftAmountTy(), Op2); // Otherwise we'll need to temporarily settle for some other // convenient type; type legalization will make adjustments as // needed. - else if (TLI.getPointerTy().bitsLT(Op2.getValueType())) + else if (PTy.bitsLT(Op2.getValueType())) Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getPointerTy(), Op2); - else if (TLI.getPointerTy().bitsGT(Op2.getValueType())) + else if (PTy.bitsGT(Op2.getValueType())) Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), Op2); } @@ -2209,7 +2212,9 @@ void SelectionDAGLowering::visitICmp(User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - setValue(&I, DAG.getSetCC(getCurDebugLoc(),MVT::i1, Op1, Op2, Opcode)); + + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGLowering::visitFCmp(User &I) { @@ -2221,38 +2226,12 @@ void SelectionDAGLowering::visitFCmp(User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); - setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition)); -} - -void SelectionDAGLowering::visitVICmp(User &I) { - ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; - if (VICmpInst *IC = dyn_cast<VICmpInst>(&I)) - predicate = IC->getPredicate(); - else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) - predicate = ICmpInst::Predicate(IC->getPredicate()); - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Opcode = getICmpCondCode(predicate); - setValue(&I, 
DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(), - Op1, Op2, Opcode)); -} - -void SelectionDAGLowering::visitVFCmp(User &I) { - FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; - if (VFCmpInst *FC = dyn_cast<VFCmpInst>(&I)) - predicate = FC->getPredicate(); - else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) - predicate = FCmpInst::Predicate(FC->getPredicate()); - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Condition = getFCmpCondCode(predicate); - MVT DestVT = TLI.getValueType(I.getType()); - - setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGLowering::visitSelect(User &I) { - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues != 0) { @@ -2277,7 +2256,7 @@ void SelectionDAGLowering::visitSelect(User &I) { void SelectionDAGLowering::visitTrunc(User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } @@ -2285,7 +2264,7 @@ void SelectionDAGLowering::visitZExt(User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } @@ -2293,14 +2272,14 @@ void SelectionDAGLowering::visitSExt(User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 
// SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPTrunc(User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), DestVT, N, DAG.getIntPtrConstant(0))); } @@ -2308,35 +2287,35 @@ void SelectionDAGLowering::visitFPTrunc(User &I) { void SelectionDAGLowering::visitFPExt(User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPToUI(User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPToSI(User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitUIToFP(User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitSIToFP(User &I){ // SIToFP is never a no-op cast, no 
need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } @@ -2344,14 +2323,9 @@ void SelectionDAGLowering::visitPtrToInt(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - MVT SrcVT = N.getValueType(); - MVT DestVT = TLI.getValueType(I.getType()); - SDValue Result; - if (DestVT.bitsLT(SrcVT)) - Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N); - else - // Note: ZERO_EXTEND can handle cases where the sizes are equal too - Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N); + EVT SrcVT = N.getValueType(); + EVT DestVT = TLI.getValueType(I.getType()); + SDValue Result = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT); setValue(&I, Result); } @@ -2359,19 +2333,14 @@ void SelectionDAGLowering::visitIntToPtr(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. 
SDValue N = getValue(I.getOperand(0)); - MVT SrcVT = N.getValueType(); - MVT DestVT = TLI.getValueType(I.getType()); - if (DestVT.bitsLT(SrcVT)) - setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); - else - // Note: ZERO_EXTEND can handle cases where the sizes are equal too - setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - DestVT, N)); + EVT SrcVT = N.getValueType(); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } void SelectionDAGLowering::visitBitCast(User &I) { SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this // is either a BIT_CONVERT or a no-op. @@ -2422,7 +2391,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { // Convert the ConstantVector mask operand into an array of ints, with -1 // representing undef values. SmallVector<Constant*, 8> MaskElts; - cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); + cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(), + MaskElts); unsigned MaskNumElts = MaskElts.size(); for (unsigned i = 0; i != MaskNumElts; ++i) { if (isa<UndefValue>(MaskElts[i])) @@ -2431,8 +2401,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue()); } - MVT VT = TLI.getValueType(I.getType()); - MVT SrcVT = Src1.getValueType(); + EVT VT = TLI.getValueType(I.getType()); + EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { @@ -2531,7 +2501,7 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { } } - if (RangeUse[0] == 0 && RangeUse[0] == 0) { + if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. 
return; } @@ -2566,8 +2536,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { // We can't use either concat vectors or extract subvectors so fall back to // replacing the shuffle with extract and build vector. // to insert and build vector. - MVT EltVT = VT.getVectorElementType(); - MVT PtrVT = TLI.getPointerTy(); + EVT EltVT = VT.getVectorElementType(); + EVT PtrVT = TLI.getPointerTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { if (Mask[i] < 0) { @@ -2598,9 +2568,9 @@ void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, I.idx_begin(), I.idx_end()); - SmallVector<MVT, 4> AggValueVTs; + SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); - SmallVector<MVT, 4> ValValueVTs; + SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); @@ -2637,7 +2607,7 @@ void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, I.idx_begin(), I.idx_end()); - SmallVector<MVT, 4> ValValueVTs; + SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -2682,7 +2652,8 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) { uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - unsigned PtrBits = TLI.getPointerTy().getSizeInBits(); + EVT PTy = TLI.getPointerTy(); + unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) { OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getPointerTy(), @@ -2700,12 +2671,7 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) { // If the index is smaller or larger than intptr_t, truncate or extend // it. 
- if (IdxN.getValueType().bitsLT(N.getValueType())) - IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), - N.getValueType(), IdxN); - else if (IdxN.getValueType().bitsGT(N.getValueType())) - IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - N.getValueType(), IdxN); + IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. @@ -2749,13 +2715,8 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) { - MVT IntPtr = TLI.getPointerTy(); - if (IntPtr.bitsLT(AllocSize.getValueType())) - AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - IntPtr, AllocSize); - else if (IntPtr.bitsGT(AllocSize.getValueType())) - AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - IntPtr, AllocSize); + EVT IntPtr = TLI.getPointerTy(); + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2784,7 +2745,7 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) { // Inform the Frame Information that we have just allocated a variable-sized // object. 
- CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject(); + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); } void SelectionDAGLowering::visitLoad(LoadInst &I) { @@ -2795,7 +2756,7 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) { bool isVolatile = I.isVolatile(); unsigned Alignment = I.getAlignment(); - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); @@ -2818,14 +2779,13 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) { SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(NumValues); - MVT PtrVT = Ptr.getValueType(); + EVT PtrVT = Ptr.getValueType(); for (unsigned i = 0; i != NumValues; ++i) { SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - DAG.getNode(ISD::ADD, getCurDebugLoc(), - PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)), - SV, Offsets[i], - isVolatile, Alignment); + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + SV, Offsets[i], isVolatile, Alignment); Values[i] = L; Chains[i] = L.getValue(1); } @@ -2850,7 +2810,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { Value *SrcV = I.getOperand(0); Value *PtrV = I.getOperand(1); - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); @@ -2865,7 +2825,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { SDValue Root = getRoot(); SmallVector<SDValue, 4> Chains(NumValues); - MVT PtrVT = Ptr.getValueType(); + EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); unsigned Alignment = I.getAlignment(); for (unsigned i = 0; i != NumValues; ++i) @@ -2874,8 +2834,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)), - PtrV, 
Offsets[i], - isVolatile, Alignment); + PtrV, Offsets[i], isVolatile, Alignment); DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues)); @@ -2915,24 +2874,18 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, Ops.push_back(Op); } - std::vector<MVT> VTArray; - if (I.getType() != Type::VoidTy) { - MVT VT = TLI.getValueType(I.getType()); - if (VT.isVector()) { - const VectorType *DestTy = cast<VectorType>(I.getType()); - MVT EltVT = TLI.getValueType(DestTy->getElementType()); - - VT = MVT::getVectorVT(EltVT, DestTy->getNumElements()); - assert(VT != MVT::Other && "Intrinsic uses a non-legal type?"); - } - - assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?"); - VTArray.push_back(VT); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getType(), ValueVTs); +#ifndef NDEBUG + for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { + assert(TLI.isTypeLegal(ValueVTs[Val]) && + "Intrinsic uses a non-legal type?"); } +#endif // NDEBUG if (HasChain) - VTArray.push_back(MVT::Other); + ValueVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size()); + SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); // Create the node. 
SDValue Result; @@ -2947,7 +2900,7 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, else if (!HasChain) Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); - else if (I.getType() != Type::VoidTy) + else if (I.getType() != Type::getVoidTy(*DAG.getContext())) Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); else @@ -2961,9 +2914,9 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, else DAG.setRoot(Chain); } - if (I.getType() != Type::VoidTy) { + if (I.getType() != Type::getVoidTy(*DAG.getContext())) { if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - MVT VT = TLI.getValueType(PTy); + EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); } setValue(&I, Result); @@ -3890,7 +3843,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW && DW->ShouldEmitDwarfDebug()) { unsigned LabelID = - DW->RecordRegionStart(cast<GlobalVariable>(RSI.getContext())); + DW->RecordRegionStart(RSI.getContext()); DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), getRoot(), LabelID)); } @@ -3905,7 +3858,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; MachineFunction &MF = DAG.getMachineFunction(); - DISubprogram Subprogram(cast<GlobalVariable>(REI.getContext())); + DISubprogram Subprogram(REI.getContext()); if (isInlinedFnEnd(REI, MF.getFunction())) { // This is end of inlined function. 
Debugging information for inlined @@ -3924,7 +3877,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } unsigned LabelID = - DW->RecordRegionEnd(cast<GlobalVariable>(REI.getContext())); + DW->RecordRegionEnd(REI.getContext()); DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), getRoot(), LabelID)); return 0; @@ -3932,8 +3885,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::dbg_func_start: { DwarfWriter *DW = DAG.getDwarfWriter(); DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I); - if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None) || !DW - || !DW->ShouldEmitDwarfDebug()) + if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None)) return 0; MachineFunction &MF = DAG.getMachineFunction(); @@ -3954,9 +3906,11 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Record the source line. setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); + if (!DW || !DW->ShouldEmitDwarfDebug()) + return 0; DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(cast<GlobalVariable>(FSI.getSubprogram())); - DICompileUnit CU(PrevLocTpl.CompileUnit); + DISubprogram SP(FSI.getSubprogram()); + DICompileUnit CU(PrevLocTpl.Scope); unsigned LabelID = DW->RecordInlinedFnStart(SP, CU, PrevLocTpl.Line, PrevLocTpl.Col); @@ -3967,23 +3921,44 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // This is a beginning of a new function. MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); - + + if (!DW || !DW->ShouldEmitDwarfDebug()) + return 0; // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(cast<GlobalVariable>(FSI.getSubprogram())); + DW->RecordRegionStart(FSI.getSubprogram()); return 0; } case Intrinsic::dbg_declare: { if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. 
return 0; - + DwarfWriter *DW = DAG.getDwarfWriter(); + if (!DW) + return 0; DbgDeclareInst &DI = cast<DbgDeclareInst>(I); if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None)) return 0; - Value *Variable = DI.getVariable(); - DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(), - getValue(DI.getAddress()), getValue(Variable))); + MDNode *Variable = DI.getVariable(); + Value *Address = DI.getAddress(); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + AllocaInst *AI = dyn_cast<AllocaInst>(Address); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) + return 0; + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return 0; // VLAs. + int FI = SI->second; +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + if (MMI) + MMI->setVariableDbgInfo(Variable, FI); +#else + DW->RecordVariable(Variable, FI); +#endif return 0; } case Intrinsic::eh_exception: { @@ -3998,54 +3973,45 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; } - case Intrinsic::eh_selector_i32: - case Intrinsic::eh_selector_i64: { + case Intrinsic::eh_selector: { MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ? - MVT::i32 : MVT::i64); - if (MMI) { - if (CurMBB->isLandingPad()) - AddCatchInfo(I, MMI, CurMBB); - else { + if (CurMBB->isLandingPad()) + AddCatchInfo(I, MMI, CurMBB); + else { #ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(&I); + FuncInfo.CatchInfoLost.insert(&I); #endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) CurMBB->addLiveIn(Reg); - } - - // Insert the EHSELECTION instruction. 
- SDVTList VTs = DAG.getVTList(VT, MVT::Other); - SDValue Ops[2]; - Ops[0] = getValue(I.getOperand(1)); - Ops[1] = getRoot(); - SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - setValue(&I, Op); - DAG.setRoot(Op.getValue(1)); - } else { - setValue(&I, DAG.getConstant(0, VT)); + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) CurMBB->addLiveIn(Reg); } + // Insert the EHSELECTION instruction. + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[2]; + Ops[0] = getValue(I.getOperand(1)); + Ops[1] = getRoot(); + SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); + + DAG.setRoot(Op.getValue(1)); + + setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); return 0; } - case Intrinsic::eh_typeid_for_i32: - case Intrinsic::eh_typeid_for_i64: { + case Intrinsic::eh_typeid_for: { MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ? - MVT::i32 : MVT::i64); if (MMI) { // Find the type id for the given typeinfo. GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); unsigned TypeID = MMI->getTypeIDFor(GV); - setValue(&I, DAG.getConstant(TypeID, VT)); + setValue(&I, DAG.getConstant(TypeID, MVT::i32)); } else { // Return something different to eh_selector. 
- setValue(&I, DAG.getConstant(1, VT)); + setValue(&I, DAG.getConstant(1, MVT::i32)); } return 0; @@ -4073,14 +4039,9 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::eh_dwarf_cfa: { - MVT VT = getValue(I.getOperand(1)).getValueType(); - SDValue CfaArg; - if (VT.bitsGT(TLI.getPointerTy())) - CfaArg = DAG.getNode(ISD::TRUNCATE, dl, - TLI.getPointerTy(), getValue(I.getOperand(1))); - else - CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl, - TLI.getPointerTy(), getValue(I.getOperand(1))); + EVT VT = getValue(I.getOperand(1)).getValueType(); + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl, + TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -4096,7 +4057,6 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Offset)); return 0; } - case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4118,7 +4078,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); Value* Op1 = I.getOperand(1); setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), @@ -4182,16 +4142,6 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.setRoot(Tmp.getValue(1)); return 0; } - case Intrinsic::part_select: { - // Currently not implemented: just abort - assert(0 && "part_select intrinsic not implemented"); - abort(); - } - case Intrinsic::part_set: { - // Currently not implemented: just abort - assert(0 && "part_set intrinsic not implemented"); - abort(); - } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, dl, getValue(I.getOperand(1)).getValueType(), @@ -4199,21 +4149,21 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, 
unsigned Intrinsic) { return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getOperand(1)); - MVT Ty = Arg.getValueType(); + EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); setValue(&I, result); return 0; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getOperand(1)); - MVT Ty = Arg.getValueType(); + EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); setValue(&I, result); return 0; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getOperand(1)); - MVT Ty = Arg.getValueType(); + EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); setValue(&I, result); return 0; @@ -4235,7 +4185,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(); SDValue Src = getValue(I.getOperand(1)); // The guard's value. AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); @@ -4289,7 +4239,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::gcread: case Intrinsic::gcwrite: - assert(0 && "GC failed to lower gcread/gcwrite intrinsics!"); + llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); return 0; case Intrinsic::flt_rounds: { @@ -4373,9 +4323,76 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } } +/// Test if the given instruction is in a position to be optimized +/// with a tail-call. This roughly means that it's in a block with +/// a return and there's nothing that needs to be scheduled +/// between it and the return. +/// +/// This function only tests target-independent requirements. +/// For target-dependent requirements, a target should override +/// TargetLowering::IsEligibleForTailCallOptimization. 
+/// +static bool +isInTailCallPosition(const Instruction *I, Attributes RetAttr, + const TargetLowering &TLI) { + const BasicBlock *ExitBB = I->getParent(); + const TerminatorInst *Term = ExitBB->getTerminator(); + const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); + const Function *F = ExitBB->getParent(); + + // The block must end in a return statement or an unreachable. + if (!Ret && !isa<UnreachableInst>(Term)) return false; + + // If I will have a chain, make sure no other instruction that will have a + // chain interposes between I and the return. + if (I->mayHaveSideEffects() || I->mayReadFromMemory() || + !I->isSafeToSpeculativelyExecute()) + for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; + --BBI) { + if (&*BBI == I) + break; + if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || + !BBI->isSafeToSpeculativelyExecute()) + return false; + } + + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // Conservatively require the attributes of the call to match those of + // the return. + if (F->getAttributes().getRetAttributes() != RetAttr) + return false; + + // Otherwise, make sure the unmodified return value of I is the return value. + for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; + U = dyn_cast<Instruction>(U->getOperand(0))) { + if (!U) + return false; + if (!U->hasOneUse()) + return false; + if (U == I) + break; + // Check for a truly no-op truncate. + if (isa<TruncInst>(U) && + TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) + continue; + // Check for a truly no-op bitcast. + if (isa<BitCastInst>(U) && + (U->getOperand(0)->getType() == U->getType() || + (isa<PointerType>(U->getOperand(0)->getType()) && + isa<PointerType>(U->getType())))) + continue; + // Otherwise it's not a true no-op. 
+ return false; + } + + return true; +} void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, - bool IsTailCall, + bool isTailCall, MachineBasicBlock *LandingPad) { const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); @@ -4385,8 +4402,9 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); + unsigned j = 1; for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { + i != e; ++i, ++j) { SDValue ArgNode = getValue(*i); Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); @@ -4405,6 +4423,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI->NextLabelID(); + // Both PendingLoads and PendingExports must be flushed here; // this call might not return. (void)getRoot(); @@ -4412,17 +4431,35 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, getControlRoot(), BeginLabel)); } + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI.LowerCallTo. 
+ if (isTailCall && + !isInTailCallPosition(CS.getInstruction(), + CS.getAttributes().getRetAttributes(), + TLI)) + isTailCall = false; + std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(getRoot(), CS.getType(), CS.paramHasAttr(0, Attribute::SExt), CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), - IsTailCall && PerformTailCallOpt, + isTailCall, + !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); - if (CS.getType() != Type::VoidTy) + assert((isTailCall || Result.second.getNode()) && + "Non-null chain expected with non-tail call!"); + assert((Result.second.getNode() || !Result.first.getNode()) && + "Null value expected with tail call!"); + if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); - DAG.setRoot(Result.second); + // As a special case, a null chain means that a tail call has + // been emitted and the DAG root is already updated. + if (Result.second.getNode()) + DAG.setRoot(Result.second); + else + HasTailCall = true; if (LandingPad && MMI) { // Insert a label at the end of the invoke call to mark the try range. This @@ -4458,12 +4495,9 @@ void SelectionDAGLowering::visitCall(CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. - unsigned NameLen = F->getNameLen(); - if (!F->hasLocalLinkage() && NameLen) { - const char *NameStr = F->getNameStart(); - if (NameStr[0] == 'c' && - ((NameLen == 8 && !strcmp(NameStr, "copysign")) || - (NameLen == 9 && !strcmp(NameStr, "copysignf")))) { + if (!F->hasLocalLinkage() && F->hasName()) { + StringRef Name = F->getName(); + if (Name == "copysign" || Name == "copysignf") { if (I.getNumOperands() == 3 && // Basic sanity checks. 
I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType() && @@ -4474,10 +4508,7 @@ void SelectionDAGLowering::visitCall(CallInst &I) { LHS.getValueType(), LHS, RHS)); return; } - } else if (NameStr[0] == 'f' && - ((NameLen == 4 && !strcmp(NameStr, "fabs")) || - (NameLen == 5 && !strcmp(NameStr, "fabsf")) || - (NameLen == 5 && !strcmp(NameStr, "fabsl")))) { + } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType()) { @@ -4486,30 +4517,36 @@ void SelectionDAGLowering::visitCall(CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (NameStr[0] == 's' && - ((NameLen == 3 && !strcmp(NameStr, "sin")) || - (NameLen == 4 && !strcmp(NameStr, "sinf")) || - (NameLen == 4 && !strcmp(NameStr, "sinl")))) { + } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && - I.getType() == I.getOperand(1)->getType()) { + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } - } else if (NameStr[0] == 'c' && - ((NameLen == 3 && !strcmp(NameStr, "cos")) || - (NameLen == 4 && !strcmp(NameStr, "cosf")) || - (NameLen == 4 && !strcmp(NameStr, "cosl")))) { + } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { if (I.getNumOperands() == 2 && // Basic sanity checks. 
I.getOperand(1)->getType()->isFloatingPoint() && - I.getType() == I.getOperand(1)->getType()) { + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } + } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } } } } else if (isa<InlineAsm>(I.getOperand(0))) { @@ -4523,7 +4560,12 @@ void SelectionDAGLowering::visitCall(CallInst &I) { else Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); - LowerCallTo(&I, Callee, I.isTailCall()); + // Check if we can potentially perform a tail call. More detailed + // checking is be done within LowerCallTo, after more information + // about the call is known. + bool isTailCall = PerformTailCallOpt && I.isTailCall(); + + LowerCallTo(&I, Callee, isTailCall); } @@ -4539,9 +4581,9 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, SmallVector<SDValue, 8> Parts; for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. - MVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(ValueVT); - MVT RegisterVT = RegVTs[Value]; + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -4570,7 +4612,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. 
bool isSExt = true; - MVT FromVT(MVT::Other); + EVT FromVT(MVT::Other); if (NumSignBits == RegSize) isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 else if (NumZeroBits >= RegSize-1) @@ -4620,9 +4662,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, unsigned NumRegs = Regs.size(); SmallVector<SDValue, 8> Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI->getNumRegisters(ValueVT); - MVT RegisterVT = RegVTs[Value]; + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT); @@ -4665,15 +4707,15 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,unsigned MatchingIdx, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { - MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy(); + EVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy(); assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); unsigned Flag = Code | (Regs.size() << 3); if (HasMatching) Flag |= 0x80000000 | (MatchingIdx << 16); Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy)); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]); - MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + EVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); @@ -4688,11 +4730,11 @@ static const TargetRegisterClass * isAllocatableRegister(unsigned Reg, MachineFunction &MF, const TargetLowering &TLI, const TargetRegisterInfo *TRI) { - MVT FoundVT = MVT::Other; + EVT FoundVT = MVT::Other; const 
TargetRegisterClass *FoundRC = 0; for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), E = TRI->regclass_end(); RCI != E; ++RCI) { - MVT ThisVT = MVT::Other; + EVT ThisVT = MVT::Other; const TargetRegisterClass *RC = *RCI; // If none of the the value types for this register class are valid, we @@ -4765,10 +4807,11 @@ public: } } - /// getCallOperandValMVT - Return the MVT of the Value* that this operand + /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. - MVT getCallOperandValMVT(const TargetLowering &TLI, + EVT getCallOperandValEVT(LLVMContext &Context, + const TargetLowering &TLI, const TargetData *TD) const { if (CallOperandVal == 0) return MVT::Other; @@ -4794,7 +4837,7 @@ public: case 32: case 64: case 128: - OpTy = IntegerType::get(BitSize); + OpTy = IntegerType::get(Context, BitSize); break; } } @@ -4830,6 +4873,8 @@ void SelectionDAGLowering:: GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set<unsigned> &OutputRegs, std::set<unsigned> &InputRegs) { + LLVMContext &Context = FuncInfo.Fn->getContext(); + // Compute whether this value requires an input register, an output register, // or both. bool isOutReg = false; @@ -4869,10 +4914,10 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // value disagrees with the register class we plan to stick this in. if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { - // Try to convert to the first MVT that the reg class contains. If the + // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). 
- MVT RegVT = *PhysReg.second->vt_begin(); + EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), RegVT, OpInfo.CallOperand); @@ -4882,18 +4927,19 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // bitcast to the corresponding integer type. This turns an f64 value // into i64, which can be passed with two i32 values on a 32-bit // machine. - RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); + RegVT = EVT::getIntegerVT(Context, + OpInfo.ConstraintVT.getSizeInBits()); OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } } - NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT); + NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } - MVT RegVT; - MVT ValueVT = OpInfo.ConstraintVT; + EVT RegVT; + EVT ValueVT = OpInfo.ConstraintVT; // If this is a constraint for a specific physical register, like {r17}, // assign it now. @@ -5047,7 +5093,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); - MVT OpVT = MVT::Other; + EVT OpVT = MVT::Other; // Compute the value type for each operand. switch (OpInfo.Type) { @@ -5060,7 +5106,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // The return value of the call is this value. As such, there is no // corresponding argument. - assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && + "Bad inline asm!"); if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { OpVT = TLI.getValueType(STy->getElementType(ResNo)); } else { @@ -5080,13 +5127,16 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // If this is an input or an indirect output, process the call argument. 
// BasicBlocks are labels, currently appearing only in asm's. if (OpInfo.CallOperandVal) { + // Strip bitcasts, if any. This mostly comes up for functions. + OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); + if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValMVT(TLI, TD); + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD); } OpInfo.ConstraintVT = OpVT; @@ -5108,9 +5158,9 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { Input.ConstraintVT.isInteger()) || (OpInfo.ConstraintVT.getSizeInBits() != Input.ConstraintVT.getSizeInBits())) { - cerr << "llvm: error: Unsupported asm: input constraint with a " - << "matching output constraint of incompatible type!\n"; - exit(1); + llvm_report_error("Unsupported asm: input constraint" + " with a matching output constraint of incompatible" + " type!"); } Input.ConstraintVT = OpInfo.ConstraintVT; } @@ -5213,9 +5263,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { - cerr << "llvm: error: Couldn't allocate output reg for constraint '" - << OpInfo.ConstraintCode << "'!\n"; - exit(1); + llvm_report_error("Couldn't allocate output reg for" + " constraint '" + OpInfo.ConstraintCode + "'!"); } // If this is an indirect operand, store through the pointer after the @@ -5225,7 +5274,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { OpInfo.CallOperandVal)); } else { // This is the result value of the call. - assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && + "Bad inline asm!"); // Concatenate this output onto the outputs list. 
RetValRegs.append(OpInfo.AssignedRegs); } @@ -5268,15 +5318,13 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. if (OpInfo.isIndirect) { - cerr << "llvm: error: " - "Don't know how to handle tied indirect " - "register inputs yet!\n"; - exit(1); + llvm_report_error("Don't know how to handle tied indirect " + "register inputs yet!"); } RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); - MVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); + EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); @@ -5313,9 +5361,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], hasMemory, Ops, DAG); if (Ops.empty()) { - cerr << "llvm: error: Invalid operand for inline asm constraint '" - << OpInfo.ConstraintCode << "'!\n"; - exit(1); + llvm_report_error("Invalid operand for inline asm" + " constraint '" + OpInfo.ConstraintCode + "'!"); } // Add information to the INLINEASM node to know about this input. @@ -5345,9 +5392,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { - cerr << "llvm: error: Couldn't allocate output reg for constraint '" - << OpInfo.ConstraintCode << "'!\n"; - exit(1); + llvm_report_error("Couldn't allocate input reg for" + " constraint '"+ OpInfo.ConstraintCode +"'!"); } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), @@ -5385,7 +5431,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? 
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - MVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI.getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -5449,45 +5495,56 @@ void SelectionDAGLowering::visitMalloc(MallocInst &I) { // multiply on 64-bit targets. // FIXME: Malloc inst should go away: PR715. uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType()); - if (ElementSize != 1) + if (ElementSize != 1) { + // Src is always 32-bits, make sure the constant fits. + assert(Src.getValueType() == MVT::i32); + ElementSize = (uint32_t)ElementSize; Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(), Src, DAG.getConstant(ElementSize, Src.getValueType())); + } - MVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI.getPointerTy(); - if (IntPtr.bitsLT(Src.getValueType())) - Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src); - else if (IntPtr.bitsGT(Src.getValueType())) - Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src); + Src = DAG.getZExtOrTrunc(Src, getCurDebugLoc(), IntPtr); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Src; - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); Args.push_back(Entry); + bool isTailCall = PerformTailCallOpt && + isInTailCallPosition(&I, Attribute::None, TLI); std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, - 0, CallingConv::C, PerformTailCallOpt, + 0, CallingConv::C, isTailCall, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("malloc", IntPtr), Args, DAG, getCurDebugLoc()); - setValue(&I, Result.first); // Pointers always fit in registers - DAG.setRoot(Result.second); + if (Result.first.getNode()) + setValue(&I, Result.first); // Pointers always fit in 
registers + if (Result.second.getNode()) + DAG.setRoot(Result.second); } void SelectionDAGLowering::visitFree(FreeInst &I) { TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = getValue(I.getOperand(0)); - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); Args.push_back(Entry); - MVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI.getPointerTy(); + bool isTailCall = PerformTailCallOpt && + isInTailCallPosition(&I, Attribute::None, TLI); std::pair<SDValue,SDValue> Result = - TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false, - 0, CallingConv::C, PerformTailCallOpt, + TLI.LowerCallTo(getRoot(), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, CallingConv::C, isTailCall, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("free", IntPtr), Args, DAG, getCurDebugLoc()); - DAG.setRoot(Result.second); + if (Result.second.getNode()) + DAG.setRoot(Result.second); } void SelectionDAGLowering::visitVAStart(CallInst &I) { @@ -5521,161 +5578,31 @@ void SelectionDAGLowering::visitVACopy(CallInst &I) { DAG.getSrcValue(I.getOperand(2)))); } -/// TargetLowering::LowerArguments - This is the default LowerArguments -/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all -/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be -/// integrated into SDISel. -void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &ArgValues, - DebugLoc dl) { - // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node. - SmallVector<SDValue, 3+16> Ops; - Ops.push_back(DAG.getRoot()); - Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy())); - Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy())); - - // Add one result value for each formal argument. 
- SmallVector<MVT, 16> RetVals; - unsigned j = 1; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); - I != E; ++I, ++j) { - SmallVector<MVT, 4> ValueVTs; - ComputeValueVTs(*this, I->getType(), ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { - MVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForMVT(); - ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = - getTargetData()->getABITypeAlignment(ArgTy); - - if (F.paramHasAttr(j, Attribute::ZExt)) - Flags.setZExt(); - if (F.paramHasAttr(j, Attribute::SExt)) - Flags.setSExt(); - if (F.paramHasAttr(j, Attribute::InReg)) - Flags.setInReg(); - if (F.paramHasAttr(j, Attribute::StructRet)) - Flags.setSRet(); - if (F.paramHasAttr(j, Attribute::ByVal)) { - Flags.setByVal(); - const PointerType *Ty = cast<PointerType>(I->getType()); - const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = getByValTypeAlignment(ElementTy); - unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); - // For ByVal, alignment should be passed from FE. BE will guess if - // this info is not there but there are cases it cannot get right. - if (F.getParamAlignment(j)) - FrameAlign = F.getParamAlignment(j); - Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); - } - if (F.paramHasAttr(j, Attribute::Nest)) - Flags.setNest(); - Flags.setOrigAlign(OriginalAlignment); - - MVT RegisterVT = getRegisterType(VT); - unsigned NumRegs = getNumRegisters(VT); - for (unsigned i = 0; i != NumRegs; ++i) { - RetVals.push_back(RegisterVT); - ISD::ArgFlagsTy MyFlags = Flags; - if (NumRegs > 1 && i == 0) - MyFlags.setSplit(); - // if it isn't first piece, alignment must be 1 - else if (i > 0) - MyFlags.setOrigAlign(1); - Ops.push_back(DAG.getArgFlags(MyFlags)); - } - } - } - - RetVals.push_back(MVT::Other); - - // Create the node. 
- SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl, - DAG.getVTList(&RetVals[0], RetVals.size()), - &Ops[0], Ops.size()).getNode(); - - // Prelower FORMAL_ARGUMENTS. This isn't required for functionality, but - // allows exposing the loads that may be part of the argument access to the - // first DAGCombiner pass. - SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG); - - // The number of results should match up, except that the lowered one may have - // an extra flag result. - assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() || - (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() && - TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag)) - && "Lowering produced unexpected number of results!"); - - // The FORMAL_ARGUMENTS node itself is likely no longer needed. - if (Result != TmpRes.getNode() && Result->use_empty()) { - HandleSDNode Dummy(DAG.getRoot()); - DAG.RemoveDeadNode(Result); - } - - Result = TmpRes.getNode(); - - unsigned NumArgRegs = Result->getNumValues() - 1; - DAG.setRoot(SDValue(Result, NumArgRegs)); - - // Set up the return result vector. 
- unsigned i = 0; - unsigned Idx = 1; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; - ++I, ++Idx) { - SmallVector<MVT, 4> ValueVTs; - ComputeValueVTs(*this, I->getType(), ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { - MVT VT = ValueVTs[Value]; - MVT PartVT = getRegisterType(VT); - - unsigned NumParts = getNumRegisters(VT); - SmallVector<SDValue, 4> Parts(NumParts); - for (unsigned j = 0; j != NumParts; ++j) - Parts[j] = SDValue(Result, i++); - - ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (F.paramHasAttr(Idx, Attribute::SExt)) - AssertOp = ISD::AssertSext; - else if (F.paramHasAttr(Idx, Attribute::ZExt)) - AssertOp = ISD::AssertZext; - - ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts, - PartVT, VT, AssertOp)); - } - } - assert(i == NumArgRegs && "Argument register count mismatch!"); -} - - /// TargetLowering::LowerCallTo - This is the default LowerCallTo -/// implementation, which just inserts an ISD::CALL node, which is later custom -/// lowered by the target to something concrete. FIXME: When all targets are -/// migrated to using ISD::CALL, this hook should be integrated into SDISel. +/// implementation, which just calls LowerCall. +/// FIXME: When all targets are +/// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, - unsigned CallingConv, bool isTailCall, + CallingConv::ID CallConv, bool isTailCall, + bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { + assert((!isTailCall || PerformTailCallOpt) && "isTailCall set when tail-call optimizations are disabled!"); - SmallVector<SDValue, 32> Ops; - Ops.push_back(Chain); // Op#0 - Chain - Ops.push_back(Callee); - // Handle all of the outgoing arguments. 
+ SmallVector<ISD::OutputArg, 32> Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { - MVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForMVT(); + EVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; @@ -5707,8 +5634,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterType(VT); - unsigned NumParts = getNumRegisters(VT); + EVT PartVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumParts = getNumRegisters(RetTy->getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -5719,75 +5646,105 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind); - for (unsigned i = 0; i != NumParts; ++i) { + for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::ArgFlagsTy MyFlags = Flags; - if (NumParts > 1 && i == 0) - MyFlags.setSplit(); - else if (i != 0) - MyFlags.setOrigAlign(1); - - Ops.push_back(Parts[i]); - Ops.push_back(DAG.getArgFlags(MyFlags)); + ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs); + if (NumParts > 1 && j == 0) + MyFlags.Flags.setSplit(); + else if (j != 0) + MyFlags.Flags.setOrigAlign(1); + + Outs.push_back(MyFlags); } } } - // Figure out the result value types. We start by making a list of - // the potentially illegal return value types. - SmallVector<MVT, 4> LoweredRetTys; - SmallVector<MVT, 4> RetTys; + // Handle the incoming return values from the call. 
+ SmallVector<ISD::InputArg, 32> Ins; + SmallVector<EVT, 4> RetTys; ComputeValueVTs(*this, RetTy, RetTys); - - // Then we translate that to a list of legal types. for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - MVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(VT); - unsigned NumRegs = getNumRegisters(VT); - for (unsigned i = 0; i != NumRegs; ++i) - LoweredRetTys.push_back(RegisterVT); - } - - LoweredRetTys.push_back(MVT::Other); // Always has a chain. - - // Create the CALL node. - SDValue Res = DAG.getCall(CallingConv, dl, - isVarArg, isTailCall, isInreg, - DAG.getVTList(&LoweredRetTys[0], - LoweredRetTys.size()), - &Ops[0], Ops.size(), NumFixedArgs - ); - Chain = Res.getValue(LoweredRetTys.size() - 1); - - // Gather up the call result into a single value. - if (RetTy != Type::VoidTy && !RetTys.empty()) { - ISD::NodeType AssertOp = ISD::DELETED_NODE; - - if (RetSExt) - AssertOp = ISD::AssertSext; - else if (RetZExt) - AssertOp = ISD::AssertZext; - - SmallVector<SDValue, 4> ReturnValues; - unsigned RegNo = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - MVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(VT); - unsigned NumRegs = getNumRegisters(VT); - unsigned RegNoEnd = NumRegs + RegNo; - SmallVector<SDValue, 4> Results; - for (; RegNo != RegNoEnd; ++RegNo) - Results.push_back(Res.getValue(RegNo)); - SDValue ReturnValue = - getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT, - AssertOp); - ReturnValues.push_back(ReturnValue); + EVT VT = RetTys[I]; + EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.Used = isReturnValueUsed; + if (RetSExt) + MyFlags.Flags.setSExt(); + if (RetZExt) + MyFlags.Flags.setZExt(); + if (isInreg) + MyFlags.Flags.setInReg(); + Ins.push_back(MyFlags); } - Res = DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&RetTys[0], 
RetTys.size()), - &ReturnValues[0], ReturnValues.size()); } + // Check if target-dependent constraints permit a tail call here. + // Target-independent constraints should be checked by the caller. + if (isTailCall && + !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG)) + isTailCall = false; + + SmallVector<SDValue, 4> InVals; + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Outs, Ins, dl, DAG, InVals); + + // Verify that the target's LowerCall behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerCall didn't return a valid chain!"); + assert((!isTailCall || InVals.empty()) && + "LowerCall emitted a return value for a tail call!"); + assert((isTailCall || InVals.size() == Ins.size()) && + "LowerCall didn't emit the correct number of values!"); + DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerCall emitted a null value!"); + assert(Ins[i].VT == InVals[i].getValueType() && + "LowerCall emitted a value with the wrong type!"); + }); + + // For a tail call, the return value is merely live-out and there aren't + // any nodes in the DAG representing it. Return a special value to + // indicate that a tail call has been emitted and no more Instructions + // should be processed in the current block. + if (isTailCall) { + DAG.setRoot(Chain); + return std::make_pair(SDValue(), SDValue()); + } + + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. 
+ ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (RetSExt) + AssertOp = ISD::AssertSext; + else if (RetZExt) + AssertOp = ISD::AssertZext; + SmallVector<SDValue, 4> ReturnValues; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + + SDValue ReturnValue = + getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT, + AssertOp); + ReturnValues.push_back(ReturnValue); + CurReg += NumRegs; + } + + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actualy look at the value. + if (ReturnValues.empty()) + return std::make_pair(SDValue(), Chain); + + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&RetTys[0], RetTys.size()), + &ReturnValues[0], ReturnValues.size()); + return std::make_pair(Res, Chain); } @@ -5800,8 +5757,7 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { - assert(0 && "LowerOperation not implemented for this target!"); - abort(); + llvm_unreachable("LowerOperation not implemented for this target!"); return SDValue(); } @@ -5813,7 +5769,7 @@ void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - RegsForValue RFV(TLI, Reg, V->getType()); + RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); PendingExports.push_back(Chain); @@ -5825,25 +5781,122 @@ void SelectionDAGISel:: LowerArguments(BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. 
Function &F = *LLVMBB->getParent(); - SDValue OldRoot = SDL->DAG.getRoot(); - SmallVector<SDValue, 16> Args; - TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc()); - - unsigned a = 0; - for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); - AI != E; ++AI) { - SmallVector<MVT, 4> ValueVTs; - ComputeValueVTs(TLI, AI->getType(), ValueVTs); + SelectionDAG &DAG = SDL->DAG; + SDValue OldRoot = DAG.getRoot(); + DebugLoc dl = SDL->getCurDebugLoc(); + const TargetData *TD = TLI.getTargetData(); + + // Set up the incoming argument description vector. + SmallVector<ISD::InputArg, 16> Ins; + unsigned Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++Idx) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I->getType(), ValueVTs); + bool isArgValueUsed = !I->use_empty(); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + TD->getABITypeAlignment(ArgTy); + + if (F.paramHasAttr(Idx, Attribute::ZExt)) + Flags.setZExt(); + if (F.paramHasAttr(Idx, Attribute::SExt)) + Flags.setSExt(); + if (F.paramHasAttr(Idx, Attribute::InReg)) + Flags.setInReg(); + if (F.paramHasAttr(Idx, Attribute::StructRet)) + Flags.setSRet(); + if (F.paramHasAttr(Idx, Attribute::ByVal)) { + Flags.setByVal(); + const PointerType *Ty = cast<PointerType>(I->getType()); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy); + unsigned FrameSize = TD->getTypeAllocSize(ElementTy); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. 
+ if (F.getParamAlignment(Idx)) + FrameAlign = F.getParamAlignment(Idx); + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (F.paramHasAttr(Idx, Attribute::Nest)) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed); + if (NumRegs > 1 && i == 0) + MyFlags.Flags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) + MyFlags.Flags.setOrigAlign(1); + Ins.push_back(MyFlags); + } + } + } + + // Call the target to set up the argument values. + SmallVector<SDValue, 8> InVals; + SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), + F.isVarArg(), Ins, + dl, DAG, InVals); + + // Verify that the target's LowerFormalArguments behaved as expected. + assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && + "LowerFormalArguments didn't return a valid chain!"); + assert(InVals.size() == Ins.size() && + "LowerFormalArguments didn't emit the correct number of values!"); + DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerFormalArguments emitted a null value!"); + assert(Ins[i].VT == InVals[i].getValueType() && + "LowerFormalArguments emitted a value with the wrong type!"); + }); + + // Update the DAG with the new chain value resulting from argument lowering. + DAG.setRoot(NewRoot); + + // Set up the argument values. 
+ unsigned i = 0; + Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I, ++Idx) { + SmallVector<SDValue, 4> ArgValues; + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); - if (!AI->use_empty()) { - SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues, - SDL->getCurDebugLoc())); + for (unsigned Value = 0; Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); + + if (!I->use_empty()) { + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (F.paramHasAttr(Idx, Attribute::SExt)) + AssertOp = ISD::AssertSext; + else if (F.paramHasAttr(Idx, Attribute::ZExt)) + AssertOp = ISD::AssertZext; + + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, + PartVT, VT, AssertOp)); + } + i += NumParts; + } + if (!I->use_empty()) { + SDL->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues, + SDL->getCurDebugLoc())); // If this argument is live outside of the entry block, insert a copy from // whereever we got it to the vreg that other BB's will reference it as. - SDL->CopyToExportRegsIfNeeded(AI); + SDL->CopyToExportRegsIfNeeded(I); } - a += NumValues; } + assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. // FIXME: this should insert code into the DAG! @@ -5908,11 +5961,11 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. 
- SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - MVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(VT); + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; @@ -5962,11 +6015,11 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, // own moves. Second, this check is necessary becuase FastISel doesn't // use CreateRegForValue to create registers, so it always creates // exactly one register for each non-void instruction. - MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Promote MVT::i1. if (VT == MVT::i1) - VT = TLI.getTypeToTransformTo(VT); + VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT); else { SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index 057c841..06acc8a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -15,6 +15,7 @@ #define SELECTIONDAGBUILD_H #include "llvm/Constants.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #ifndef NDEBUG @@ -23,6 +24,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include <vector> #include <set> @@ -75,8 +77,6 @@ class TruncInst; class UIToFPInst; class UnreachableInst; class UnwindInst; -class VICmpInst; -class VFCmpInst; class VAArgInst; 
class ZExtInst; @@ -117,7 +117,7 @@ public: SmallSet<Instruction*, 8> CatchInfoFound; #endif - unsigned MakeReg(MVT VT); + unsigned MakeReg(EVT VT); /// isExportedInst - Return true if the specified value is an instruction /// exported from its block. @@ -345,9 +345,15 @@ public: /// BitTestCases - Vector of BitTestBlock structures used to communicate /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; - + + /// PHINodesToUpdate - A list of phi instructions whose operand list will + /// be updated after processing the current basic block. std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to + /// scheduler custom lowering), track the change here. + DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping; + // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. DenseMap<Constant*, unsigned> ConstantsOut; @@ -363,11 +369,21 @@ public: /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; + /// HasTailCall - This is set to true if a call in the current + /// block has been translated as a tail call. In this case, + /// no subsequent DAG nodes should be created. 
+ /// + bool HasTailCall; + + LLVMContext *Context; + SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurDebugLoc(DebugLoc::getUnknownLoc()), - TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) { + TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + HasTailCall(false), + Context(dag.getContext()) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa); @@ -489,8 +505,6 @@ private: void visitAShr(User &I) { visitShift(I, ISD::SRA); } void visitICmp(User &I); void visitFCmp(User &I); - void visitVICmp(User &I); - void visitVFCmp(User &I); // Visit the conversion instructions void visitTrunc(User &I); void visitZExt(User &I); @@ -539,12 +553,10 @@ private: void visitVACopy(CallInst &I); void visitUserOp1(Instruction &I) { - assert(0 && "UserOp1 should not exist at instruction selection time!"); - abort(); + llvm_unreachable("UserOp1 should not exist at instruction selection time!"); } void visitUserOp2(Instruction &I) { - assert(0 && "UserOp2 should not exist at instruction selection time!"); - abort(); + llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9d72a12..ae98da5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -16,6 +16,7 @@ #include "SelectionDAGBuild.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" @@ -29,6 +30,7 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include 
"llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -47,8 +49,10 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -150,12 +154,15 @@ namespace llvm { // insert. The specified MachineInstr is created but not inserted into any // basic blocks, and the scheduler passes ownership of it to this method. MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const { - cerr << "If a target marks an instruction with " - << "'usesCustomDAGSchedInserter', it must implement " - << "TargetLowering::EmitInstrWithCustomInserter!\n"; - abort(); - return 0; + MachineBasicBlock *MBB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { +#ifndef NDEBUG + errs() << "If a target marks an instruction with " + "'usesCustomDAGSchedInserter', it must implement " + "TargetLowering::EmitInstrWithCustomInserter!"; +#endif + llvm_unreachable(0); + return 0; } /// EmitLiveInCopy - Emit a copy for a live in physical register. 
If the @@ -215,8 +222,11 @@ static void EmitLiveInCopy(MachineBasicBlock *MBB, --Pos; } - TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); - CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; + +CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); if (Coalesced) { if (&*InsertPos == UseMI) ++InsertPos; MBB->erase(UseMI); @@ -247,8 +257,10 @@ static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, E = MRI.livein_end(); LI != E; ++LI) if (LI->second) { const TargetRegisterClass *RC = MRI.getRegClass(LI->second); - TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), - LI->second, LI->first, RC, RC); + bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; } } } @@ -258,7 +270,7 @@ static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, //===----------------------------------------------------------------------===// SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : - FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), + MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), CurDAG(new SelectionDAG(TLI, *FuncInfo)), SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)), @@ -273,44 +285,42 @@ SelectionDAGISel::~SelectionDAGISel() { delete FuncInfo; } -unsigned SelectionDAGISel::MakeReg(MVT VT) { +unsigned SelectionDAGISel::MakeReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); + AU.addPreserved<GCModuleInfo>(); AU.addRequired<DwarfWriter>(); - AU.setPreservesAll(); + 
AU.addPreserved<DwarfWriter>(); + MachineFunctionPass::getAnalysisUsage(AU); } -bool SelectionDAGISel::runOnFunction(Function &Fn) { +bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + Function &Fn = *mf.getFunction(); + // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || EnableFastISel) && "-fast-isel-abort requires -fast-isel"); - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. - if (Fn.hasAvailableExternallyLinkage()) - return false; - - // Get alias analysis for load/store combining. AA = &getAnalysis<AliasAnalysis>(); - TargetMachine &TM = TLI.getTargetMachine(); - MF = &MachineFunction::construct(&Fn, TM); + MF = &mf; const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - if (MF->getFunction()->hasGC()) - GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF->getFunction()); + if (Fn.hasGC()) + GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn); else GFI = 0; RegInfo = &MF->getRegInfo(); - DOUT << "\n\n\n=== " << Fn.getName() << "\n"; + DEBUG(errs() << "\n\n\n=== " << Fn.getName() << "\n"); MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); @@ -358,140 +368,50 @@ static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, } } -/// IsFixedFrameObjectWithPosOffset - Check if object is a fixed frame object and -/// whether object offset >= 0. 
-static bool -IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) { - if (!isa<FrameIndexSDNode>(Op)) return false; - - FrameIndexSDNode * FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op); - int FrameIdx = FrameIdxNode->getIndex(); - return MFI->isFixedObjectIndex(FrameIdx) && - MFI->getObjectOffset(FrameIdx) >= 0; -} - -/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could -/// possibly be overwritten when lowering the outgoing arguments in a tail -/// call. Currently the implementation of this call is very conservative and -/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with -/// virtual registers would be overwritten by direct lowering. -static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op, - MachineFrameInfo *MFI) { - RegisterSDNode * OpReg = NULL; - if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS || - (Op.getOpcode()== ISD::CopyFromReg && - (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) && - (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) || - (Op.getOpcode() == ISD::LOAD && - IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) || - (Op.getOpcode() == ISD::MERGE_VALUES && - Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD && - IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.getResNo()). - getOperand(1)))) - return true; - return false; -} - -/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the -/// DAG and fixes their tailcall attribute operand. -static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG, - const TargetLowering& TLI) { - SDNode * Ret = NULL; - SDValue Terminator = DAG.getRoot(); - - // Find RET node. - if (Terminator.getOpcode() == ISD::RET) { - Ret = Terminator.getNode(); - } - - // Fix tail call attribute of CALL nodes. 
- for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(), - BI = DAG.allnodes_end(); BI != BE; ) { - --BI; - if (CallSDNode *TheCall = dyn_cast<CallSDNode>(BI)) { - SDValue OpRet(Ret, 0); - SDValue OpCall(BI, 0); - bool isMarkedTailCall = TheCall->isTailCall(); - // If CALL node has tail call attribute set to true and the call is not - // eligible (no RET or the target rejects) the attribute is fixed to - // false. The TargetLowering::IsEligibleForTailCallOptimization function - // must correctly identify tail call optimizable calls. - if (!isMarkedTailCall) continue; - if (Ret==NULL || - !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) { - // Not eligible. Mark CALL node as non tail call. Note that we - // can modify the call node in place since calls are not CSE'd. - TheCall->setNotTailCall(); - } else { - // Look for tail call clobbered arguments. Emit a series of - // copyto/copyfrom virtual register nodes to protect them. - SmallVector<SDValue, 32> Ops; - SDValue Chain = TheCall->getChain(), InFlag; - Ops.push_back(Chain); - Ops.push_back(TheCall->getCallee()); - for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { - SDValue Arg = TheCall->getArg(i); - bool isByVal = TheCall->getArgFlags(i).isByVal(); - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - if (!isByVal && - IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) { - MVT VT = Arg.getValueType(); - unsigned VReg = MF.getRegInfo(). - createVirtualRegister(TLI.getRegClassFor(VT)); - Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(), - VReg, Arg, InFlag); - InFlag = Chain.getValue(1); - Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(), - VReg, VT, InFlag); - Chain = Arg.getValue(1); - InFlag = Arg.getValue(2); - } - Ops.push_back(Arg); - Ops.push_back(TheCall->getArgFlagsVal(i)); - } - // Link in chain of CopyTo/CopyFromReg. 
- Ops[0] = Chain; - DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size()); - } - } - } -} - void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, BasicBlock::iterator Begin, BasicBlock::iterator End) { SDL->setCurrentBasicBlock(BB); - - // Lower all of the non-terminator instructions. - for (BasicBlock::iterator I = Begin; I != End; ++I) + MetadataContext &TheMetadata = LLVMBB->getParent()->getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + + // Lower all of the non-terminator instructions. If a call is emitted + // as a tail call, cease emitting nodes for this block. + for (BasicBlock::iterator I = Begin; I != End && !SDL->HasTailCall; ++I) { + if (MDDbgKind) { + // Update DebugLoc if debug information is attached with this + // instruction. + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); + SDL->setCurDebugLoc(Loc); + if (MF->getDefaultDebugLoc().isUnknown()) + MF->setDefaultDebugLoc(Loc); + } + } if (!isa<TerminatorInst>(I)) SDL->visit(*I); + } - // Ensure that all instructions which are used outside of their defining - // blocks are available as virtual registers. Invoke is handled elsewhere. - for (BasicBlock::iterator I = Begin; I != End; ++I) - if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) - SDL->CopyToExportRegsIfNeeded(I); + if (!SDL->HasTailCall) { + // Ensure that all instructions which are used outside of their defining + // blocks are available as virtual registers. Invoke is handled elsewhere. + for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) + SDL->CopyToExportRegsIfNeeded(I); - // Handle PHI nodes in successor blocks. - if (End == LLVMBB->end()) { - HandlePHINodesInSuccessorBlocks(LLVMBB); + // Handle PHI nodes in successor blocks. + if (End == LLVMBB->end()) { + HandlePHINodesInSuccessorBlocks(LLVMBB); - // Lower the terminator after the copies are emitted. 
- SDL->visit(*LLVMBB->getTerminator()); + // Lower the terminator after the copies are emitted. + SDL->visit(*LLVMBB->getTerminator()); + } } - + // Make sure the root of the DAG is up-to-date. CurDAG->setRoot(SDL->getControlRoot()); - // Check whether calls in this block are real tail calls. Fix up CALL nodes - // with correct tailcall attribute so that the target can rely on the tailcall - // attribute indicating whether the call is really eligible for tail call - // optimization. - if (PerformTailCallOpt) - CheckDAGForTailCallsAndFixThem(*CurDAG, TLI); - // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); SDL->clear(); @@ -500,51 +420,51 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet<SDNode*, 128> VisitedNodes; SmallVector<SDNode*, 128> Worklist; - + Worklist.push_back(CurDAG->getRoot().getNode()); - + APInt Mask; APInt KnownZero; APInt KnownOne; - + while (!Worklist.empty()) { SDNode *N = Worklist.back(); Worklist.pop_back(); - + // If we've already seen this node, ignore it. if (!VisitedNodes.insert(N)) continue; - + // Otherwise, add all chain operands to the worklist. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) if (N->getOperand(i).getValueType() == MVT::Other) Worklist.push_back(N->getOperand(i).getNode()); - + // If this is a CopyToReg with a vreg dest, process it. if (N->getOpcode() != ISD::CopyToReg) continue; - + unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); if (!TargetRegisterInfo::isVirtualRegister(DestReg)) continue; - + // Ignore non-scalar or non-integer values. 
SDValue Src = N->getOperand(2); - MVT SrcVT = Src.getValueType(); + EVT SrcVT = Src.getValueType(); if (!SrcVT.isInteger() || SrcVT.isVector()) continue; - + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); - + // Only install this information if it tells us something. if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { DestReg -= TargetRegisterInfo::FirstVirtualRegister; - FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo(); - if (DestReg >= FLI.LiveOutRegInfo.size()) - FLI.LiveOutRegInfo.resize(DestReg+1); - FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg]; + if (DestReg >= FuncInfo->LiveOutRegInfo.size()) + FuncInfo->LiveOutRegInfo.resize(DestReg+1); + FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo->LiveOutRegInfo[DestReg]; LOI.NumSignBits = NumSignBits; LOI.KnownOne = KnownOne; LOI.KnownZero = KnownZero; @@ -560,10 +480,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) - BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' + - BB->getBasicBlock()->getName(); + BlockName = MF->getFunction()->getNameStr() + ":" + + BB->getBasicBlock()->getNameStr(); - DOUT << "Initial selection DAG:\n"; + DEBUG(errs() << "Initial selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -575,10 +495,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } else { CurDAG->Combine(Unrestricted, *AA, OptLevel); } - - DOUT << "Optimized lowered selection DAG:\n"; + + DEBUG(errs() << "Optimized lowered selection DAG:\n"); DEBUG(CurDAG->dump()); - + // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
if (!DisableLegalizeTypes) { @@ -593,7 +513,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DOUT << "Type-legalized selection DAG:\n"; + DEBUG(errs() << "Type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (Changed) { @@ -608,7 +528,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DOUT << "Optimized type-legalized selection DAG:\n"; + DEBUG(errs() << "Optimized type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } @@ -638,11 +558,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DOUT << "Optimized vector-legalized selection DAG:\n"; + DEBUG(errs() << "Optimized vector-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } } - + if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); if (TimePassesIsEnabled) { @@ -651,10 +571,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } else { CurDAG->Legalize(DisableLegalizeTypes, OptLevel); } - - DOUT << "Legalized selection DAG:\n"; + + DEBUG(errs() << "Legalized selection DAG:\n"); DEBUG(CurDAG->dump()); - + if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. 
@@ -664,12 +584,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } else { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - - DOUT << "Optimized legalized selection DAG:\n"; + + DEBUG(errs() << "Optimized legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); - + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -682,7 +602,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { InstructionSelect(); } - DOUT << "Selected selection DAG:\n"; + DEBUG(errs() << "Selected selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -698,13 +618,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (ViewSUnitDAGs) Scheduler->viewGraph(); - // Emit machine code to BB. This can change 'BB' to the last block being + // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. if (TimePassesIsEnabled) { NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(); + BB = Scheduler->EmitSchedule(&SDL->EdgeMapping); } else { - BB = Scheduler->EmitSchedule(); + BB = Scheduler->EmitSchedule(&SDL->EdgeMapping); } // Free the scheduler state. @@ -715,9 +635,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { delete Scheduler; } - DOUT << "Selected machine code:\n"; + DEBUG(errs() << "Selected machine code:\n"); DEBUG(BB->dump()); -} +} void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, MachineFunction &MF, @@ -736,6 +656,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, #endif ); + MetadataContext &TheMetadata = Fn.getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + // Iterate over all basic blocks in the function. 
for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { BasicBlock *LLVMBB = &*I; @@ -758,7 +681,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, I != E; ++I, ++j) if (Fn.paramHasAttr(j, Attribute::ByVal)) { if (EnableFastISelVerbose || EnableFastISelAbort) - cerr << "FastISel skips entry block due to byval argument\n"; + errs() << "FastISel skips entry block due to byval argument\n"; SuppressFastISel = true; break; } @@ -818,16 +741,29 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, FastIS->startNewBlock(BB); // Do FastISel on as many instructions as possible. for (; BI != End; ++BI) { + if (MDDbgKind) { + // Update DebugLoc if debug information is attached with this + // instruction. + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, + MF.getDebugLocInfo()); + FastIS->setCurDebugLoc(Loc); + if (MF.getDefaultDebugLoc().isUnknown()) + MF.setDefaultDebugLoc(Loc); + } + } + // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(BI)) if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - cerr << "FastISel miss: "; + errs() << "FastISel miss: "; BI->dump(); } - if (EnableFastISelAbort) - assert(0 && "FastISel didn't handle a PHI in a successor"); + assert(!EnableFastISelAbort && + "FastISel didn't handle a PHI in a successor"); break; } @@ -842,11 +778,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Then handle certain instructions as single-LLVM-Instruction blocks. 
if (isa<CallInst>(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - cerr << "FastISel missed call: "; + errs() << "FastISel missed call: "; BI->dump(); } - if (BI->getType() != Type::VoidTy) { + if (BI->getType() != Type::getVoidTy(*CurDAG->getContext())) { unsigned &R = FuncInfo->ValueMap[BI]; if (!R) R = FuncInfo->CreateRegForValue(BI); @@ -864,13 +800,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // For now, be a little lenient about non-branch terminators. if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - cerr << "FastISel miss: "; + errs() << "FastISel miss: "; BI->dump(); } if (EnableFastISelAbort) // The "fast" selector couldn't handle something and bailed. // For the purpose of debugging, just abort. - assert(0 && "FastISel didn't select the entire block"); + llvm_unreachable("FastISel didn't select the entire block"); } break; } @@ -895,15 +831,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, void SelectionDAGISel::FinishBasicBlock() { - DOUT << "Target-post-processed machine code:\n"; + DEBUG(errs() << "Target-post-processed machine code:\n"); DEBUG(BB->dump()); - DOUT << "Total amount of phi nodes to update: " - << SDL->PHINodesToUpdate.size() << "\n"; + DEBUG(errs() << "Total amount of phi nodes to update: " + << SDL->PHINodesToUpdate.size() << "\n"); DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) - DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first - << ", " << SDL->PHINodesToUpdate[i].second << ")\n";); - + errs() << "Node " << i << " : (" + << SDL->PHINodesToUpdate[i].first + << ", " << SDL->PHINodesToUpdate[i].second << ")\n"); + // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. 
if (SDL->SwitchCases.empty() && @@ -932,7 +869,7 @@ SelectionDAGISel::FinishBasicBlock() { CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); SDL->clear(); - } + } for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into @@ -947,8 +884,8 @@ SelectionDAGISel::FinishBasicBlock() { SDL->visitBitTestCase(SDL->BitTestCases[i].Default, SDL->BitTestCases[i].Reg, SDL->BitTestCases[i].Cases[j]); - - + + CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); SDL->clear(); @@ -1001,7 +938,7 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); SDL->clear(); } - + // Set the current basic block to the mbb we wish to insert the code into BB = SDL->JTCases[i].second.MBB; SDL->setCurrentBasicBlock(BB); @@ -1010,7 +947,7 @@ SelectionDAGISel::FinishBasicBlock() { CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); SDL->clear(); - + // Update PHI Nodes for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; @@ -1019,20 +956,21 @@ SelectionDAGISel::FinishBasicBlock() { "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. if (PHIBB == SDL->JTCases[i].second.Default) { - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); + PHI->addOperand + (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false)); + PHI->addOperand + (MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); } // JT BB. 
Just iterate over successors here if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, - false)); + PHI->addOperand + (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } } SDL->JTCases.clear(); - + // If the switch block involved a branch to one of the actual successors, we // need to update PHI nodes in that block. for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { @@ -1045,25 +983,31 @@ SelectionDAGISel::FinishBasicBlock() { PHI->addOperand(MachineOperand::CreateMBB(BB)); } } - + // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDL->SwitchCases[i].ThisBB; + MachineBasicBlock *ThisBB = BB = SDL->SwitchCases[i].ThisBB; SDL->setCurrentBasicBlock(BB); - + // Emit the code SDL->visitSwitchCase(SDL->SwitchCases[i]); CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); - SDL->clear(); - + // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. + // If new BB's are created during scheduling, the edges may have been + // updated. That is, the edge from ThisBB to BB may have been split and + // BB's predecessor is now another block. 
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI = + SDL->EdgeMapping.find(BB); + if (EI != SDL->EdgeMapping.end()) + ThisBB = EI->second; for (MachineBasicBlock::iterator Phi = BB->begin(); Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ // This value for this PHI node is recorded in PHINodesToUpdate, get it. @@ -1073,21 +1017,22 @@ SelectionDAGISel::FinishBasicBlock() { if (SDL->PHINodesToUpdate[pn].first == Phi) { Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn]. second, false)); - Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB)); + Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); break; } } } - + // Don't process RHS if same block as LHS. if (BB == SDL->SwitchCases[i].FalseBB) SDL->SwitchCases[i].FalseBB = 0; - + // If we haven't handled the RHS, do so now. Otherwise, we're done. SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB; SDL->SwitchCases[i].FalseBB = 0; } assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0); + SDL->clear(); } SDL->SwitchCases.clear(); @@ -1101,12 +1046,12 @@ SelectionDAGISel::FinishBasicBlock() { /// ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); - + if (!Ctor) { Ctor = ISHeuristic; RegisterScheduler::setDefault(Ctor); } - + return Ctor(this, OptLevel); } @@ -1123,25 +1068,25 @@ ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { /// the dag combiner simplified the 255, we still want to match. RHS is the /// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value /// specified in the .td file (e.g. 255). 
-bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, int64_t DesiredMaskS) const { const APInt &ActualMask = RHS->getAPIntValue(); const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); - + // If the actual mask exactly matches, success! if (ActualMask == DesiredMask) return true; - + // If the actual AND mask is allowing unallowed bits, this doesn't match. if (ActualMask.intersects(~DesiredMask)) return false; - + // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits. APInt NeededMask = DesiredMask & ~ActualMask; if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) return true; - + // TODO: check to see if missing bits are just not demanded. // Otherwise, this pattern doesn't match. @@ -1152,32 +1097,32 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, /// the dag combiner simplified the 255, we still want to match. RHS is the /// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value /// specified in the .td file (e.g. 255). -bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, +bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, int64_t DesiredMaskS) const { const APInt &ActualMask = RHS->getAPIntValue(); const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); - + // If the actual mask exactly matches, success! if (ActualMask == DesiredMask) return true; - + // If the actual AND mask is allowing unallowed bits, this doesn't match. if (ActualMask.intersects(~DesiredMask)) return false; - + // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits. 
APInt NeededMask = DesiredMask & ~ActualMask; - + APInt KnownZero, KnownOne; CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); - + // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) return true; - + // TODO: check to see if missing bits are just not demanded. - + // Otherwise, this pattern doesn't match. return false; } @@ -1196,7 +1141,7 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { unsigned i = 2, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) --e; // Don't process a flag operand if it is here. - + while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); if ((Flags & 7) != 4 /*MEM*/) { @@ -1210,25 +1155,25 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { - cerr << "Could not match memory address. Inline asm failure!\n"; - exit(1); + llvm_report_error("Could not match memory address. Inline asm" + " failure!"); } - + // Add this to the output node. - MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy(); + EVT IntPtrTy = TLI.getPointerTy(); Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), IntPtrTy)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } } - + // Add the flag input back if present. if (e != InOps.size()) Ops.push_back(InOps.back()); } -/// findFlagUse - Return use of MVT::Flag value produced by the specified +/// findFlagUse - Return use of EVT::Flag value produced by the specified /// SDNode. /// static SDNode *findFlagUse(SDNode *N) { @@ -1331,7 +1276,7 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, // Fold. But since Fold and FU are flagged together, this will create // a cycle in the scheduling graph. 
- MVT VT = Root->getValueType(Root->getNumValues()-1); + EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Flag) { SDNode *FU = findFlagUse(Root); if (FU == NULL) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 6fd5df2..ccc5e3c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -44,7 +44,7 @@ namespace llvm { } static std::string getEdgeDestLabel(const void *Node, unsigned i) { - return ((const SDNode *) Node)->getValueType(i).getMVTString(); + return ((const SDNode *) Node)->getValueType(i).getEVTString(); } /// edgeTargetsEdgeSource - This method returns true if this outgoing edge @@ -84,7 +84,7 @@ namespace llvm { template<typename EdgeIter> static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { SDValue Op = EI.getNode()->getOperand(EI.getOperand()); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); if (VT == MVT::Flag) return "color=red,style=bold"; else if (VT == MVT::Other) @@ -138,11 +138,11 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false, - Title); + ViewGraph(this, "dag." 
+ getMachineFunction().getFunction()->getNameStr(), + false, Title); #else - cerr << "SelectionDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } @@ -158,8 +158,8 @@ void SelectionDAG::clearGraphAttrs() { #ifndef NDEBUG NodeGraphAttrs.clear(); #else - cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } @@ -170,8 +170,8 @@ void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { #ifndef NDEBUG NodeGraphAttrs[N] = Attrs; #else - cerr << "SelectionDAG::setGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } @@ -188,8 +188,8 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { else return ""; #else - cerr << "SelectionDAG::getGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; return std::string(""); #endif } @@ -200,8 +200,8 @@ void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { #ifndef NDEBUG NodeGraphAttrs[N] = std::string("color=") + Color; #else - cerr << "SelectionDAG::setGraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } @@ -216,7 +216,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet if (level >= 20) { if (!printed) { printed = 
true; - DOUT << "setSubgraphColor hit max level\n"; + DEBUG(errs() << "setSubgraphColor hit max level\n"); } return true; } @@ -232,8 +232,8 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet } } #else - cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif return hit_limit; } @@ -255,8 +255,8 @@ void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { } #else - cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 83357e0..a2baee4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,18 +11,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" using namespace llvm; @@ -239,12 +241,23 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::UO_F64] = "__unorddf2"; Names[RTLIB::O_F32] = 
"__unordsf2"; Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; } +/// InitLibcallCallingConvs - Set default libcall CallingConvs. +/// +static void InitLibcallCallingConvs(CallingConv::ID *CCs) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + CCs[i] = CallingConv::C; + } +} + /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::f64) return FPEXT_F32_F64; @@ -254,7 +267,7 @@ RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { /// getFPROUND - Return the FPROUND_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { if (RetVT == MVT::f32) { if (OpVT == MVT::f64) return FPROUND_F64_F32; @@ -273,7 +286,7 @@ RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::i8) return FPTOSINT_F32_I8; @@ -312,7 +325,7 @@ RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. 
-RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::i8) return FPTOUINT_F32_I8; @@ -351,7 +364,7 @@ RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { /// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { if (RetVT == MVT::f32) return SINTTOFP_I32_F32; @@ -385,7 +398,7 @@ RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { /// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { if (RetVT == MVT::f32) return UINTTOFP_I32_F32; @@ -439,8 +452,9 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { CCs[RTLIB::O_F64] = ISD::SETEQ; } -TargetLowering::TargetLowering(TargetMachine &tm) - : TM(tm), TD(TM.getTargetData()) { +/// NOTE: The constructor takes ownership of TLOF. +TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { // All operations default to being supported. 
memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -490,12 +504,10 @@ TargetLowering::TargetLowering(TargetMachine &tm) IsLittleEndian = TD->isLittleEndian(); UsesGlobalOffsetTable = false; - ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType()); - ShiftAmtHandling = Undefined; + ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; - allowUnalignedMemoryAccesses = false; benefitFromCodePlacementOpt = false; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; @@ -515,14 +527,62 @@ TargetLowering::TargetLowering(TargetMachine &tm) InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); // Tell Legalize whether the assembler supports DEBUG_LOC. - const TargetAsmInfo *TASM = TM.getTargetAsmInfo(); + const MCAsmInfo *TASM = TM.getMCAsmInfo(); if (!TASM || !TASM->hasDotLocAndDotFile()) setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); } -TargetLowering::~TargetLowering() {} +TargetLowering::~TargetLowering() { + delete &TLOF; +} + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + EVT &RegisterVT, + TargetLowering* TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. 
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!TLI->isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + EVT DestVT = TLI->getRegisterType(NewVT); + RegisterVT = DestVT; + if (EVT(DestVT).bitsLT(NewVT)) { + // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + } else { + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; + } + + return 1; +} /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. @@ -546,13 +606,13 @@ void TargetLowering::computeRegisterProperties() { // Every integer value type larger than this largest register takes twice as // many registers to represent as the previous ValueType. 
for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { - MVT EVT = (MVT::SimpleValueType)ExpandedReg; - if (!EVT.isInteger()) + EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg; + if (!ExpandedVT.isInteger()) break; NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); - ValueTypeActions.setTypeAction(EVT, Expand); + ValueTypeActions.setTypeAction(ExpandedVT, Expand); } // Inspect all of the ValueType's smaller than the largest integer @@ -560,7 +620,7 @@ void TargetLowering::computeRegisterProperties() { unsigned LegalIntReg = LargestIntReg; for (unsigned IntReg = LargestIntReg - 1; IntReg >= (unsigned)MVT::i1; --IntReg) { - MVT IVT = (MVT::SimpleValueType)IntReg; + EVT IVT = (MVT::SimpleValueType)IntReg; if (isTypeLegal(IVT)) { LegalIntReg = IntReg; } else { @@ -608,20 +668,20 @@ void TargetLowering::computeRegisterProperties() { i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; if (!isTypeLegal(VT)) { - MVT IntermediateVT, RegisterVT; + MVT IntermediateVT; + EVT RegisterVT; unsigned NumIntermediates; NumRegistersForVT[i] = - getVectorTypeBreakdown(VT, - IntermediateVT, NumIntermediates, - RegisterVT); + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); RegisterTypeForVT[i] = RegisterVT; // Determine if there is a legal wider type. 
bool IsLegalWiderType = false; - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; + EVT SVT = (MVT::SimpleValueType)nVT; if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && SVT.getVectorNumElements() > NElts) { TransformToType[i] = SVT; @@ -631,7 +691,7 @@ void TargetLowering::computeRegisterProperties() { } } if (!IsLegalWiderType) { - MVT NVT = VT.getPow2VectorType(); + EVT NVT = VT.getPow2VectorType(); if (NVT == VT) { // Type is already a power of 2. The default action is to split. TransformToType[i] = MVT::Other; @@ -650,11 +710,10 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { } -MVT TargetLowering::getSetCCResultType(MVT VT) const { - return getValueType(TD->getIntPtrType()); +MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const { + return PointerTy.SimpleTy; } - /// getVectorTypeBreakdown - Vector types are broken down into some number of /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. @@ -664,13 +723,13 @@ MVT TargetLowering::getSetCCResultType(MVT VT) const { /// register. It also returns the VT and quantity of the intermediate values /// before they are promoted/expanded. /// -unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, - MVT &IntermediateVT, +unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, unsigned &NumIntermediates, - MVT &RegisterVT) const { + EVT &RegisterVT) const { // Figure out the right, legal destination reg to copy into. 
unsigned NumElts = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType(); + EVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; @@ -683,19 +742,20 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. - while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + while (NumElts > 1 && !isTypeLegal( + EVT::getVectorVT(Context, EltTy, NumElts))) { NumElts >>= 1; NumVectorRegs <<= 1; } NumIntermediates = NumVectorRegs; - MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); if (!isTypeLegal(NewVT)) NewVT = EltTy; IntermediateVT = NewVT; - MVT DestVT = getRegisterType(NewVT); + EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; if (DestVT.bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16. @@ -714,7 +774,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, /// If there is no vector type that we want to widen to, returns MVT::Other /// When and where to widen is target dependent based on the cost of /// scalarizing vs using the wider vector type. 
-MVT TargetLowering::getWidenVectorType(MVT VT) const { +EVT TargetLowering::getWidenVectorType(EVT VT) const { assert(VT.isVector()); if (isTypeLegal(VT)) return VT; @@ -781,7 +841,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, // if we can expand it to have all bits set, do it if (C->getAPIntValue().intersects(~Demanded)) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), DAG.getConstant(Demanded & C->getAPIntValue(), @@ -822,7 +882,7 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, if (!isPowerOf2_32(SmallVTBits)) SmallVTBits = NextPowerOf2(SmallVTBits); for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { - MVT SmallVT = MVT::getIntegerVT(SmallVTBits); + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits); if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && TLI.isZExtFree(SmallVT, Op.getValueType())) { // We found a type with free casts. @@ -1008,7 +1068,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known if ((KnownOne & KnownOne2) == KnownOne) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); @@ -1023,7 +1083,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // if we can expand it to have all bits set, do it if (Expanded.isAllOnesValue()) { if (Expanded != C->getAPIntValue()) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), TLO.DAG.getConstant(Expanded, VT)); return TLO.CombineTo(Op, New); @@ -1099,7 +1159,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } @@ -1116,7 +1176,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; case ISD::SRL: if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned ShAmt = SA->getZExtValue(); unsigned VTSize = VT.getSizeInBits(); SDValue InOp = Op.getOperand(0); @@ -1168,7 +1228,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0), Op.getOperand(1))); if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned ShAmt = SA->getZExtValue(); // If the shift count is an invalid immediate, don't do anything. @@ -1205,7 +1265,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); // Sign extension. 
Compute the demanded bits in the result that are not // present in the input. @@ -1272,7 +1332,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::SIGN_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); @@ -1371,7 +1431,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::AssertZext: { - MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, @@ -1385,7 +1445,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, #if 0 // If this is an FP->Int bitcast and if the sign bit is the only thing that // is demanded, turn this into a FGETSIGN. - if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) && + if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) && MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && !MVT::isVector(Op.getOperand(0).getValueType())) { // Only do this xform if FGETSIGN is valid or if before legalize. @@ -1492,7 +1552,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // to handle some common cases. // Fall back to ComputeMaskedBits to catch other known cases. - MVT OpVT = Val.getValueType(); + EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getSizeInBits(); APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; @@ -1504,10 +1564,11 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { /// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. 
SDValue -TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, +TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const { SelectionDAG &DAG = DCI.DAG; + LLVMContext &Context = *DAG.getContext(); // These setcc operations always fold. switch (Cond) { @@ -1518,316 +1579,321 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, case ISD::SETTRUE2: return DAG.getConstant(1, VT); } + if (isa<ConstantSDNode>(N0.getNode())) { + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); - if (isa<ConstantSDNode>(N0.getNode())) { - return DAG.FoldSetCC(VT, N0, N1, Cond, dl); - } else { - // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an - // equality comparison, then we're just comparing whether X itself is - // zero. - if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && - N0.getOperand(0).getOpcode() == ISD::CTLZ && - N0.getOperand(1).getOpcode() == ISD::Constant) { - unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { - if ((C1 == 0) == (Cond == ISD::SETEQ)) { - // (srl (ctlz x), 5) == 0 -> X != 0 - // (srl (ctlz x), 5) != 1 -> X != 0 - Cond = ISD::SETNE; - } else { - // (srl (ctlz x), 5) != 0 -> X == 0 - // (srl (ctlz x), 5) == 1 -> X == 0 - Cond = ISD::SETEQ; - } - SDValue Zero = DAG.getConstant(0, N0.getValueType()); - return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), - Zero, Cond); + + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. 
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; } + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); } + } - // If the LHS is '(and load, const)', the RHS is 0, - // the test is for equality or unsigned, and all 1 bits of the const are - // in the same partial word, see if we can shorten the load. - if (DCI.isBeforeLegalize() && - N0.getOpcode() == ISD::AND && C1 == 0 && - N0.getNode()->hasOneUse() && - isa<LoadSDNode>(N0.getOperand(0)) && - N0.getOperand(0).getNode()->hasOneUse() && - isa<ConstantSDNode>(N0.getOperand(1))) { - LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); - uint64_t bestMask = 0; - unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed() && - // FIXME: This uses getZExtValue() below so it only works on i64 and - // below. - N0.getValueType().getSizeInBits() <= 64) { - unsigned origWidth = N0.getValueType().getSizeInBits(); - // We can narrow (e.g.) 16-bit extending loads on 32-bit target to - // 8 bits, but have to be careful... 
- if (Lod->getExtensionType() != ISD::NON_EXTLOAD) - origWidth = Lod->getMemoryVT().getSizeInBits(); - uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - for (unsigned width = origWidth / 2; width>=8; width /= 2) { - uint64_t newMask = (1ULL << width) - 1; - for (unsigned offset=0; offset<origWidth/width; offset++) { - if ((newMask & Mask) == Mask) { - if (!TD->isLittleEndian()) - bestOffset = (origWidth/width - offset - 1) * (width/8); - else - bestOffset = (uint64_t)offset * (width/8); - bestMask = Mask >> (offset * (width/8) * 8); - bestWidth = width; - break; - } - newMask = newMask << width; + // If the LHS is '(and load, const)', the RHS is 0, + // the test is for equality or unsigned, and all 1 bits of the const are + // in the same partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + uint64_t bestMask = 0; + unsigned bestWidth = 0, bestOffset = 0; + if (!Lod->isVolatile() && Lod->isUnindexed() && + // FIXME: This uses getZExtValue() below so it only works on i64 and + // below. + N0.getValueType().getSizeInBits() <= 64) { + unsigned origWidth = N0.getValueType().getSizeInBits(); + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful... 
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + uint64_t newMask = (1ULL << width) - 1; + for (unsigned offset=0; offset<origWidth/width; offset++) { + if ((newMask & Mask) == Mask) { + if (!TD->isLittleEndian()) + bestOffset = (origWidth/width - offset - 1) * (width/8); + else + bestOffset = (uint64_t)offset * (width/8); + bestMask = Mask >> (offset * (width/8) * 8); + bestWidth = width; + break; } + newMask = newMask << width; } } - if (bestWidth) { - MVT newVT = MVT::getIntegerVT(bestWidth); - if (newVT.isRound()) { - MVT PtrType = Lod->getOperand(1).getValueType(); - SDValue Ptr = Lod->getBasePtr(); - if (bestOffset != 0) - Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, PtrType)); - unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); - SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getSrcValue(), - Lod->getSrcValueOffset() + bestOffset, - false, NewAlign); - return DAG.getSetCC(dl, VT, - DAG.getNode(ISD::AND, dl, newVT, NewLoad, - DAG.getConstant(bestMask, newVT)), - DAG.getConstant(0LL, newVT), Cond); - } + } + if (bestWidth) { + EVT newVT = EVT::getIntegerVT(Context, bestWidth); + if (newVT.isRound()) { + EVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getSrcValue(), + Lod->getSrcValueOffset() + bestOffset, + false, NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask, newVT)), + DAG.getConstant(0LL, newVT), Cond); } } + } - // If the LHS is a 
ZERO_EXTEND, perform the comparison on the input. - if (N0.getOpcode() == ISD::ZERO_EXTEND) { - unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); - - // If the comparison constant has bits in the upper part, the - // zero-extended value could never match. - if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), - C1.getBitWidth() - InSize))) { - switch (Cond) { - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETEQ: return DAG.getConstant(0, VT); - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETNE: return DAG.getConstant(1, VT); - case ISD::SETGT: - case ISD::SETGE: - // True if the sign bit of C1 is set. - return DAG.getConstant(C1.isNegative(), VT); - case ISD::SETLT: - case ISD::SETLE: - // True if the sign bit of C1 isn't set. - return DAG.getConstant(C1.isNonNegative(), VT); - default: - break; - } - } + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); - // Otherwise, we can perform the comparison with the low bits. + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. + if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { switch (Cond) { - case ISD::SETEQ: - case ISD::SETNE: case ISD::SETUGT: case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, VT); case ISD::SETULT: case ISD::SETULE: - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(APInt(C1).trunc(InSize), - N0.getOperand(0).getValueType()), - Cond); + case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. 
+ return DAG.getConstant(C1.isNonNegative(), VT); default: - break; // todo, be more careful with signed comparisons - } - } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); - unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); - MVT ExtDstTy = N0.getValueType(); - unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); - - // If the extended part has any inconsistent bits, it cannot ever - // compare equal. In other words, they have to be all ones or all - // zeros. - APInt ExtBits = - APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); - if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) - return DAG.getConstant(Cond == ISD::SETNE, VT); - - SDValue ZextOp; - MVT Op0Ty = N0.getOperand(0).getValueType(); - if (Op0Ty == ExtSrcTy) { - ZextOp = N0.getOperand(0); - } else { - APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); - ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), - DAG.getConstant(Imm, Op0Ty)); - } - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(ZextOp.getNode()); - // Otherwise, make this a use of a zext. - return DAG.getSetCC(dl, VT, ZextOp, - DAG.getConstant(C1 & APInt::getLowBitsSet( - ExtDstTyBits, - ExtSrcTyBits), - ExtDstTy), - Cond); - } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - - // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC - if (N0.getOpcode() == ISD::SETCC) { - bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); - if (TrueWhenTrue) - return N0; - - // Invert the condition. 
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, - N0.getOperand(0).getValueType().isInteger()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); - } - - if ((N0.getOpcode() == ISD::XOR || - (N0.getOpcode() == ISD::AND && - N0.getOperand(0).getOpcode() == ISD::XOR && - N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && - isa<ConstantSDNode>(N0.getOperand(1)) && - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { - // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We - // can only do this if the top bits are known zero. - unsigned BitWidth = N0.getValueSizeInBits(); - if (DAG.MaskedValueIsZero(N0, - APInt::getHighBitsSet(BitWidth, - BitWidth-1))) { - // Okay, get the un-inverted input value. - SDValue Val; - if (N0.getOpcode() == ISD::XOR) - Val = N0.getOperand(0); - else { - assert(N0.getOpcode() == ISD::AND && - N0.getOperand(0).getOpcode() == ISD::XOR); - // ((X^1)&1)^1 -> X & 1 - Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), - N0.getOperand(0).getOperand(0), - N0.getOperand(1)); - } - return DAG.getSetCC(dl, VT, Val, N1, - Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); - } + break; } } + + // Otherwise, we can perform the comparison with the low bits. 
+ switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: { + EVT newVT = N0.getOperand(0).getValueType(); + if (DCI.isBeforeLegalizeOps() || + (isOperationLegal(ISD::SETCC, newVT) && + getCondCodeAction(Cond, newVT)==Legal)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(APInt(C1).trunc(InSize), newVT), + Cond); + break; + } + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + EVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the extended part has any inconsistent bits, it cannot ever + // compare equal. In other words, they have to be all ones or all + // zeros. + APInt ExtBits = + APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); + if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) + return DAG.getConstant(Cond == ISD::SETNE, VT); - APInt MinVal, MaxVal; - unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); - if (ISD::isSignedIntSetCC(Cond)) { - MinVal = APInt::getSignedMinValue(OperandBitSize); - MaxVal = APInt::getSignedMaxValue(OperandBitSize); + SDValue ZextOp; + EVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); } else { - MinVal = APInt::getMinValue(OperandBitSize); - MaxVal = APInt::getMaxValue(OperandBitSize); + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, Op0Ty)); } - - // Canonicalize GE/LE comparisons to use GT/LT comparisons. 
- if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { - if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true - // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1-1, N1.getValueType()), - (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); + if (TrueWhenTrue) + return N0; + + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } - - if (Cond == ISD::SETLE || Cond == ISD::SETULE) { - if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true - // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1+1, N1.getValueType()), - (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. + unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. 
+ SDValue Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + return DAG.getSetCC(dl, VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } } + } + + APInt MinVal, MaxVal; + unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = APInt::getSignedMinValue(OperandBitSize); + MaxVal = APInt::getSignedMaxValue(OperandBitSize); + } else { + MinVal = APInt::getMinValue(OperandBitSize); + MaxVal = APInt::getMaxValue(OperandBitSize); + } - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getConstant(0, VT); // X < MIN --> false - if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) - return DAG.getConstant(1, VT); // X >= MIN --> true - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getConstant(0, VT); // X > MAX --> false - if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) - return DAG.getConstant(1, VT); // X <= MAX --> true - - // Canonicalize setgt X, Min --> setne X, Min - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - // Canonicalize setlt X, Max --> setne X, Max - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - - // If we have setult X, 1, turn it into seteq X, 0 - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, N0.getValueType()), - ISD::SETEQ); - // If we have setugt X, Max-1, turn it into seteq X, Max - else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, N0.getValueType()), - 
ISD::SETEQ); - - // If we have "setcc X, C0", check to see if we can shrink the immediate - // by changing cc. - - // SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && - C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, N1.getValueType()), - ISD::SETLT); - - // SETULT X, SINTMIN -> SETGT X, -1 - if (Cond == ISD::SETULT && - C1 == APInt::getSignedMinValue(OperandBitSize)) { - SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), - N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); - } + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + // X >= C0 --> X > (C0-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1-1, N1.getValueType()), + (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + } - // Fold bit comparisons when we can. - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - VT == N0.getValueType() && N0.getOpcode() == ISD::AND) - if (ConstantSDNode *AndRHS = - dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - MVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(); - if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 - // Perform the xform if the AND RHS is a single bit. - if (isPowerOf2_64(AndRHS->getZExtValue())) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(Log2_64(AndRHS->getZExtValue()), - ShiftTy)); - } - } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { - // (X & 8) == 8 --> (X & 8) >> 3 - // Perform the xform if C1 is a single bit. 
- if (C1.isPowerOf2()) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(C1.logBase2(), ShiftTy)); - } + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + // X <= C0 --> X < (C0+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1+1, N1.getValueType()), + (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); + } + + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) + return DAG.getConstant(0, VT); // X < MIN --> false + if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) + return DAG.getConstant(1, VT); // X >= MIN --> true + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) + return DAG.getConstant(0, VT); // X > MAX --> false + if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) + return DAG.getConstant(1, VT); // X <= MAX --> true + + // Canonicalize setgt X, Min --> setne X, Min + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + // Canonicalize setlt X, Max --> setne X, Max + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), + ISD::SETEQ); + // If we have setugt X, Max-1, turn it into seteq X, Max + else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, N0.getValueType()), + ISD::SETEQ); + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. 
+ + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + EVT ShiftTy = DCI.isBeforeLegalize() ? + getPointerTy() : getShiftAmountTy(); + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + if (isPowerOf2_64(AndRHS->getZExtValue())) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(Log2_64(AndRHS->getZExtValue()), + ShiftTy)); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. + if (C1.isPowerOf2()) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(C1.logBase2(), ShiftTy)); } } - } - } else if (isa<ConstantSDNode>(N0.getNode())) { - // Ensure that the constant occurs on the RHS. - return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } } if (isa<ConstantFPSDNode>(N0.getNode())) { @@ -1840,7 +1906,7 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, if (CFP->getValueAPF().isNaN()) { // If an operand is known to be a nan, we can fold it. switch (ISD::getUnorderedFlavor(Cond)) { - default: assert(0 && "Unknown flavor!"); + default: llvm_unreachable("Unknown flavor!"); case 0: // Known false. return DAG.getConstant(0, VT); case 1: // Known true. 
@@ -1856,6 +1922,43 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, // materialize 0.0. if (Cond == ISD::SETO || Cond == ISD::SETUO) return DAG.getSetCC(dl, VT, N0, N0, Cond); + + // If the condition is not legal, see if we can find an equivalent one + // which is legal. + if (!isCondCodeLegal(Cond, N0.getValueType())) { + // If the comparison was an awkward floating-point == or != and one of + // the comparison operands is infinity or negative infinity, convert the + // condition to a less-awkward <= or >=. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); + } else { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETULT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETULT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); + } + } + } } if (N0 == N1) { @@ -2000,7 +2103,7 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, SDValue Temp; if (N0.getValueType() == MVT::i1 && foldBooleans) { switch (Cond) { - default: assert(0 && "Unknown integer setcc!"); + default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: 
// X == Y -> ~(X^Y) Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); N0 = DAG.getNOT(dl, Temp, MVT::i1); @@ -2090,7 +2193,7 @@ bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, const MachineFrameInfo *MFI) const { if (LD->getChain() != Base->getChain()) return false; - MVT VT = LD->getValueType(0); + EVT VT = LD->getValueType(0); if (VT.getSizeInBits() / 8 != Bytes) return false; @@ -2171,7 +2274,7 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { /// LowerXConstraint - try to replace an X constraint, which matches anything, /// with another that has more specific requirements based on the type of the /// corresponding operand. -const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const{ +const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ if (ConstraintVT.isInteger()) return "r"; if (ConstraintVT.isFloatingPoint()) @@ -2244,14 +2347,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::vector<unsigned> TargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { return std::vector<unsigned>(); } std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint[0] != '{') return std::pair<unsigned, const TargetRegisterClass*>(0, 0); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2280,7 +2383,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (StringsEqualNoCase(RegName, RI->get(*I).AsmName)) + if (StringsEqualNoCase(RegName, RI->getName(*I))) return std::make_pair(*I, RC); } } @@ -2310,7 +2413,7 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { /// is. 
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { - default: assert(0 && "Unknown constraint type!"); + default: llvm_unreachable("Unknown constraint type!"); case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -2406,10 +2509,13 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, // 'X' matches anything. if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { // Labels and constants are handled elsewhere ('X' is the only thing - // that matches labels). - if (isa<BasicBlock>(OpInfo.CallOperandVal) || - isa<ConstantInt>(OpInfo.CallOperandVal)) + // that matches labels). For Functions, the type here is the type of + // the result, which is not what we want to look at; leave them alone. + Value *v = OpInfo.CallOperandVal; + if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) { + OpInfo.CallOperandVal = v; return; + } // Otherwise, try to resolve it to something we know about by looking at // the actual operand type. @@ -2464,7 +2570,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, std::vector<SDNode*>* Created) const { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); // Check to see if we can do this. @@ -2521,7 +2627,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, std::vector<SDNode*>* Created) const { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Check to see if we can do this. 
@@ -2569,45 +2675,3 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, DAG.getConstant(magics.s-1, getShiftAmountTy())); } } - -/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET -/// node that don't prevent tail call optimization. -static SDValue IgnoreHarmlessInstructions(SDValue node) { - // Found call return. - if (node.getOpcode() == ISD::CALL) return node; - // Ignore MERGE_VALUES. Will have at least one operand. - if (node.getOpcode() == ISD::MERGE_VALUES) - return IgnoreHarmlessInstructions(node.getOperand(0)); - // Ignore ANY_EXTEND node. - if (node.getOpcode() == ISD::ANY_EXTEND) - return IgnoreHarmlessInstructions(node.getOperand(0)); - if (node.getOpcode() == ISD::TRUNCATE) - return IgnoreHarmlessInstructions(node.getOperand(0)); - // Any other node type. - return node; -} - -bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall, - SDValue Ret) { - unsigned NumOps = Ret.getNumOperands(); - // ISD::CALL results:(value0, ..., valuen, chain) - // ISD::RET operands:(chain, value0, flag0, ..., valuen, flagn) - // Value return: - // Check that operand of the RET node sources from the CALL node. The RET node - // has at least two operands. Operand 0 holds the chain. Operand 1 holds the - // value. - // Also we need to check that there is no code in between the call and the - // return. Hence we also check that the incomming chain to the return sources - // from the outgoing chain of the call. - if (NumOps > 1 && - IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) && - Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1)) - return true; - // void return: The RET node has the chain result value of the CALL node as - // input. 
- if (NumOps == 1 && - Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1)) - return true; - - return false; -} diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 2402f81..25a499b88 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -62,9 +62,11 @@ namespace { Constant *GetFrameMap(Function &F); const Type* GetConcreteStackEntryType(Function &F); void CollectRoots(Function &F); - static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr, + static GetElementPtrInst *CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Value *BasePtr, int Idx1, const char *Name); - static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr, + static GetElementPtrInst *CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Value *BasePtr, int Idx1, int Idx2, const char *Name); }; @@ -93,7 +95,7 @@ namespace { public: EscapeEnumerator(Function &F, const char *N = "cleanup") - : F(F), CleanupBBName(N), State(0) {} + : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} IRBuilder<> *Next() { switch (State) { @@ -136,8 +138,9 @@ namespace { return 0; // Create a cleanup block. - BasicBlock *CleanupBB = BasicBlock::Create(CleanupBBName, &F); - UnwindInst *UI = new UnwindInst(CleanupBB); + BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(), + CleanupBBName, &F); + UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB); // Transform the 'call' instructions into 'invoke's branching to the // cleanup block. Go in reverse order to make prettier BB names. @@ -186,8 +189,7 @@ ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) { Constant *ShadowStackGC::GetFrameMap(Function &F) { // doInitialization creates the abstract type of this value. - - Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty); + const Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); // Truncate the ShadowStackDescriptor if some metadata is null. 
unsigned NumMeta = 0; @@ -200,17 +202,18 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { } Constant *BaseElts[] = { - ConstantInt::get(Type::Int32Ty, Roots.size(), false), - ConstantInt::get(Type::Int32Ty, NumMeta, false), + ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false), + ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false), }; Constant *DescriptorElts[] = { - ConstantStruct::get(BaseElts, 2), + ConstantStruct::get(F.getContext(), BaseElts, 2, false), ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata.begin(), NumMeta) }; - Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2); + Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2, + false); std::string TypeName("gc_map."); TypeName += utostr(NumMeta); @@ -229,13 +232,14 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { // to be a ModulePass (which means it cannot be in the 'llc' pipeline // (which uses a FunctionPassManager (which segfaults (not asserts) if // provided a ModulePass))). 
- Constant *GV = new GlobalVariable(FrameMap->getType(), true, + Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, GlobalVariable::InternalLinkage, - FrameMap, "__gc_" + F.getName(), - F.getParent()); + FrameMap, "__gc_" + F.getName()); - Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0), - ConstantInt::get(Type::Int32Ty, 0) }; + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) + }; return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2); } @@ -245,7 +249,7 @@ const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { EltTys.push_back(StackEntryTy); for (size_t I = 0; I != Roots.size(); I++) EltTys.push_back(Roots[I].second->getAllocatedType()); - Type *Ty = StructType::get(EltTys); + Type *Ty = StructType::get(F.getContext(), EltTys); std::string TypeName("gc_stackentry."); TypeName += F.getName(); @@ -263,9 +267,11 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // void *Meta[]; // May be absent for roots without metadata. // }; std::vector<const Type*> EltTys; - EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :) - EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array. - StructType *FrameMapTy = StructType::get(EltTys); + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + StructType *FrameMapTy = StructType::get(M.getContext(), EltTys); M.addTypeName("gc_map", FrameMapTy); PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); @@ -274,12 +280,12 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // FrameMap *Map; // Pointer to constant FrameMap. // void *Roots[]; // Stack roots (in-place array, so we pretend). 
// }; - OpaqueType *RecursiveTy = OpaqueType::get(); + OpaqueType *RecursiveTy = OpaqueType::get(M.getContext()); EltTys.clear(); EltTys.push_back(PointerType::getUnqual(RecursiveTy)); EltTys.push_back(FrameMapPtrTy); - PATypeHolder LinkTyH = StructType::get(EltTys); + PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys); RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); StackEntryTy = cast<StructType>(LinkTyH.get()); @@ -292,10 +298,10 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { if (!Head) { // If the root chain does not exist, insert a new one with linkonce // linkage! - Head = new GlobalVariable(StackEntryPtrTy, false, + Head = new GlobalVariable(M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage, Constant::getNullValue(StackEntryPtrTy), - "llvm_gc_root_chain", &M); + "llvm_gc_root_chain"); } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); @@ -338,11 +344,11 @@ void ShadowStackGC::CollectRoots(Function &F) { } GetElementPtrInst * -ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, int Idx, int Idx2, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), - ConstantInt::get(Type::Int32Ty, Idx), - ConstantInt::get(Type::Int32Ty, Idx2) }; + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -351,10 +357,10 @@ ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, } GetElementPtrInst * -ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, int Idx, const char 
*Name) { - Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), - ConstantInt::get(Type::Int32Ty, Idx) }; + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx) }; Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -364,6 +370,8 @@ ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, /// runOnFunction - Insert code to maintain the shadow stack. bool ShadowStackGC::performCustomLowering(Function &F) { + LLVMContext &Context = F.getContext(); + // Find calls to llvm.gcroot. CollectRoots(F); @@ -388,13 +396,14 @@ bool ShadowStackGC::performCustomLowering(Function &F) { // Initialize the map pointer and load the current head of the shadow stack. Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); - Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, + 0,1,"gc_frame.map"); AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { // For each root, find the corresponding slot in the aggregate... - Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root"); + Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); // And use it in lieu of the alloca. AllocaInst *OriginalAlloca = Roots[I].second; @@ -410,17 +419,19 @@ bool ShadowStackGC::performCustomLowering(Function &F) { AtEntry.SetInsertPoint(IP->getParent(), IP); // Push the entry onto the shadow stack. 
- Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next"); - Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead"); - AtEntry.CreateStore(CurrentHead, EntryNextPtr); - AtEntry.CreateStore(NewHeadVal, Head); + Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, + StackEntry,0,0,"gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(Context, AtEntry, + StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); // For each instruction that escapes... EscapeEnumerator EE(F, "gc_cleanup"); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. - Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0, + Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next"); Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); AtExit->CreateStore(SavedHead, Head); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index e44a138..8070570 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -158,7 +158,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=<funcname>. 
#ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getName(); + std::string MFName = MF->getFunction()->getNameStr(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -185,8 +185,8 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { initShrinkWrappingInfo(); DEBUG(if (ShrinkWrapThisFunction) { - DOUT << "Place CSR spills/restores for " - << MF->getFunction()->getName() << "\n"; + errs() << "Place CSR spills/restores for " + << MF->getFunction()->getName() << "\n"; }); if (calculateSets(Fn)) @@ -297,20 +297,26 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { } } - DEBUG(if (ShrinkWrapDebugging >= Details) { - DOUT << "-----------------------------------------------------------\n"; - DOUT << " Antic/Avail Sets:\n"; - DOUT << "-----------------------------------------------------------\n"; - DOUT << "iterations = " << iterations << "\n"; - DOUT << "-----------------------------------------------------------\n"; - DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"; - DOUT << "-----------------------------------------------------------\n"; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets(MBB); + DEBUG({ + if (ShrinkWrapDebugging >= Details) { + errs() + << "-----------------------------------------------------------\n" + << " Antic/Avail Sets:\n" + << "-----------------------------------------------------------\n" + << "iterations = " << iterations << "\n" + << "-----------------------------------------------------------\n" + << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" + << "-----------------------------------------------------------\n"; + + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets(MBB); + } + + errs() + << "-----------------------------------------------------------\n"; } - DOUT << 
"-----------------------------------------------------------\n"; }); } @@ -357,8 +363,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) - DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": uses no callee-saved registers\n"); + errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": uses no callee-saved registers\n"); return false; } @@ -377,8 +383,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { // implementation to functions with <= 500 MBBs. if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) - DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": too large (" << Fn.size() << " MBBs)\n"); + errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } @@ -459,8 +465,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { } if (allCSRUsesInEntryBlock) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in EntryBlock\n"); + DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { bool allCSRsUsedInEntryFanout = true; @@ -471,8 +477,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in imm successors of EntryBlock\n"); + DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } } @@ -498,9 +504,9 @@ bool PEI::calculateSets(MachineFunction &Fn) { if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in choke point(s) at " - << getBasicBlockName(MBB) << "\n"); + DEBUG(errs() << "DISABLED: " << 
Fn.getFunction()->getName() + << ": all CSRs used in choke point(s) at " + << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; break; } @@ -514,16 +520,16 @@ bool PEI::calculateSets(MachineFunction &Fn) { return false; DEBUG({ - DOUT << "ENABLED: " << Fn.getFunction()->getName(); + errs() << "ENABLED: " << Fn.getFunction()->getName(); if (HasFastExitPath) - DOUT << " (fast exit path)"; - DOUT << "\n"; + errs() << " (fast exit path)"; + errs() << "\n"; if (ShrinkWrapDebugging >= BasicInfo) { - DOUT << "------------------------------" + errs() << "------------------------------" << "-----------------------------\n"; - DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + errs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; if (ShrinkWrapDebugging >= Details) { - DOUT << "------------------------------" + errs() << "------------------------------" << "-----------------------------\n"; dumpAllUsed(); } @@ -596,7 +602,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(SUCC); DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << getBasicBlockName(MBB) + errs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "successor " << getBasicBlockName(SUCC) << "\n"); } @@ -612,7 +618,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(PRED); DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << getBasicBlockName(MBB) + errs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "predecessor " << getBasicBlockName(PRED) << "\n"); } @@ -650,7 +656,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { CSRUsed[EXB] |= loopSpills; addedUses = true; DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << "LOOP " << getBasicBlockName(MBB) + errs() << "LOOP " << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(loopSpills) << ")->" << getBasicBlockName(EXB) << "\n"); if (EXB->succ_size() > 1 
|| EXB->pred_size() > 1) @@ -717,7 +723,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]) << "\n"); return placedSpills; @@ -778,7 +784,7 @@ bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " + errs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); return placedRestores; @@ -802,7 +808,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { ++iterations; DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << "iter " << iterations + errs() << "iter " << iterations << " --------------------------------------------------\n"); // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, @@ -852,15 +858,15 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); numSRReduced += numSRReducedThisFunc; DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - DOUT << "-----------------------------------------------------------\n"; - DOUT << "total iterations = " << iterations << " ( " + errs() << "-----------------------------------------------------------\n"; + errs() << "total iterations = " << iterations << " ( " << Fn.getFunction()->getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; - DOUT << "-----------------------------------------------------------\n"; + errs() << "-----------------------------------------------------------\n"; dumpSRSets(); - DOUT << "-----------------------------------------------------------\n"; + errs() << "-----------------------------------------------------------\n"; if (numSRReducedThisFunc) 
verifySpillRestorePlacement(); }); @@ -893,7 +899,7 @@ void PEI::findFastExitPath() { // Check the immediate successors. if (isReturnBlock(SUCC)) { if (ShrinkWrapDebugging >= BasicInfo) - DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) + errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << getBasicBlockName(SUCC) << "\n"; break; } @@ -911,7 +917,7 @@ void PEI::findFastExitPath() { } if (HasFastExitPath) { if (ShrinkWrapDebugging >= BasicInfo) - DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) + errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << exitPath << "\n"; break; } @@ -945,10 +951,10 @@ void PEI::verifySpillRestorePlacement() { if (spilled.empty()) continue; - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(spilled) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(spilled) + << " RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); if (CSRRestore[MBB].intersects(spilled)) { restored |= (CSRRestore[MBB] & spilled); @@ -977,11 +983,11 @@ void PEI::verifySpillRestorePlacement() { if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); - DOUT << MF->getFunction()->getName() << ": " - << stringifyCSRegSet(notRestored) - << " spilled at " << getBasicBlockName(MBB) - << " are never restored on path to return " - << getBasicBlockName(SBB) << "\n"; + DEBUG(errs() << MF->getFunction()->getName() << ": " + << stringifyCSRegSet(notRestored) + << " spilled at " << getBasicBlockName(MBB) + << " are never restored on path to return " + << getBasicBlockName(SBB) << "\n"); } restored.clear(); } @@ -998,10 +1004,10 @@ void PEI::verifySpillRestorePlacement() { if (restored.empty()) continue; - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " 
- << stringifyCSRegSet(CSRSave[MBB]) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(restored) << "\n"; + DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]) + << " RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(restored) << "\n"); if (CSRSave[MBB].intersects(restored)) { spilled |= (CSRSave[MBB] & restored); @@ -1025,23 +1031,24 @@ void PEI::verifySpillRestorePlacement() { } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); - DOUT << MF->getFunction()->getName() << ": " - << stringifyCSRegSet(notSpilled) - << " restored at " << getBasicBlockName(MBB) - << " are never spilled\n"; + DEBUG(errs() << MF->getFunction()->getName() << ": " + << stringifyCSRegSet(notSpilled) + << " restored at " << getBasicBlockName(MBB) + << " are never spilled\n"); } } } // Debugging print methods. std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { + if (!MBB) + return ""; + + if (MBB->getBasicBlock()) + return MBB->getBasicBlock()->getNameStr(); + std::ostringstream name; - if (MBB) { - if (MBB->getBasicBlock()) - name << MBB->getBasicBlock()->getName(); - else - name << "_MBB_" << MBB->getNumber(); - } + name << "_MBB_" << MBB->getNumber(); return name.str(); } @@ -1071,14 +1078,15 @@ std::string PEI::stringifyCSRegSet(const CSRegSet& s) { } void PEI::dumpSet(const CSRegSet& s) { - DOUT << stringifyCSRegSet(s) << "\n"; + DEBUG(errs() << stringifyCSRegSet(s) << "\n"); } void PEI::dumpUsed(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; - } + DEBUG({ + if (MBB) + errs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; + }); } void PEI::dumpAllUsed() { @@ -1090,27 +1098,29 @@ void PEI::dumpAllUsed() { } void PEI::dumpSets(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << getBasicBlockName(MBB) << " | " - << 
stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << "\n"; - } + DEBUG({ + if (MBB) + errs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << "\n"; + }); } void PEI::dumpSets1(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << " | " - << stringifyCSRegSet(CSRSave[MBB]) << " | " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } + DEBUG({ + if (MBB) + errs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << " | " + << stringifyCSRegSet(CSRSave[MBB]) << " | " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + }); } void PEI::dumpAllSets() { @@ -1122,20 +1132,21 @@ void PEI::dumpAllSets() { } void PEI::dumpSRSets() { - for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); - MBB != E; ++MBB) { - if (! CSRSave[MBB].empty()) { - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]); - if (CSRRestore[MBB].empty()) - DOUT << "\n"; - } - if (! CSRRestore[MBB].empty()) { - if (! 
CSRSave[MBB].empty()) - DOUT << " "; - DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } - } + DEBUG({ + for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); + MBB != E; ++MBB) { + if (!CSRSave[MBB].empty()) { + errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]); + if (CSRRestore[MBB].empty()) + errs() << '\n'; + } + + if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) + errs() << " " + << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } + }); } #endif diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h new file mode 100644 index 0000000..f69feaf --- /dev/null +++ b/lib/CodeGen/SimpleHazardRecognizer.h @@ -0,0 +1,89 @@ +//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SimpleHazardRecognizer class, which +// implements hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H +#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H + +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" + +namespace llvm { + /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses + /// a coarse classification and attempts to avoid that instructions of + /// a given class aren't grouped too densely together. 
+ class SimpleHazardRecognizer : public ScheduleHazardRecognizer { + /// Class - A simple classification for SUnits. + enum Class { + Other, Load, Store + }; + + /// Window - The Class values of the most recently issued + /// instructions. + Class Window[8]; + + /// getClass - Classify the given SUnit. + Class getClass(const SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.mayLoad()) + return Load; + if (TID.mayStore()) + return Store; + return Other; + } + + /// Step - Rotate the existing entries in Window and insert the + /// given class value in position as the most recent. + void Step(Class C) { + std::copy(Window+1, array_endof(Window), Window); + Window[array_lengthof(Window)-1] = C; + } + + public: + SimpleHazardRecognizer() : Window() { + Reset(); + } + + virtual HazardType getHazardType(SUnit *SU) { + Class C = getClass(SU); + if (C == Other) + return NoHazard; + unsigned Score = 0; + for (unsigned i = 0; i != array_lengthof(Window); ++i) + if (Window[i] == C) + Score += i + 1; + if (Score > array_lengthof(Window) * 2) + return Hazard; + return NoHazard; + } + + virtual void Reset() { + for (unsigned i = 0; i != array_lengthof(Window); ++i) + Window[i] = Other; + } + + virtual void EmitInstruction(SUnit *SU) { + Step(getClass(SU)); + } + + virtual void AdvanceCycle() { + Step(Other); + } + }; +} + +#endif diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 7e7d6b8..9c283b0 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -17,6 +17,7 @@ #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -28,6 +29,8 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" 
#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -51,13 +54,8 @@ EnableJoining("join-liveintervals", cl::init(true)); static cl::opt<bool> -NewHeuristic("new-coalescer-heuristic", - cl::desc("Use new coalescer heuristic"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -CrossClassJoin("join-cross-class-copies", - cl::desc("Coalesce cross register class copies"), +DisableCrossClassJoin("disable-cross-class-join", + cl::desc("Avoid coalescing cross register class copies"), cl::init(false), cl::Hidden); static cl::opt<bool> @@ -65,7 +63,7 @@ PhysJoinTweak("tweak-phys-join-heuristics", cl::desc("Tweak heuristics for joining phys reg with vr"), cl::init(false), cl::Hidden); -static RegisterPass<SimpleRegisterCoalescing> +static RegisterPass<SimpleRegisterCoalescing> X("simple-register-coalescing", "Simple Register Coalescing"); // Declare that we implement the RegisterCoalescer interface @@ -74,6 +72,8 @@ static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X); const PassInfo *const llvm::SimpleRegisterCoalescingID = &X; void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); AU.addRequired<MachineLoopInfo>(); @@ -105,22 +105,23 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. 
LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); assert(BLR != IntB.end() && "Live range not found!"); VNInfo *BValNo = BLR->valno; - + // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we + // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. - if (!BValNo->copy) return false; + if (!BValNo->getCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - + // AValNo is the value number in A that defines the copy, A3 in the example. - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1); + LiveIndex CopyUseIdx = li_->getUseIndex(CopyIdx); + LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); assert(ALR != IntA.end() && "Live range not found!"); VNInfo *AValNo = ALR->valno; // If it's re-defined by an early clobber somewhere in the live range, then @@ -143,26 +144,28 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // The coalescer has no idea there was a def in the middle of [174,230]. if (AValNo->hasRedefByEC()) return false; - - // If AValNo is defined as a copy from IntB, we can potentially process this. + + // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. unsigned SrcReg = li_->getVNInfoSourceReg(AValNo); if (!SrcReg) return false; // Not defined by a copy. - + // If the value number is not defined by a copy instruction, ignore it. // If the source register comes from an interval other than IntB, we can't // handle this. if (SrcReg != IntB.reg) return false; - + // Get the LiveRange in IntB that this value number starts with. 
- LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def-1); + LiveInterval::iterator ValLR = + IntB.FindLiveRangeContaining(li_->getPrevSlot(AValNo->def)); assert(ValLR != IntB.end() && "Live range not found!"); - + // Make sure that the end of the live range is inside the same block as // CopyMI. - MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1); - if (!ValLREndInst || + MachineInstr *ValLREndInst = + li_->getInstructionFromIndex(li_->getPrevSlot(ValLR->end)); + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) return false; // Okay, we now know that ValLR ends in the same block that the CopyMI @@ -177,28 +180,33 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, *tri_->getSubRegisters(IntB.reg)) { for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; } } - - DOUT << "\nExtending: "; IntB.print(DOUT, tri_); - - unsigned FillerStart = ValLR->end, FillerEnd = BLR->start; + + DEBUG({ + errs() << "\nExtending: "; + IntB.print(errs(), tri_); + }); + + LiveIndex FillerStart = ValLR->end, FillerEnd = BLR->start; // We are about to delete CopyMI, so need to remove it as the 'instruction // that defines this value #'. Update the the valnum with the new defining // instruction #. BValNo->def = FillerStart; - BValNo->copy = NULL; - + BValNo->setCopy(0); + // Okay, we can merge them. We need to insert a new liverange: // [ValLR.end, BLR.begin) of either value number, then we merge the // two value numbers. 
IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. + // physreg has sub-registers, update their live intervals as well. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); @@ -213,17 +221,26 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, IntB.addKills(ValLR->valno, BValNo->kills); IntB.MergeValueNumberInto(BValNo, ValLR->valno); } - DOUT << " result = "; IntB.print(DOUT, tri_); - DOUT << "\n"; + DEBUG({ + errs() << " result = "; + IntB.print(errs(), tri_); + errs() << "\n"; + }); // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); if (UIdx != -1) { ValLREndInst->getOperand(UIdx).setIsKill(false); - IntB.removeKill(ValLR->valno, FillerStart); + ValLR->valno->removeKill(FillerStart); } + // If the copy instruction was killing the destination register before the + // merge, find the last use and trim the live range. That will also add the + // isKill marker. 
+ if (CopyMI->killsRegister(IntA.reg)) + TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); + ++numExtends; return true; } @@ -253,6 +270,16 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, return false; } +static void +TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { + for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + NewMI->addOperand(MO); + } +} + /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA /// being the source and IntB being the dest, thus this defines a value number /// in IntB. If the source value number (in IntA) is defined by a commutable @@ -279,7 +306,8 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = + li_->getDefIndex(li_->getInstructionIndex(CopyMI)); // FIXME: For now, only eliminate the copy by commuting its def when the // source register is a virtual register. We want to guard against cases @@ -293,15 +321,17 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); assert(BLR != IntB.end() && "Live range not found!"); VNInfo *BValNo = BLR->valno; - + // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we + // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. 
- if (!BValNo->copy) return false; + if (!BValNo->getCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - + // AValNo is the value number in A that defines the copy, A3 in the example. - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1); + LiveInterval::iterator ALR = + IntA.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx)); + assert(ALR != IntA.end() && "Live range not found!"); VNInfo *AValNo = ALR->valno; // If other defs can reach uses of this def, then it's not safe to perform @@ -312,9 +342,23 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); const TargetInstrDesc &TID = DefMI->getDesc(); - unsigned NewDstIdx; - if (!TID.isCommutable() || - !tii_->CommuteChangesDestination(DefMI, NewDstIdx)) + if (!TID.isCommutable()) + return false; + // If DefMI is a two-address instruction then commuting it will change the + // destination register. 
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + assert(DefIdx != -1); + unsigned UseOpIdx; + if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) + return false; + unsigned Op1, Op2, NewDstIdx; + if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) + return false; + if (Op1 == UseOpIdx) + NewDstIdx = Op2; + else if (Op2 == UseOpIdx) + NewDstIdx = Op1; + else return false; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); @@ -332,7 +376,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), UE = mri_->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; - unsigned UseIdx = li_->getInstructionIndex(UseMI); + LiveIndex UseIdx = li_->getInstructionIndex(UseMI); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end()) continue; @@ -356,8 +400,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, bool BHasPHIKill = BValNo->hasPHIKill(); SmallVector<VNInfo*, 4> BDeadValNos; - SmallVector<unsigned, 4> BKills; - std::map<unsigned, unsigned> BExtend; + VNInfo::KillSet BKills; + std::map<LiveIndex, LiveIndex> BExtend; // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. 
// A = or A, B @@ -384,7 +428,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, ++UI; if (JoinedCopies.count(UseMI)) continue; - unsigned UseIdx = li_->getInstructionIndex(UseMI); + LiveIndex UseIdx= li_->getUseIndex(li_->getInstructionIndex(UseMI)); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; @@ -395,7 +439,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (Extended) UseMO.setIsKill(false); else - BKills.push_back(li_->getUseIndex(UseIdx)+1); + BKills.push_back(li_->getNextSlot(UseIdx)); } unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) @@ -404,7 +448,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // This copy will become a noop. If it's defining a new val#, // remove that val# as well. However this live range is being // extended to the end of the existing live range defined by the copy. - unsigned DefIdx = li_->getDefIndex(UseIdx); + LiveIndex DefIdx = li_->getDefIndex(UseIdx); const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); BHasPHIKill |= DLR->valno->hasPHIKill(); assert(DLR->valno->def == DefIdx); @@ -420,7 +464,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // We need to insert a new liverange: [ALR.start, LastUse). It may be we can // simply extend BLR if CopyMI doesn't end the range. - DOUT << "\nExtending: "; IntB.print(DOUT, tri_); + DEBUG({ + errs() << "\nExtending: "; + IntB.print(errs(), tri_); + }); // Remove val#'s defined by copies that will be coalesced away. for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) { @@ -439,24 +486,24 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // is updated. Kills are also updated. 
VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; - ValNo->copy = NULL; + ValNo->setCopy(0); for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) { - unsigned Kill = ValNo->kills[j]; - if (Kill != BLR->end) - BKills.push_back(Kill); + if (ValNo->kills[j] != BLR->end) + BKills.push_back(ValNo->kills[j]); } ValNo->kills.clear(); for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - unsigned End = AI->end; - std::map<unsigned, unsigned>::iterator EI = BExtend.find(End); + LiveIndex End = AI->end; + std::map<LiveIndex, LiveIndex>::iterator + EI = BExtend.find(End); if (EI != BExtend.end()) End = EI->second; IntB.addRange(LiveRange(AI->start, End, ValNo)); // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. + // physreg has sub-registers, update their live intervals as well. if (BHasSubRegs) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); @@ -467,13 +514,21 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, IntB.addKills(ValNo, BKills); ValNo->setHasPHIKill(BHasPHIKill); - DOUT << " result = "; IntB.print(DOUT, tri_); - DOUT << "\n"; + DEBUG({ + errs() << " result = "; + IntB.print(errs(), tri_); + errs() << '\n'; + errs() << "\nShortening: "; + IntA.print(errs(), tri_); + }); - DOUT << "\nShortening: "; IntA.print(DOUT, tri_); IntA.removeValNo(AValNo); - DOUT << " result = "; IntA.print(DOUT, tri_); - DOUT << "\n"; + + DEBUG({ + errs() << " result = "; + IntA.print(errs(), tri_); + errs() << '\n'; + }); ++numCommutes; return true; @@ -495,7 +550,8 @@ static bool isSameOrFallThroughBB(MachineBasicBlock *MBB, /// removeRange - Wrapper for LiveInterval::removeRange. This removes a range /// from a physical register live interval as well as from the live intervals /// of its sub-registers. 
-static void removeRange(LiveInterval &li, unsigned Start, unsigned End, +static void removeRange(LiveInterval &li, + LiveIndex Start, LiveIndex End, LiveIntervals *li_, const TargetRegisterInfo *tri_) { li.removeRange(Start, End, true); if (TargetRegisterInfo::isPhysicalRegister(li.reg)) { @@ -503,14 +559,15 @@ static void removeRange(LiveInterval &li, unsigned Start, unsigned End, if (!li_->hasInterval(*SR)) continue; LiveInterval &sli = li_->getInterval(*SR); - unsigned RemoveEnd = Start; + LiveIndex RemoveStart = Start; + LiveIndex RemoveEnd = Start; while (RemoveEnd != End) { - LiveInterval::iterator LR = sli.FindLiveRangeContaining(Start); + LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart); if (LR == sli.end()) break; RemoveEnd = (LR->end < End) ? LR->end : End; - sli.removeRange(Start, RemoveEnd, true); - Start = RemoveEnd; + sli.removeRange(RemoveStart, RemoveEnd, true); + RemoveStart = RemoveEnd; } } } @@ -520,14 +577,14 @@ static void removeRange(LiveInterval &li, unsigned Start, unsigned End, /// as the copy instruction, trim the live interval to the last use and return /// true. bool -SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, +SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(LiveIndex CopyIdx, MachineBasicBlock *CopyMBB, LiveInterval &li, const LiveRange *LR) { - unsigned MBBStart = li_->getMBBStartIdx(CopyMBB); - unsigned LastUseIdx; - MachineOperand *LastUse = lastRegisterUse(LR->start, CopyIdx-1, li.reg, - LastUseIdx); + LiveIndex MBBStart = li_->getMBBStartIdx(CopyMBB); + LiveIndex LastUseIdx; + MachineOperand *LastUse = + lastRegisterUse(LR->start, li_->getPrevSlot(CopyIdx), li.reg, LastUseIdx); if (LastUse) { MachineInstr *LastUseMI = LastUse->getParent(); if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) { @@ -547,7 +604,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, // of last use. 
LastUse->setIsKill(); removeRange(li, li_->getDefIndex(LastUseIdx), LR->end, li_, tri_); - li.addKill(LR->valno, LastUseIdx+1); + LR->valno->addKill(li_->getNextSlot(LastUseIdx)); unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && DstReg == li.reg) { @@ -560,7 +617,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, // Is it livein? if (LR->start <= MBBStart && LR->end > MBBStart) { - if (LR->start == 0) { + if (LR->start == LiveIndex()) { assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); // Live-in to the function but dead. Remove it from entry live-in set. mf_->begin()->removeLiveIn(li.reg); @@ -575,8 +632,9 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, /// computation, replace the copy by rematerialize the definition. bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, + unsigned DstSubIdx, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI)); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; @@ -590,24 +648,52 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isAsCheapAsAMove()) return false; - if (!DefMI->getDesc().isRematerializable() || - !tii_->isTriviallyReMaterializable(DefMI)) + if (!tii_->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; - if (!DefMI->isSafeToMove(tii_, SawStore)) + if (!DefMI->isSafeToMove(tii_, SawStore, AA)) + return false; + if (TID.getNumDefs() != 1) return false; + if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { + // Make sure the copy destination register class fits the instruction + // definition register class. 
The mismatch can happen as a result of earlier + // extract_subreg, insert_subreg, subreg_to_reg coalescing. + const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_); + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (mri_->getRegClass(DstReg) != RC) + return false; + } else if (!RC->contains(DstReg)) + return false; + } - unsigned DefIdx = li_->getDefIndex(CopyIdx); + // If destination register has a sub-register index on it, make sure it mtches + // the instruction register class. + if (DstSubIdx) { + const TargetInstrDesc &TID = DefMI->getDesc(); + if (TID.getNumDefs() != 1) + return false; + const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); + const TargetRegisterClass *DstSubRC = + DstRC->getSubRegisterRegClass(DstSubIdx); + const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_); + if (DefRC == DstRC) + DstSubIdx = 0; + else if (DefRC != DstSubRC) + return false; + } + + LiveIndex DefIdx = li_->getDefIndex(CopyIdx); const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx); - DLR->valno->copy = NULL; + DLR->valno->setCopy(0); // Don't forget to update sub-register intervals. 
if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) { if (!li_->hasInterval(*SR)) continue; DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->copy == CopyMI) - DLR->valno->copy = NULL; + if (DLR && DLR->valno->getCopy() == CopyMI) + DLR->valno->setCopy(0); } } @@ -621,7 +707,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, } MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DefMI); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { @@ -630,7 +716,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, // should mark it dead: if (DefMI->getParent() == MBB) { DefMI->addRegisterDead(SrcInt.reg, tri_); - SrcLR->end = SrcLR->start + 1; + SrcLR->end = li_->getNextSlot(SrcLR->start); } } @@ -644,11 +730,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, if (MO.isDef() && li_->hasInterval(MO.getReg())) { unsigned Reg = MO.getReg(); DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->copy == CopyMI) - DLR->valno->copy = NULL; + if (DLR && DLR->valno->getCopy() == CopyMI) + DLR->valno->setCopy(0); } } + TransferImplicitOps(CopyMI, NewMI); li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); @@ -657,30 +744,6 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return true; } -/// isBackEdgeCopy - Returns true if CopyMI is a back edge copy. 
-/// -bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI, - unsigned DstReg) const { - MachineBasicBlock *MBB = CopyMI->getParent(); - const MachineLoop *L = loopInfo->getLoopFor(MBB); - if (!L) - return false; - if (MBB != L->getLoopLatch()) - return false; - - LiveInterval &LI = li_->getInterval(DstReg); - unsigned DefIdx = li_->getInstructionIndex(CopyMI); - LiveInterval::const_iterator DstLR = - LI.FindLiveRangeContaining(li_->getDefIndex(DefIdx)); - if (DstLR == LI.end()) - return false; - unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1; - if (DstLR->valno->kills.size() == 1 && - DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill()) - return true; - return false; -} - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use @@ -714,7 +777,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { // If the use is a copy and it won't be coalesced away, and its source // is defined by a trivial computation, try to rematerialize it instead. 
- if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,UseMI)) + if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, + CopyDstSubIdx, UseMI)) continue; } @@ -751,44 +815,16 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, (TargetRegisterInfo::isVirtualRegister(CopyDstReg) || allocatableRegs_[CopyDstReg])) { LiveInterval &LI = li_->getInterval(CopyDstReg); - unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI)); + LiveIndex DefIdx = + li_->getDefIndex(li_->getInstructionIndex(UseMI)); if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) { if (DLR->valno->def == DefIdx) - DLR->valno->copy = UseMI; + DLR->valno->setCopy(UseMI); } } } } -/// RemoveDeadImpDef - Remove implicit_def instructions which are "re-defining" -/// registers due to insert_subreg coalescing. e.g. -/// r1024 = op -/// r1025 = implicit_def -/// r1025 = insert_subreg r1025, r1024 -/// = op r1025 -/// => -/// r1025 = op -/// r1025 = implicit_def -/// r1025 = insert_subreg r1025, r1025 -/// = op r1025 -void -SimpleRegisterCoalescing::RemoveDeadImpDef(unsigned Reg, LiveInterval &LI) { - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), - E = mri_->reg_end(); I != E; ) { - MachineOperand &O = I.getOperand(); - MachineInstr *DefMI = &*I; - ++I; - if (!O.isDef()) - continue; - if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) - continue; - if (!LI.liveBeforeAndAt(li_->getInstructionIndex(DefMI))) - continue; - li_->RemoveMachineInstrFromMaps(DefMI); - DefMI->eraseFromParent(); - } -} - /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate /// due to live range lengthening as the result of coalescing. 
void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, @@ -796,12 +832,27 @@ void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), UE = mri_->use_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); - if (UseMO.isKill()) { - MachineInstr *UseMI = UseMO.getParent(); - unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); - const LiveRange *UI = LI.getLiveRangeContaining(UseIdx); - if (!UI || !LI.isKill(UI->valno, UseIdx+1)) - UseMO.setIsKill(false); + if (!UseMO.isKill()) + continue; + MachineInstr *UseMI = UseMO.getParent(); + LiveIndex UseIdx = + li_->getUseIndex(li_->getInstructionIndex(UseMI)); + const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); + if (!LR || + (!LR->valno->isKill(li_->getNextSlot(UseIdx)) && + LR->valno->def != li_->getNextSlot(UseIdx))) { + // Interesting problem. After coalescing reg1027's def and kill are both + // at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814) + // + // bb5: + // 60 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 + // 68 %reg1027<def> = t2LDRi12 %reg1027<kill>, 8, 14, %reg0 + // 76 t2CMPzri %reg1038<kill,undef>, 0, 14, %reg0, %CPSR<imp-def> + // 84 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 + // 96 t2Bcc mbb<bb5,0x2030910>, 1, %CPSR<kill> + // + // Do not remove the kill marker on t2LDRi12. + UseMO.setIsKill(false); } } } @@ -830,15 +881,16 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, /// Return true if live interval is removed. bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getInstructionIndex(CopyMI); + LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI); LiveInterval::iterator MLR = li.FindLiveRangeContaining(li_->getDefIndex(CopyIdx)); if (MLR == li.end()) return false; // Already removed by ShortenDeadCopySrcLiveRange. 
- unsigned RemoveStart = MLR->start; - unsigned RemoveEnd = MLR->end; + LiveIndex RemoveStart = MLR->start; + LiveIndex RemoveEnd = MLR->end; + LiveIndex DefIdx = li_->getDefIndex(CopyIdx); // Remove the liverange that's defined by this. - if (RemoveEnd == li_->getDefIndex(CopyIdx)+1) { + if (RemoveStart == DefIdx && RemoveEnd == li_->getNextSlot(DefIdx)) { removeRange(li, RemoveStart, RemoveEnd, li_, tri_); return removeIntervalIfEmpty(li, li_, tri_); } @@ -849,7 +901,7 @@ bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, /// the val# it defines. If the live interval becomes empty, remove it as well. bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI) { - unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI)); + LiveIndex DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI)); LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); if (DefIdx != MLR->valno->def) return false; @@ -860,17 +912,18 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, /// PropagateDeadness - Propagate the dead marker to the instruction which /// defines the val#. static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, - unsigned &LRStart, LiveIntervals *li_, + LiveIndex &LRStart, LiveIntervals *li_, const TargetRegisterInfo* tri_) { MachineInstr *DefMI = li_->getInstructionFromIndex(li_->getDefIndex(LRStart)); if (DefMI && DefMI != CopyMI) { - int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false, tri_); - if (DeadIdx != -1) { + int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false); + if (DeadIdx != -1) DefMI->getOperand(DeadIdx).setIsDead(); - // A dead def should have a single cycle interval. 
- ++LRStart; - } + else + DefMI->addOperand(MachineOperand::CreateReg(li.reg, + true, true, false, true)); + LRStart = li_->getNextSlot(LRStart); } } @@ -881,8 +934,8 @@ static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, bool SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getInstructionIndex(CopyMI); - if (CopyIdx == 0) { + LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI); + if (CopyIdx == LiveIndex()) { // FIXME: special case: function live in. It can be a general case if the // first instruction index starts at > 0 value. assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); @@ -894,13 +947,14 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, return removeIntervalIfEmpty(li, li_, tri_); } - LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx-1); + LiveInterval::iterator LR = + li.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx)); if (LR == li.end()) // Livein but defined by a phi. return false; - unsigned RemoveStart = LR->start; - unsigned RemoveEnd = li_->getDefIndex(CopyIdx)+1; + LiveIndex RemoveStart = LR->start; + LiveIndex RemoveEnd = li_->getNextSlot(li_->getDefIndex(CopyIdx)); if (LR->end > RemoveEnd) // More uses past this copy? Nothing to do. return false; @@ -911,22 +965,25 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR)) return false; + // There are other kills of the val#. Nothing to do. + if (!li.isOnlyLROfValNo(LR)) + return false; + MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart); if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_)) // If the live range starts in another mbb and the copy mbb is not a fall // through mbb, then we can only cut the range from the beginning of the // copy mbb. 
- RemoveStart = li_->getMBBStartIdx(CopyMBB) + 1; + RemoveStart = li_->getNextSlot(li_->getMBBStartIdx(CopyMBB)); if (LR->valno->def == RemoveStart) { // If the def MI defines the val# and this copy is the only kill of the // val#, then propagate the dead marker. - if (li.isOnlyLROfValNo(LR)) { - PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); - ++numDeadValNo; - } - if (li.isKill(LR->valno, RemoveEnd)) - li.removeKill(LR->valno, RemoveEnd); + PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); + ++numDeadValNo; + + if (LR->valno->isKill(RemoveEnd)) + LR->valno->removeKill(RemoveEnd); } removeRange(li, RemoveStart, RemoveEnd, li_, tri_); @@ -940,97 +997,19 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, LiveInterval &ImpLi) const{ if (!CopyMI->killsRegister(ImpLi.reg)) return false; - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx); - if (LR == li.end()) - return false; - if (LR->valno->hasPHIKill()) - return false; - if (LR->valno->def != CopyIdx) - return false; - // Make sure all of val# uses are copies. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg), + // Make sure this is the only use. + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg), UE = mri_->use_end(); UI != UE;) { MachineInstr *UseMI = &*UI; ++UI; - if (JoinedCopies.count(UseMI)) - continue; - unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); - LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx); - if (ULR == li.end() || ULR->valno != LR->valno) + if (CopyMI == UseMI || JoinedCopies.count(UseMI)) continue; - // If the use is not a use, then it's not safe to coalesce the move. 
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG && - UseMI->getOperand(1).getReg() == li.reg) - continue; - return false; - } + return false; } return true; } -/// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an -/// implicit_def and it is being removed. Turn all copies from this value# -/// into implicit_defs. -void SimpleRegisterCoalescing::TurnCopiesFromValNoToImpDefs(LiveInterval &li, - VNInfo *VNI) { - SmallVector<MachineInstr*, 4> ImpDefs; - MachineOperand *LastUse = NULL; - unsigned LastUseIdx = li_->getUseIndex(VNI->def); - for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg), - RE = mri_->reg_end(); RI != RE;) { - MachineOperand *MO = &RI.getOperand(); - MachineInstr *MI = &*RI; - ++RI; - if (MO->isDef()) { - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - ImpDefs.push_back(MI); - continue; - } - if (JoinedCopies.count(MI)) - continue; - unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(MI)); - LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx); - if (ULR == li.end() || ULR->valno != VNI) - continue; - // If the use is a copy, turn it into an identity copy. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == li.reg) { - // Change it to an implicit_def. - MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); - for (int i = MI->getNumOperands() - 1, e = 0; i > e; --i) - MI->RemoveOperand(i); - // It's no longer a copy, update the valno it defines. 
- unsigned DefIdx = li_->getDefIndex(UseIdx); - LiveInterval &DstInt = li_->getInterval(DstReg); - LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(DefIdx); - assert(DLR != DstInt.end() && "Live range not found!"); - assert(DLR->valno->copy == MI); - DLR->valno->copy = NULL; - ReMatCopies.insert(MI); - } else if (UseIdx > LastUseIdx) { - LastUseIdx = UseIdx; - LastUse = MO; - } - } - if (LastUse) { - LastUse->setIsKill(); - li.addKill(VNI, LastUseIdx+1); - } else { - // Remove dead implicit_def's. - while (!ImpDefs.empty()) { - MachineInstr *ImpDef = ImpDefs.back(); - ImpDefs.pop_back(); - li_->RemoveMachineInstrFromMaps(ImpDef); - ImpDef->eraseFromParent(); - } - } -} - /// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a /// a virtual destination register with physical source register. bool @@ -1051,13 +1030,14 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, // If the virtual register live interval extends into a loop, turn down // aggressiveness. - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = + li_->getDefIndex(li_->getInstructionIndex(CopyMI)); const MachineLoop *L = loopInfo->getLoopFor(CopyMBB); if (!L) { // Let's see if the virtual register live interval extends into the loop. 
LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx); assert(DLR != DstInt.end() && "Live range not found!"); - DLR = DstInt.FindLiveRangeContaining(DLR->end+1); + DLR = DstInt.FindLiveRangeContaining(li_->getNextSlot(DLR->end)); if (DLR != DstInt.end()) { CopyMBB = li_->getMBBFromIndex(DLR->start); L = loopInfo->getLoopFor(CopyMBB); @@ -1067,7 +1047,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, if (!L || Length <= Threshold) return true; - unsigned UseIdx = li_->getUseIndex(CopyIdx); + LiveIndex UseIdx = li_->getUseIndex(CopyIdx); LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); if (loopInfo->getLoopFor(SMBB) != L) { @@ -1080,7 +1060,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, if (SuccMBB == CopyMBB) continue; if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB), - li_->getMBBEndIdx(SuccMBB)+1)) + li_->getNextSlot(li_->getMBBEndIdx(SuccMBB)))) return false; } } @@ -1111,11 +1091,12 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, // If the virtual register live interval is defined or cross a loop, turn // down aggressiveness. 
- unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - unsigned UseIdx = li_->getUseIndex(CopyIdx); + LiveIndex CopyIdx = + li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex UseIdx = li_->getUseIndex(CopyIdx); LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); assert(SLR != SrcInt.end() && "Live range not found!"); - SLR = SrcInt.FindLiveRangeContaining(SLR->start-1); + SLR = SrcInt.FindLiveRangeContaining(li_->getPrevSlot(SLR->start)); if (SLR == SrcInt.end()) return true; MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); @@ -1135,7 +1116,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, if (PredMBB == SMBB) continue; if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB), - li_->getMBBEndIdx(PredMBB)+1)) + li_->getNextSlot(li_->getMBBEndIdx(PredMBB)))) return false; } } @@ -1236,14 +1217,18 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, LiveInterval &RHS = li_->getInterval(SrcReg); if (li_->hasInterval(RealDstReg) && RHS.overlaps(li_->getInterval(RealDstReg))) { - DOUT << "Interfere with register "; - DEBUG(li_->getInterval(RealDstReg).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with register "; + li_->getInterval(RealDstReg).print(errs(), tri_); + }); return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; // Not coalescable } return true; @@ -1263,14 +1248,18 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, LiveInterval &RHS = li_->getInterval(DstReg); if (li_->hasInterval(RealSrcReg) && RHS.overlaps(li_->getInterval(RealSrcReg))) { - DOUT << "Interfere with register "; - 
DEBUG(li_->getInterval(RealSrcReg).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with register "; + li_->getInterval(RealSrcReg).print(errs(), tri_); + }); return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; // Not coalescable } return true; @@ -1299,7 +1288,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) return false; // Already done. - DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI; + DEBUG(errs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG; @@ -1312,41 +1301,43 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { SrcReg = CopyMI->getOperand(1).getReg(); SrcSubIdx = CopyMI->getOperand(2).getImm(); } else if (isInsSubReg || isSubRegToReg) { - if (CopyMI->getOperand(2).getSubReg()) { - DOUT << "\tSource of insert_subreg is already coalesced " - << "to another register.\n"; - return false; // Not coalescable. - } DstReg = CopyMI->getOperand(0).getReg(); DstSubIdx = CopyMI->getOperand(3).getImm(); SrcReg = CopyMI->getOperand(2).getReg(); + SrcSubIdx = CopyMI->getOperand(2).getSubReg(); + if (SrcSubIdx && SrcSubIdx != DstSubIdx) { + // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been + // coalesced to a larger register so the subreg indices cancel out. + DEBUG(errs() << "\tSource of insert_subreg or subreg_to_reg is already " + "coalesced to another register.\n"); + return false; // Not coalescable. 
+ } } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ - assert(0 && "Unrecognized copy instruction!"); - return false; + llvm_unreachable("Unrecognized copy instruction!"); } // If they are already joined we continue. if (SrcReg == DstReg) { - DOUT << "\tCopy already coalesced.\n"; + DEBUG(errs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } - + bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); // If they are both physical registers, we cannot join them. if (SrcIsPhys && DstIsPhys) { - DOUT << "\tCan not coalesce physregs.\n"; + DEBUG(errs() << "\tCan not coalesce physregs.\n"); return false; // Not coalescable. } - + // We only join virtual registers with allocatable physical registers. if (SrcIsPhys && !allocatableRegs_[SrcReg]) { - DOUT << "\tSrc reg is unallocatable physreg.\n"; + DEBUG(errs() << "\tSrc reg is unallocatable physreg.\n"); return false; // Not coalescable. } if (DstIsPhys && !allocatableRegs_[DstReg]) { - DOUT << "\tDst reg is unallocatable physreg.\n"; + DEBUG(errs() << "\tDst reg is unallocatable physreg.\n"); return false; // Not coalescable. } @@ -1360,9 +1351,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx); assert(DstSubRC && "Illegal subregister index"); if (!DstSubRC->contains(SrcSubReg)) { - DOUT << "\tIncompatible destination regclass: " - << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName() - << ".\n"; + DEBUG(errs() << "\tIncompatible destination regclass: " + << tri_->getName(SrcSubReg) << " not in " + << DstSubRC->getName() << ".\n"); return false; // Not coalescable. 
} } @@ -1377,15 +1368,18 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx); assert(SrcSubRC && "Illegal subregister index"); if (!SrcSubRC->contains(DstReg)) { - DOUT << "\tIncompatible source regclass: " - << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName() - << ".\n"; + DEBUG(errs() << "\tIncompatible source regclass: " + << tri_->getName(DstSubReg) << " not in " + << SrcSubRC->getName() << ".\n"); + (void)DstSubReg; return false; // Not coalescable. } } // Should be non-null only when coalescing to a sub-register class. bool CrossRC = false; + const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); + const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg); const TargetRegisterClass *NewRC = NULL; MachineBasicBlock *CopyMBB = CopyMI->getParent(); unsigned RealDstReg = 0; @@ -1400,7 +1394,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. if (DstSubIdx != SubIdx) { - DOUT << "\t Sub-register indices mismatch.\n"; + DEBUG(errs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } else @@ -1413,7 +1407,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. if (SrcSubIdx != SubIdx) { - DOUT << "\t Sub-register indices mismatch.\n"; + DEBUG(errs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. 
} } else @@ -1422,8 +1416,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } else if ((DstIsPhys && isExtSubReg) || (SrcIsPhys && (isInsSubReg || isSubRegToReg))) { if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) { - DOUT << "\tSrc of extract_subreg already coalesced with reg" - << " of a super-class.\n"; + DEBUG(errs() << "\tSrc of extract_subreg already coalesced with reg" + << " of a super-class.\n"); return false; // Not coalescable. } @@ -1446,11 +1440,22 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // class as the would be resulting register. SubIdx = 0; else { - DOUT << "\t Sub-register indices mismatch.\n"; + DEBUG(errs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } if (SubIdx) { + if (!DstIsPhys && !SrcIsPhys) { + if (isInsSubReg || isSubRegToReg) { + NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx); + } else // extract_subreg { + NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx); + } + if (!NewRC) { + DEBUG(errs() << "\t Conflicting sub-register indices.\n"); + return false; // Not coalescable + } + unsigned LargeReg = isExtSubReg ? SrcReg : DstReg; unsigned SmallReg = isExtSubReg ? DstReg : SrcReg; unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count(); @@ -1461,7 +1466,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } } else if (differingRegisterClasses(SrcReg, DstReg)) { - if (!CrossClassJoin) + if (DisableCrossClassJoin) return false; CrossRC = true; @@ -1502,11 +1507,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } - const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); - const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg); unsigned LargeReg = SrcReg; unsigned SmallReg = DstReg; - unsigned Limit = 0; // Now determine the register class of the joined register. 
if (isExtSubReg) { @@ -1517,13 +1519,14 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { Again = true; return false; } - Limit = allocatableRCRegs_[DstRC].count(); + if (!DstIsPhys && !SrcIsPhys) + NewRC = SrcRC; } else if (!SrcIsPhys && !DstIsPhys) { NewRC = getCommonSubClass(SrcRC, DstRC); if (!NewRC) { - DOUT << "\tDisjoint regclasses: " - << SrcRC->getName() << ", " - << DstRC->getName() << ".\n"; + DEBUG(errs() << "\tDisjoint regclasses: " + << SrcRC->getName() << ", " + << DstRC->getName() << ".\n"); return false; // Not coalescable. } if (DstRC->getSize() > SrcRC->getSize()) @@ -1537,7 +1540,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (isExtSubReg || DstRC->isASubClass()) && !isWinToJoinCrossClass(LargeReg, SmallReg, allocatableRCRegs_[NewRC].count())) { - DOUT << "\tSrc/Dest are different register classes.\n"; + DEBUG(errs() << "\tSrc/Dest are different register classes.\n"); // Allow the coalescer to try again in case either side gets coalesced to // a physical register that's compatible with the other side. e.g. // r1024 = MOV32to32_ r1025 @@ -1552,15 +1555,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg)) return false; - + LiveInterval &SrcInt = li_->getInterval(SrcReg); LiveInterval &DstInt = li_->getInterval(DstReg); assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg && "Register mapping is horribly broken!"); - DOUT << "\t\tInspecting "; SrcInt.print(DOUT, tri_); - DOUT << " and "; DstInt.print(DOUT, tri_); - DOUT << ": "; + DEBUG({ + errs() << "\t\tInspecting "; SrcInt.print(errs(), tri_); + errs() << " and "; DstInt.print(errs(), tri_); + errs() << ": "; + }); // Save a copy of the virtual register live interval. 
We'll manually // merge this into the "real" physical register live interval this is @@ -1590,7 +1595,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg); ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; + DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1598,7 +1603,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg); ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; + DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1612,9 +1617,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - if (TheCopy.isBackEdge) - Threshold *= 2; // Favors back edge copies. - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); float Ratio = 1.0 / Threshold; if (Length > Threshold && @@ -1622,7 +1624,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->use_end()) / Length) < Ratio)) { mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; + DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. 
return false; } @@ -1641,7 +1643,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Only coalesce an empty interval (defined by implicit_def) with // another interval which has a valno defined by the CopyMI and the CopyMI // is a kill of the implicit def. - DOUT << "Not profitable!\n"; + DEBUG(errs() << "Not profitable!\n"); return false; } @@ -1651,9 +1653,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && - ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI)) + ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) return true; - + // If we can eliminate the copy without merging the live ranges, do so now. if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || @@ -1661,9 +1663,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { JoinedCopies.insert(CopyMI); return true; } - + // Otherwise, we are unable to join the intervals. - DOUT << "Interference!\n"; + DEBUG(errs() << "Interference!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1676,7 +1678,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "LiveInterval::join didn't work right!"); - + // If we're about to merge live ranges into a physical register live interval, // we have to update any aliased register's live ranges to indicate that they // have clobbered values for this range. 
@@ -1690,14 +1692,14 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(), E = SavedLI->vni_end(); I != E; ++I) { const VNInfo *ValNo = *I; - VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy, + VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(), false, // updated at * li_->getVNInfoAllocator()); NewValNo->setFlags(ValNo->getFlags()); // * updated here. RealInt.addKills(NewValNo, ValNo->kills); RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo); } - RealInt.weight += SavedLI->weight; + RealInt.weight += SavedLI->weight; DstReg = RealDstReg ? RealDstReg : RealSrcReg; } @@ -1721,32 +1723,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Coalescing to a virtual register that is of a sub-register class of the // other. Make sure the resulting register is set to the right register class. - if (CrossRC) { - ++numCrossRCs; - if (NewRC) - mri_->setRegClass(DstReg, NewRC); - } - - if (NewHeuristic) { - // Add all copies that define val# in the source interval into the queue. - for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(), - e = ResSrcInt->vni_end(); i != e; ++i) { - const VNInfo *vni = *i; - // FIXME: Do isPHIDef and isDefAccurate both need to be tested? - if (!vni->def || vni->isUnused() || vni->isPHIDef() || !vni->isDefAccurate()) - continue; - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx; - if (CopyMI && - JoinedCopies.count(CopyMI) == 0 && - tii_->isMoveInstr(*CopyMI, NewSrcReg, NewDstReg, - NewSrcSubIdx, NewDstSubIdx)) { - unsigned LoopDepth = loopInfo->getLoopDepth(CopyMBB); - JoinQueue->push(CopyRec(CopyMI, LoopDepth, - isBackEdgeCopy(CopyMI, DstReg))); - } - } - } + if (CrossRC) + ++numCrossRCs; + + // This may happen even if it's cross-rc coalescing. e.g. 
+ // %reg1026<def> = SUBREG_TO_REG 0, %reg1037<kill>, 4 + // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to + // be allocate a register from GR64_ABCD. + if (NewRC) + mri_->setRegClass(DstReg, NewRC); // Remember to delete the copy instruction. JoinedCopies.insert(CopyMI); @@ -1757,13 +1742,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (TargetRegisterInfo::isVirtualRegister(DstReg)) RemoveUnnecessaryKills(DstReg, *ResDstInt); - if (isInsSubReg) - // Avoid: - // r1024 = op - // r1024 = implicit_def - // ... - // = r1024 - RemoveDeadImpDef(DstReg, *ResDstInt); UpdateRegDefsUses(SrcReg, DstReg, SubIdx); // SrcReg is guarateed to be the register whose live interval that is @@ -1779,29 +1757,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { delete SavedLI; } - if (isEmpty) { - // Now the copy is being coalesced away, the val# previously defined - // by the copy is being defined by an IMPLICIT_DEF which defines a zero - // length interval. Remove the val#. - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - const LiveRange *LR = ResDstInt->getLiveRangeContaining(CopyIdx); - VNInfo *ImpVal = LR->valno; - assert(ImpVal->def == CopyIdx); - unsigned NextDef = LR->end; - TurnCopiesFromValNoToImpDefs(*ResDstInt, ImpVal); - ResDstInt->removeValNo(ImpVal); - LR = ResDstInt->FindLiveRangeContaining(NextDef); - if (LR != ResDstInt->end() && LR->valno->def == NextDef) { - // Special case: vr1024 = implicit_def - // vr1024 = insert_subreg vr1024, vr1025, c - // The insert_subreg becomes a "copy" that defines a val# which can itself - // be coalesced away. - MachineInstr *DefMI = li_->getInstructionFromIndex(NextDef); - if (DefMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) - LR->valno->copy = DefMI; - } - } - // If resulting interval has a preference that no longer fits because of subreg // coalescing, just clear the preference. 
unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_); @@ -1812,8 +1767,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->setRegAllocationHint(ResDstInt->reg, 0, 0); } - DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_); - DOUT << "\n"; + DEBUG({ + errs() << "\n\t\tJoined. Result = "; + ResDstInt->print(errs(), tri_); + errs() << "\n"; + }); ++numJoins; return true; @@ -1860,7 +1818,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI, // been computed, return it. if (OtherValNoAssignments[OtherValNo->id] >= 0) return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; - + // Mark this value number as currently being computed, then ask what the // ultimate value # of the other value is. ThisValNoAssignments[VN] = -2; @@ -1896,7 +1854,7 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, DstReg == li.reg && SrcReg == Reg) { // Cache computed info. LR->valno->def = LR->start; - LR->valno->copy = DefMI; + LR->valno->setCopy(DefMI); return true; } } @@ -1910,16 +1868,16 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, /// joins them and returns true. bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ assert(RHS.containsOneValue()); - + // Some number (potentially more than one) value numbers in the current // interval may be defined as copies from the RHS. Scan the overlapping // portions of the LHS and RHS, keeping track of this and looking for // overlapping live ranges that are NOT defined as copies. If these exist, we // cannot coalesce. 
- + LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end(); LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end(); - + if (LHSIt->start < RHSIt->start) { LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start); if (LHSIt != LHS.begin()) --LHSIt; @@ -1927,9 +1885,9 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start); if (RHSIt != RHS.begin()) --RHSIt; } - + SmallVector<VNInfo*, 8> EliminatedLHSVals; - + while (1) { // Determine if these live intervals overlap. bool Overlaps = false; @@ -1937,7 +1895,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ Overlaps = LHSIt->end > RHSIt->start; else Overlaps = RHSIt->end > LHSIt->start; - + // If the live intervals overlap, there are two interesting cases: if the // LHS interval is defined by a copy from the RHS, it's ok and we record // that the LHS value # is the same as the RHS. If it's not, then we cannot @@ -1955,7 +1913,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // vr1025 = copy vr1024 // .. // BB2: - // vr1024 = op + // vr1024 = op // = vr1025 // Even though vr1025 is copied from vr1024, it's not safe to // coalesce them since the live range of vr1025 intersects the @@ -1964,12 +1922,12 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ return false; EliminatedLHSVals.push_back(LHSIt->valno); } - + // We know this entire LHS live range is okay, so skip it now. if (++LHSIt == LHSEnd) break; continue; } - + if (LHSIt->end < RHSIt->end) { if (++LHSIt == LHSEnd) break; } else { @@ -1993,7 +1951,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // vr1025 = copy vr1024 // .. 
// BB2: - // vr1024 = op + // vr1024 = op // = vr1025 // Even though vr1025 is copied from vr1024, it's not safe to // coalesced them since live range of vr1025 intersects the @@ -2007,11 +1965,11 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ } } } - + if (++RHSIt == RHSEnd) break; } } - + // If we got here, we know that the coalescing will be successful and that // the value numbers in EliminatedLHSVals will all be merged together. Since // the most common case is that EliminatedLHSVals has a single number, we @@ -2039,28 +1997,29 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ *tri_->getSuperRegisters(LHS.reg)) // Imprecise sub-register information. Can't handle it. return false; - assert(0 && "No copies from the RHS?"); + llvm_unreachable("No copies from the RHS?"); } else { LHSValNo = EliminatedLHSVals[0]; } - + // Okay, now that there is a single LHS value number that we're merging the // RHS into, update the value number info for the LHS to indicate that the // value number is defined where the RHS value number was. const VNInfo *VNI = RHS.getValNumInfo(0); LHSValNo->def = VNI->def; - LHSValNo->copy = VNI->copy; - + LHSValNo->setCopy(VNI->getCopy()); + // Okay, the final step is to loop over the RHS live intervals, adding them to // the LHS. if (VNI->hasPHIKill()) LHSValNo->setHasPHIKill(true); LHS.addKills(LHSValNo, VNI->kills); LHS.MergeRangesInAsValue(RHS, LHSValNo); - LHS.weight += RHS.weight; + + LHS.ComputeJoinedWeight(RHS); // Update regalloc hint if both are virtual registers. 
- if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && + if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && TargetRegisterInfo::isVirtualRegister(RHS.reg)) { std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg); std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg); @@ -2122,8 +2081,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, } else { for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; } } @@ -2137,19 +2098,21 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, } else { for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; } } } - + // Compute ultimate value numbers for the LHS and RHS values. if (RHS.containsOneValue()) { // Copies from a liveinterval with a single value are simple to handle and // very common, handle the special case here. This is important, because // often RHS is small and LHS is large (e.g. a physreg). - + // Find out if the RHS is defined as a copy from some value in the LHS. int RHSVal0DefinedFromLHS = -1; int RHSValID = -1; @@ -2167,15 +2130,16 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, } } else { // It was defined as a copy from the LHS, find out what value # it is. 
- RHSValNoInfo = LHS.getLiveRangeContaining(RHSValNoInfo0->def-1)->valno; + RHSValNoInfo = + LHS.getLiveRangeContaining(li_->getPrevSlot(RHSValNoInfo0->def))->valno; RHSValID = RHSValNoInfo->id; RHSVal0DefinedFromLHS = RHSValID; } - + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); RHSValNoAssignments.resize(RHS.getNumValNums(), -1); NewVNInfo.resize(LHS.getNumValNums(), NULL); - + // Okay, *all* of the values in LHS that are defined as a copy from RHS // should now get updated. for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); @@ -2207,7 +2171,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, LHSValNoAssignments[VN] = VN; } } - + assert(RHSValID != -1 && "Didn't find value #?"); RHSValNoAssignments[0] = RHSValID; if (RHSVal0DefinedFromLHS != -1) { @@ -2221,44 +2185,46 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? continue; - + // DstReg is known to be a register in the LHS interval. If the src is // from the RHS interval, we can use its value #. if (li_->getVNInfoSourceReg(VNI) != RHS.reg) continue; - + // Figure out the value # from the RHS. - LHSValsDefinedFromRHS[VNI]=RHS.getLiveRangeContaining(VNI->def-1)->valno; + LHSValsDefinedFromRHS[VNI]= + RHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno; } - + // Loop over the value numbers of the RHS, seeing if any are defined from // the LHS. for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? continue; - + // DstReg is known to be a register in the RHS interval. 
If the src is // from the LHS interval, we can use its value #. if (li_->getVNInfoSourceReg(VNI) != LHS.reg) continue; - + // Figure out the value # from the LHS. - RHSValsDefinedFromLHS[VNI]=LHS.getLiveRangeContaining(VNI->def-1)->valno; + RHSValsDefinedFromLHS[VNI]= + LHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno; } - + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); RHSValNoAssignments.resize(RHS.getNumValNums(), -1); NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); - + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) continue; ComputeUltimateVN(VNI, NewVNInfo, LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, @@ -2276,20 +2242,20 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, RHSValNoAssignments[VN] = NewVNInfo.size()-1; continue; } - + ComputeUltimateVN(VNI, NewVNInfo, RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, RHSValNoAssignments, LHSValNoAssignments); } } - + // Armed with the mappings of LHS/RHS values to ultimate values, walk the // interval lists to see if these intervals are coalescable. LiveInterval::const_iterator I = LHS.begin(); LiveInterval::const_iterator IE = LHS.end(); LiveInterval::const_iterator J = RHS.begin(); LiveInterval::const_iterator JE = RHS.end(); - + // Skip ahead until the first place of potential sharing. if (I->start < J->start) { I = std::upper_bound(I, IE, J->start); @@ -2298,7 +2264,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, J = std::upper_bound(J, JE, I->start); if (J != RHS.begin()) --J; } - + while (1) { // Determine if these two live ranges overlap. 
bool Overlaps; @@ -2316,7 +2282,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, RHSValNoAssignments[J->valno->id]) return false; } - + if (I->end < J->end) { ++I; if (I == IE) break; @@ -2331,7 +2297,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = LHSValsDefinedFromRHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned LHSValID = LHSValNoAssignments[VNI->id]; - LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def); + NewVNInfo[LHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[LHSValID]->setHasPHIKill(true); RHS.addKills(NewVNInfo[LHSValID], VNI->kills); @@ -2342,7 +2308,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = RHSValsDefinedFromLHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned RHSValID = RHSValNoAssignments[VNI->id]; - LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def); + NewVNInfo[RHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[RHSValID]->setHasPHIKill(true); LHS.addKills(NewVNInfo[RHSValID], VNI->kills); @@ -2377,37 +2343,17 @@ namespace { }; } -/// getRepIntervalSize - Returns the size of the interval that represents the -/// specified register. -template<class SF> -unsigned JoinPriorityQueue<SF>::getRepIntervalSize(unsigned Reg) { - return Rc->getRepIntervalSize(Reg); -} - -/// CopyRecSort::operator - Join priority queue sorting function. -/// -bool CopyRecSort::operator()(CopyRec left, CopyRec right) const { - // Inner loops first. 
- if (left.LoopDepth > right.LoopDepth) - return false; - else if (left.LoopDepth == right.LoopDepth) - if (left.isBackEdge && !right.isBackEdge) - return false; - return true; -} - void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, std::vector<CopyRec> &TryAgain) { - DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n"; + DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n"); std::vector<CopyRec> VirtCopies; std::vector<CopyRec> PhysCopies; std::vector<CopyRec> ImpDefCopies; - unsigned LoopDepth = loopInfo->getLoopDepth(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E;) { MachineInstr *Inst = MII++; - + // If this isn't a copy nor a extract_subreg, we can't join intervals. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { @@ -2422,21 +2368,14 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (NewHeuristic) { - JoinQueue->push(CopyRec(Inst, LoopDepth, isBackEdgeCopy(Inst, DstReg))); - } else { - if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) - ImpDefCopies.push_back(CopyRec(Inst, 0, false)); - else if (SrcIsPhys || DstIsPhys) - PhysCopies.push_back(CopyRec(Inst, 0, false)); - else - VirtCopies.push_back(CopyRec(Inst, 0, false)); - } + if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + ImpDefCopies.push_back(CopyRec(Inst, 0)); + else if (SrcIsPhys || DstIsPhys) + PhysCopies.push_back(CopyRec(Inst, 0)); + else + VirtCopies.push_back(CopyRec(Inst, 0)); } - if (NewHeuristic) - return; - // Try coalescing implicit copies first, followed by copies to / from // physical registers, then finally copies from virtual registers to // virtual registers. 
@@ -2464,10 +2403,7 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, } void SimpleRegisterCoalescing::joinIntervals() { - DOUT << "********** JOINING INTERVALS ***********\n"; - - if (NewHeuristic) - JoinQueue = new JoinPriorityQueue<CopyRecSort>(this); + DEBUG(errs() << "********** JOINING INTERVALS ***********\n"); std::vector<CopyRec> TryAgainList; if (loopInfo->empty()) { @@ -2495,52 +2431,26 @@ void SimpleRegisterCoalescing::joinIntervals() { for (unsigned i = 0, e = MBBs.size(); i != e; ++i) CopyCoalesceInMBB(MBBs[i].second, TryAgainList); } - + // Joining intervals can allow other intervals to be joined. Iteratively join // until we make no progress. - if (NewHeuristic) { - SmallVector<CopyRec, 16> TryAgain; - bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - while (!JoinQueue->empty()) { - CopyRec R = JoinQueue->pop(); - bool Again = false; - bool Success = JoinCopy(R, Again); - if (Success) - ProgressMade = true; - else if (Again) - TryAgain.push_back(R); - } + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; - if (ProgressMade) { - while (!TryAgain.empty()) { - JoinQueue->push(TryAgain.back()); - TryAgain.pop_back(); - } - } - } - } else { - bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - - for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { - CopyRec &TheCopy = TryAgainList[i]; - if (TheCopy.MI) { - bool Again = false; - bool Success = JoinCopy(TheCopy, Again); - if (Success || !Again) { - TheCopy.MI = 0; // Mark this one as done. - ProgressMade = true; - } - } + for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { + CopyRec &TheCopy = TryAgainList[i]; + if (!TheCopy.MI) + continue; + + bool Again = false; + bool Success = JoinCopy(TheCopy, Again); + if (Success || !Again) { + TheCopy.MI = 0; // Mark this one as done. 
+ ProgressMade = true; } } } - - if (NewHeuristic) - delete JoinQueue; } /// Return true if the two specified registers belong to different register @@ -2567,9 +2477,11 @@ SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, /// lastRegisterUse - Returns the last use of the specific register between /// cycles Start and End or NULL if there are no uses. MachineOperand * -SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, - unsigned Reg, unsigned &UseIdx) const{ - UseIdx = 0; +SimpleRegisterCoalescing::lastRegisterUse(LiveIndex Start, + LiveIndex End, + unsigned Reg, + LiveIndex &UseIdx) const{ + UseIdx = LiveIndex(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { MachineOperand *LastUse = NULL; for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg), @@ -2581,7 +2493,7 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, SrcReg == DstReg) // Ignore identity copies. continue; - unsigned Idx = li_->getInstructionIndex(UseMI); + LiveIndex Idx = li_->getInstructionIndex(UseMI); if (Idx >= Start && Idx < End && Idx >= UseIdx) { LastUse = &Use; UseIdx = li_->getUseIndex(Idx); @@ -2590,13 +2502,13 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, return LastUse; } - int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM; - int s = Start; + LiveIndex s = Start; + LiveIndex e = li_->getBaseIndex(li_->getPrevSlot(End)); while (e >= s) { // Skip deleted instructions MachineInstr *MI = li_->getInstructionFromIndex(e); - while ((e - InstrSlots::NUM) >= s && !MI) { - e -= InstrSlots::NUM; + while (e != LiveIndex() && li_->getPrevIndex(e) >= s && !MI) { + e = li_->getPrevIndex(e); MI = li_->getInstructionFromIndex(e); } if (e < s || MI == NULL) @@ -2615,7 +2527,7 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, } } - e -= InstrSlots::NUM; + e = li_->getPrevIndex(e); } return NULL; @@ -2624,9 +2536,9 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, 
unsigned End, void SimpleRegisterCoalescing::printRegName(unsigned reg) const { if (TargetRegisterInfo::isPhysicalRegister(reg)) - cerr << tri_->getName(reg); + errs() << tri_->getName(reg); else - cerr << "%reg" << reg; + errs() << "%reg" << reg; } void SimpleRegisterCoalescing::releaseMemory() { @@ -2635,64 +2547,106 @@ void SimpleRegisterCoalescing::releaseMemory() { ReMatDefs.clear(); } -static bool isZeroLengthInterval(LiveInterval *li) { +/// Returns true if the given live interval is zero length. +static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) { for (LiveInterval::Ranges::const_iterator i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) - if (i->end - i->start > LiveInterval::InstrSlots::NUM) + if (li_->getPrevIndex(i->end) > i->start) return false; return true; } -/// TurnCopyIntoImpDef - If source of the specified copy is an implicit def, -/// turn the copy into an implicit def. -bool -SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, - MachineBasicBlock *MBB, - unsigned DstReg, unsigned SrcReg) { - MachineInstr *CopyMI = &*I; - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - if (!li_->hasInterval(SrcReg)) - return false; - LiveInterval &SrcInt = li_->getInterval(SrcReg); - if (!SrcInt.empty()) - return false; - if (!li_->hasInterval(DstReg)) - return false; - LiveInterval &DstInt = li_->getInterval(DstReg); - const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx); - // If the valno extends beyond this basic block, then it's not safe to delete - // the val# or else livein information won't be correct. 
- MachineBasicBlock *EndMBB = li_->getMBBFromIndex(DstLR->end); - if (EndMBB != MBB) - return false; - DstInt.removeValNo(DstLR->valno); - CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); - for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i) - CopyMI->RemoveOperand(i); - CopyMI->getOperand(0).setIsUndef(); - bool NoUse = mri_->use_empty(SrcReg); - if (NoUse) { - for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(SrcReg), - RE = mri_->reg_end(); RI != RE; ) { - assert(RI.getOperand().isDef()); - MachineInstr *DefMI = &*RI; - ++RI; - // The implicit_def source has no other uses, delete it. - assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF); - li_->RemoveMachineInstrFromMaps(DefMI); - DefMI->eraseFromParent(); +void SimpleRegisterCoalescing::CalculateSpillWeights() { + SmallSet<unsigned, 4> Processed; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* MBB = mbbi; + LiveIndex MBBEnd = li_->getMBBEndIdx(MBB); + MachineLoop* loop = loopInfo->getLoopFor(MBB); + unsigned loopDepth = loop ? loop->getLoopDepth() : 0; + bool isExit = loop ? loop->isLoopExit(MBB) : false; + + for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end(); + mii != mie; ++mii) { + MachineInstr *MI = mii; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &mopi = MI->getOperand(i); + if (!mopi.isReg() || mopi.getReg() == 0) + continue; + unsigned Reg = mopi.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) + continue; + // Multiple uses of reg by the same instruction. It should not + // contribute to spill weight again. 
+ if (!Processed.insert(Reg)) + continue; + + bool HasDef = mopi.isDef(); + bool HasUse = !HasDef; + for (unsigned j = i+1; j != e; ++j) { + const MachineOperand &mopj = MI->getOperand(j); + if (!mopj.isReg() || mopj.getReg() != Reg) + continue; + HasDef |= mopj.isDef(); + HasUse |= mopj.isUse(); + if (HasDef && HasUse) + break; + } + + LiveInterval &RegInt = li_->getInterval(Reg); + float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth); + if (HasDef && isExit) { + // Looks like this is a loop count variable update. + LiveIndex DefIdx = + li_->getDefIndex(li_->getInstructionIndex(MI)); + const LiveRange *DLR = + li_->getInterval(Reg).getLiveRangeContaining(DefIdx); + if (DLR->end > MBBEnd) + Weight *= 3.0F; + } + RegInt.weight += Weight; + } + Processed.clear(); } } - // Mark uses of implicit_def isUndef. - for (MachineRegisterInfo::use_iterator RI = mri_->use_begin(DstReg), - RE = mri_->use_end(); RI != RE; ++RI) { - assert((*RI).getParent() == MBB); - RI.getOperand().setIsUndef(); - } + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { + LiveInterval &LI = *I->second; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // If the live interval length is essentially zero, i.e. in every live + // range the use follows def immediately, it doesn't make sense to spill + // it and hope it will be easier to allocate for this li. + if (isZeroLengthInterval(&LI, li_)) { + LI.weight = HUGE_VALF; + continue; + } - ++I; - return true; + bool isLoad = false; + SmallVector<LiveInterval*, 4> SpillIs; + if (li_->isReMaterializable(LI, SpillIs, isLoad)) { + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. If non of the defs are + // loads, then it's potentially very cheap to re-materialize. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. 
+ if (isLoad) + LI.weight *= 0.9F; + else + LI.weight *= 0.5F; + } + + // Slightly prefer live interval that has been assigned a preferred reg. + std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); + if (Hint.first || Hint.second) + LI.weight *= 1.01F; + + // Divide the weight of the interval by its size. This encourages + // spilling of intervals that are large and have few uses, and + // discourages spilling of small intervals with many uses. + LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; + } + } } @@ -2703,11 +2657,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { tri_ = tm_->getRegisterInfo(); tii_ = tm_->getInstrInfo(); li_ = &getAnalysis<LiveIntervals>(); + AA = &getAnalysis<AliasAnalysis>(); loopInfo = &getAnalysis<MachineLoopInfo>(); - DOUT << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'; + DEBUG(errs() << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'); allocatableRegs_ = tri_->getAllocatableSet(fn); for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), @@ -2719,10 +2674,10 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (EnableJoining) { joinIntervals(); DEBUG({ - DOUT << "********** INTERVALS POST JOINING **********\n"; + errs() << "********** INTERVALS POST JOINING **********\n"; for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){ - I->second->print(DOUT, tri_); - DOUT << "\n"; + I->second->print(errs(), tri_); + errs() << "\n"; } }); } @@ -2733,29 +2688,40 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); mbbi != mbbe; ++mbbi) { MachineBasicBlock* mbb = mbbi; - unsigned loopDepth = loopInfo->getLoopDepth(mbb); - for 
(MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); mii != mie; ) { MachineInstr *MI = mii; unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (JoinedCopies.count(MI)) { // Delete all coalesced copies. + bool DoDelete = true; if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) && "Unrecognized copy instruction"); DstReg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + // Do not delete extract_subreg, insert_subreg of physical + // registers unless the definition is dead. e.g. + // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 + // or else the scavenger may complain. LowerSubregs will + // change this to an IMPLICIT_DEF later. + DoDelete = false; } if (MI->registerDefIsDead(DstReg)) { LiveInterval &li = li_->getInterval(DstReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); + DoDelete = true; + } + if (!DoDelete) + mii = next(mii); + else { + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; } - li_->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; continue; } @@ -2807,70 +2773,20 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); ++numPeep; - } else if (!isMove || !TurnCopyIntoImpDef(mii, mbb, DstReg, SrcReg)) { - SmallSet<unsigned, 4> UniqueUses; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &mop = MI->getOperand(i); - if (mop.isReg() && mop.getReg() && - TargetRegisterInfo::isVirtualRegister(mop.getReg())) { - unsigned reg = mop.getReg(); - // Multiple uses of reg by the same instruction. It should not - // contribute to spill weight again. 
- if (UniqueUses.count(reg) != 0) - continue; - LiveInterval &RegInt = li_->getInterval(reg); - RegInt.weight += - li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth); - UniqueUses.insert(reg); - } - } + } else { ++mii; } } } - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { - LiveInterval &LI = *I->second; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // If the live interval length is essentially zero, i.e. in every live - // range the use follows def immediately, it doesn't make sense to spill - // it and hope it will be easier to allocate for this li. - if (isZeroLengthInterval(&LI)) - LI.weight = HUGE_VALF; - else { - bool isLoad = false; - SmallVector<LiveInterval*, 4> SpillIs; - if (li_->isReMaterializable(LI, SpillIs, isLoad)) { - // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If non of the defs are - // loads, then it's potentially very cheap to re-materialize. - // FIXME: this gets much more complicated once we support non-trivial - // re-materialization. - if (isLoad) - LI.weight *= 0.9F; - else - LI.weight *= 0.5F; - } - } - - // Slightly prefer live interval that has been assigned a preferred reg. - std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); - if (Hint.first || Hint.second) - LI.weight *= 1.01F; - - // Divide the weight of the interval by its size. This encourages - // spilling of intervals that are large and have few uses, and - // discourages spilling of small intervals with many uses. - LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; - } - } + CalculateSpillWeights(); DEBUG(dump()); return true; } /// print - Implement the dump method. 
-void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const { +void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const { li_->print(O, m); } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index d2c5581..3ebe3a1 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -18,7 +18,6 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/ADT/BitVector.h" -#include <queue> namespace llvm { class SimpleRegisterCoalescing; @@ -33,44 +32,8 @@ namespace llvm { struct CopyRec { MachineInstr *MI; unsigned LoopDepth; - bool isBackEdge; - CopyRec(MachineInstr *mi, unsigned depth, bool be) - : MI(mi), LoopDepth(depth), isBackEdge(be) {}; - }; - - template<class SF> class JoinPriorityQueue; - - /// CopyRecSort - Sorting function for coalescer queue. - /// - struct CopyRecSort : public std::binary_function<CopyRec,CopyRec,bool> { - JoinPriorityQueue<CopyRecSort> *JPQ; - explicit CopyRecSort(JoinPriorityQueue<CopyRecSort> *jpq) : JPQ(jpq) {} - CopyRecSort(const CopyRecSort &RHS) : JPQ(RHS.JPQ) {} - bool operator()(CopyRec left, CopyRec right) const; - }; - - /// JoinQueue - A priority queue of copy instructions the coalescer is - /// going to process. - template<class SF> - class JoinPriorityQueue { - SimpleRegisterCoalescing *Rc; - std::priority_queue<CopyRec, std::vector<CopyRec>, SF> Queue; - - public: - explicit JoinPriorityQueue(SimpleRegisterCoalescing *rc) - : Rc(rc), Queue(SF(this)) {} - - bool empty() const { return Queue.empty(); } - void push(CopyRec R) { Queue.push(R); } - CopyRec pop() { - if (empty()) return CopyRec(0, 0, false); - CopyRec R = Queue.top(); - Queue.pop(); - return R; - } - - // Callbacks to SimpleRegisterCoalescing. 
- unsigned getRepIntervalSize(unsigned Reg); + CopyRec(MachineInstr *mi, unsigned depth) + : MI(mi), LoopDepth(depth) {}; }; class SimpleRegisterCoalescing : public MachineFunctionPass, @@ -82,14 +45,11 @@ namespace llvm { const TargetInstrInfo* tii_; LiveIntervals *li_; const MachineLoopInfo* loopInfo; + AliasAnalysis *AA; BitVector allocatableRegs_; DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_; - /// JoinQueue - A priority queue of copy instructions the coalescer is - /// going to process. - JoinPriorityQueue<CopyRecSort> *JoinQueue; - /// JoinedCopies - Keep track of copies eliminated due to coalescing. /// SmallPtrSet<MachineInstr*, 32> JoinedCopies; @@ -127,20 +87,8 @@ namespace llvm { return false; }; - /// getRepIntervalSize - Called from join priority queue sorting function. - /// It returns the size of the interval that represent the given register. - unsigned getRepIntervalSize(unsigned Reg) { - if (!li_->hasInterval(Reg)) - return 0; - return li_->getApproximateInstructionCount(li_->getInterval(Reg)) * - LiveInterval::InstrSlots::NUM; - } - /// print - Implement the dump method. - virtual void print(std::ostream &O, const Module* = 0) const; - void print(std::ostream *O, const Module* M = 0) const { - if (O) print(*O, M); - } + virtual void print(raw_ostream &O, const Module* = 0) const; private: /// joinIntervals - join compatible live intervals @@ -176,7 +124,6 @@ namespace llvm { /// classes. The registers may be either phys or virt regs. bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; - /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single @@ -199,20 +146,14 @@ namespace llvm { /// TrimLiveIntervalToLastUse - If there is a last use in the same basic /// block as the copy instruction, trim the ive interval to the last use /// and return true. 
- bool TrimLiveIntervalToLastUse(unsigned CopyIdx, + bool TrimLiveIntervalToLastUse(LiveIndex CopyIdx, MachineBasicBlock *CopyMBB, LiveInterval &li, const LiveRange *LR); /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, - MachineInstr *CopyMI); - - /// TurnCopyIntoImpDef - If source of the specified copy is an implicit def, - /// turn the copy into an implicit def. - bool TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, - MachineBasicBlock *MBB, - unsigned DstReg, unsigned SrcReg); + unsigned DstSubIdx, MachineInstr *CopyMI); /// CanCoalesceWithImpDef - Returns true if the specified copy instruction /// from an implicit def to another register can be coalesced away. @@ -266,10 +207,6 @@ namespace llvm { bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR, unsigned Reg); - /// isBackEdgeCopy - Return true if CopyMI is a back edge copy. - /// - bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg) const; - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use @@ -277,10 +214,6 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); - /// RemoveDeadImpDef - Remove implicit_def instructions which are - /// "re-defining" registers due to insert_subreg coalescing. e.g. - void RemoveDeadImpDef(unsigned Reg, LiveInterval &LI); - /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate /// due to live range lengthening as the result of coalescing. 
void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI); @@ -302,8 +235,13 @@ namespace llvm { /// lastRegisterUse - Returns the last use of the specific register between /// cycles Start and End or NULL if there are no uses. - MachineOperand *lastRegisterUse(unsigned Start, unsigned End, unsigned Reg, - unsigned &LastUseIdx) const; + MachineOperand *lastRegisterUse(LiveIndex Start, + LiveIndex End, unsigned Reg, + LiveIndex &LastUseIdx) const; + + /// CalculateSpillWeights - Compute spill weights for all virtual register + /// live intervals. + void CalculateSpillWeights(); void printRegName(unsigned reg) const; }; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp new file mode 100644 index 0000000..e987fa2 --- /dev/null +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -0,0 +1,520 @@ +//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which use SjLj +// based exception handling. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sjljehprepare" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +STATISTIC(NumInvokes, "Number of invokes replaced"); +STATISTIC(NumUnwinds, "Number of unwinds replaced"); +STATISTIC(NumSpilled, "Number of registers live across unwind edges"); + +namespace { + class VISIBILITY_HIDDEN SjLjEHPass : public FunctionPass { + + const TargetLowering *TLI; + + const Type *FunctionContextTy; + Constant *RegisterFn; + Constant *UnregisterFn; + Constant *ResumeFn; + Constant *BuiltinSetjmpFn; + Constant *FrameAddrFn; + Constant *LSDAAddrFn; + Value *PersonalityFn; + Constant *SelectorFn; + Constant *ExceptionFn; + + Value *CallSite; + public: + static char ID; // Pass identification, replacement for typeid + explicit SjLjEHPass(const TargetLowering *tli = NULL) + : FunctionPass(&ID), TLI(tli) { } + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + const char *getPassName() const { + return "SJLJ Exception Handling preparation"; + } + + private: + void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, + Value *CallSite, + SwitchInst *CatchSwitch); + void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); + bool insertSjLjEHSupport(Function &F); + }; +} // end anonymous 
namespace + +char SjLjEHPass::ID = 0; + +// Public Interface To the SjLjEHPass pass. +FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { + return new SjLjEHPass(TLI); +} +// doInitialization - Set up decalarations and types needed to process +// exceptions. +bool SjLjEHPass::doInitialization(Module &M) { + // Build the function context structure. + // builtin_setjmp uses a five word jbuf + const Type *VoidPtrTy = + Type::getInt8PtrTy(M.getContext()); + const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + FunctionContextTy = + StructType::get(M.getContext(), + VoidPtrTy, // __prev + Int32Ty, // call_site + ArrayType::get(Int32Ty, 4), // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + ArrayType::get(VoidPtrTy, 5), // __jbuf + NULL); + RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register", + Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), + (Type *)0); + UnregisterFn = + M.getOrInsertFunction("_Unwind_SjLj_Unregister", + Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), + (Type *)0); + ResumeFn = + M.getOrInsertFunction("_Unwind_SjLj_Resume", + Type::getVoidTy(M.getContext()), + VoidPtrTy, + (Type *)0); + FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); + LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); + SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); + ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); + PersonalityFn = 0; + + return true; +} + +/// markInvokeCallSite - Insert code to mark the call_site for this invoke +void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, + Value *CallSite, + SwitchInst *CatchSwitch) { + ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo); + // The runtime comes back to the dispatcher with the call_site - 1 in + // the 
context. Odd, but there it is. + ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo - 1); + + // If the unwind edge has phi nodes, split the edge. + if (isa<PHINode>(II->getUnwindDest()->begin())) { + SplitCriticalEdge(II, 1, this); + + // If there are any phi nodes left, they must have a single predecessor. + while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) { + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + PN->eraseFromParent(); + } + } + + // Insert a store of the invoke num before the invoke and store zero into the + // location afterward. + new StoreInst(CallSiteNoC, CallSite, true, II); // volatile + + // Add a switch case to our unwind block. + CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); + // We still want this to look like an invoke so we emit the LSDA properly + // FIXME: ??? Or will this cause strangeness with mis-matched IDs like + // when it was in the front end? +} + +/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until +/// we reach blocks we've already seen. +static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) { + if (!LiveBBs.insert(BB).second) return; // already been here. + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + MarkBlocksLiveIn(*PI, LiveBBs); +} + +/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge +/// we spill into a stack location, guaranteeing that there is nothing live +/// across the unwind edge. This process also splits all critical edges +/// coming out of invoke's. +void SjLjEHPass:: +splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) { + // First step, split all critical edges from invoke instructions. 
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + InvokeInst *II = Invokes[i]; + SplitCriticalEdge(II, 0, this); + SplitCriticalEdge(II, 1, this); + assert(!isa<PHINode>(II->getNormalDest()) && + !isa<PHINode>(II->getUnwindDest()) && + "critical edge splitting left single entry phi nodes?"); + } + + Function *F = Invokes.back()->getParent()->getParent(); + + // To avoid having to handle incoming arguments specially, we lower each arg + // to a copy instruction in the entry block. This ensures that the argument + // value itself cannot be live across the entry block. + BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); + while (isa<AllocaInst>(AfterAllocaInsertPt) && + isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize())) + ++AfterAllocaInsertPt; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI) { + // This is always a no-op cast because we're casting AI to AI->getType() so + // src and destination types are identical. BitCast is the only possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Normally its is forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, we're + // replacing it here with the same value it was constructed with to simply + // make NC its user. + NC->setOperand(0, AI); + } + + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. 
+ Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->use_back())->getParent() == BB && + !isa<PHINode>(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector<Instruction*,16> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (User->getParent() != BB || isa<PHINode>(User)) + Users.push_back(User); + } + + // Find all of the blocks that this value is live in. + std::set<BasicBlock*> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it. 
+ if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } +} + +bool SjLjEHPass::insertSjLjEHSupport(Function &F) { + SmallVector<ReturnInst*,16> Returns; + SmallVector<UnwindInst*,16> Unwinds; + SmallVector<InvokeInst*,16> Invokes; + + // Look through the terminators of the basic blocks to find invokes, returns + // and unwinds + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + // Remember all return instructions in case we insert an invoke into this + // function. + Returns.push_back(RI); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Invokes.push_back(II); + } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { + Unwinds.push_back(UI); + } + // If we don't have any invokes or unwinds, there's nothing to do. + if (Unwinds.empty() && Invokes.empty()) return false; + + // Find the eh.selector.* and eh.exception calls. We'll use the first + // eh.selector to determine the right personality function to use. For + // SJLJ, we always use the same personality for the whole function, + // not on a per-selector basis. + // FIXME: That's a bit ugly. Better way? + SmallVector<CallInst*,16> EH_Selectors; + SmallVector<CallInst*,16> EH_Exceptions; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (CI->getCalledFunction() == SelectorFn) { + if (!PersonalityFn) PersonalityFn = CI->getOperand(2); + EH_Selectors.push_back(CI); + } else if (CI->getCalledFunction() == ExceptionFn) { + EH_Exceptions.push_back(CI); + } + } + } + } + // If we don't have any eh.selector calls, we can't determine the personality + // function. Without a personality function, we can't process exceptions. 
+ if (!PersonalityFn) return false; + + NumInvokes += Invokes.size(); + NumUnwinds += Unwinds.size(); + + if (!Invokes.empty()) { + // We have invokes, so we need to add register/unregister calls to get + // this function onto the global unwind stack. + // + // First thing we need to do is scan the whole function for values that are + // live across unwind edges. Each value that is live across an unwind edge + // we spill into a stack location, guaranteeing that there is nothing live + // across the unwind edge. This process also splits all critical edges + // coming out of invoke's. + splitLiveRangesLiveAcrossInvokes(Invokes); + + BasicBlock *EntryBB = F.begin(); + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an + // alloca because the value needs to be added to the global context list. + unsigned Align = 4; // FIXME: Should be a TLI check? + AllocaInst *FunctionContext = + new AllocaInst(FunctionContextTy, 0, Align, + "fcn_context", F.begin()->begin()); + + Value *Idxs[2]; + const Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + // We need to also keep around a reference to the call_site field + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 1); + CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "call_site", + EntryBB->getTerminator()); + + // The exception selector comes back in context->data[1] + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "fc_data", + EntryBB->getTerminator()); + Idxs[1] = ConstantInt::get(Int32Ty, 1); + Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exc_selector_gep", + EntryBB->getTerminator()); + // The exception value comes back in context->data[0] + Idxs[1] = Zero; + Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exception_gep", + 
EntryBB->getTerminator()); + + // The result of the eh.selector call will be replaced with a + // a reference to the selector value returned in the function + // context. We leave the selector itself so the EH analysis later + // can use it. + for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { + CallInst *I = EH_Selectors[i]; + Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); + I->replaceAllUsesWith(SelectorVal); + } + // eh.exception calls are replaced with references to the proper + // location in the context. Unlike eh.selector, the eh.exception + // calls are removed entirely. + for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { + CallInst *I = EH_Exceptions[i]; + // Possible for there to be duplicates, so check to make sure + // the instruction hasn't already been removed. + if (!I->getParent()) continue; + Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); + const Type *Ty = Type::getInt8PtrTy(F.getContext()); + Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); + + I->replaceAllUsesWith(Val); + I->eraseFromParent(); + } + + + + + // The entry block changes to have the eh.sjlj.setjmp, with a conditional + // branch to a dispatch block for non-zero returns. If we return normally, + // we're not handling an exception and just register the function context + // and continue. + + // Create the dispatch block. The dispatch block is basically a big switch + // statement that goes to all of the invoke landing pads. + BasicBlock *DispatchBlock = + BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); + + // Insert a load in the Catch block, and a switch on its value. By default, + // we go to a block that just does an unwind (which is the correct action + // for a standard call). 
+ BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwindbb", &F); + Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + + Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, + DispatchBlock); + SwitchInst *DispatchSwitch = + SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), DispatchBlock); + // Split the entry block to insert the conditional branch for the setjmp. + BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "eh.sjlj.setjmp.cont"); + + // Populate the Function Context + // 1. LSDA address + // 2. Personality function address + // 3. jmpbuf (save FP and call eh.sjlj.setjmp) + + // LSDA address + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 4); + Value *LSDAFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", + EntryBB->getTerminator()); + new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + + Idxs[1] = ConstantInt::get(Int32Ty, 3); + Value *PersonalityFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + new StoreInst(PersonalityFn, PersonalityFieldPtr, true, + EntryBB->getTerminator()); + + // Save the frame pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 5); + Value *FieldPtr + = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "jbuf_gep", + EntryBB->getTerminator()); + Idxs[1] = ConstantInt::get(Int32Ty, 0); + Value *ElemPtr = + GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep", + EntryBB->getTerminator()); + + Value *Val = CallInst::Create(FrameAddrFn, + ConstantInt::get(Int32Ty, 0), + "fp", + EntryBB->getTerminator()); + new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator()); + // Call the setjmp instrinsic. 
It fills in the rest of the jmpbuf + Value *SetjmpArg = + CastInst::Create(Instruction::BitCast, FieldPtr, + Type::getInt8PtrTy(F.getContext()), "", + EntryBB->getTerminator()); + Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, + "dispatch", + EntryBB->getTerminator()); + // check the return value of the setjmp. non-zero goes to dispatcher + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, DispatchVal, Zero, + "notunwind"); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. + BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); + + // Register the function context and make sure it's known to not throw + CallInst *Register = + CallInst::Create(RegisterFn, FunctionContext, "", + ContBlock->getTerminator()); + Register->setDoesNotThrow(); + + // At this point, we are all set up, update the invoke instructions + // to mark their call_site values, and fill in the dispatch switch + // accordingly. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); + + // The front end has likely added calls to _Unwind_Resume. We need + // to find those calls and mark the call_site as -1 immediately prior. + // resume is a noreturn function, so any block that has a call to it + // should end in an 'unreachable' instruction with the call immediately + // prior. That's how we'll search. + // ??? There's got to be a better way. this is fugly. 
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if ((dyn_cast<UnreachableInst>(BB->getTerminator()))) { + BasicBlock::iterator I = BB->getTerminator(); + // Check the previous instruction and see if it's a resume call + if (I == BB->begin()) continue; + if (CallInst *CI = dyn_cast<CallInst>(--I)) { + if (CI->getCalledFunction() == ResumeFn) { + Value *NegativeOne = Constant::getAllOnesValue(Int32Ty); + new StoreInst(NegativeOne, CallSite, true, I); // volatile + } + } + } + + // Replace all unwinds with a branch to the unwind handler. + // ??? Should this ever happen with sjlj exceptions? + for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { + BranchInst::Create(UnwindBlock, Unwinds[i]); + Unwinds[i]->eraseFromParent(); + } + + // Finally, for any returns from this function, if this function contains an + // invoke, add a call to unregister the function context. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) + CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); + } + + return true; +} + +bool SjLjEHPass::runOnFunction(Function &F) { + bool Res = insertSjLjEHSupport(F); + return Res; +} diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 405cd80..0277d64 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -13,12 +13,13 @@ #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -50,13 +51,13 @@ protected: /// Ensures there is space before the given machine instruction, returns the /// instruction's new number. 
- unsigned makeSpaceBefore(MachineInstr *mi) { + LiveIndex makeSpaceBefore(MachineInstr *mi) { if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { lis->scaleNumbering(2); ls->scaleNumbering(2); } - unsigned miIdx = lis->getInstructionIndex(mi); + LiveIndex miIdx = lis->getInstructionIndex(mi); assert(lis->hasGapBeforeInstr(miIdx)); @@ -65,13 +66,13 @@ protected: /// Ensure there is space after the given machine instruction, returns the /// instruction's new number. - unsigned makeSpaceAfter(MachineInstr *mi) { + LiveIndex makeSpaceAfter(MachineInstr *mi) { if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { lis->scaleNumbering(2); ls->scaleNumbering(2); } - unsigned miIdx = lis->getInstructionIndex(mi); + LiveIndex miIdx = lis->getInstructionIndex(mi); assert(lis->hasGapAfterInstr(miIdx)); @@ -82,19 +83,19 @@ protected: /// after the given instruction. Returns the base index of the inserted /// instruction. The caller is responsible for adding an appropriate /// LiveInterval to the LiveIntervals analysis. - unsigned insertStoreAfter(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { + LiveIndex insertStoreAfter(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { MachineBasicBlock::iterator nextInstItr(next(mi)); - unsigned miIdx = makeSpaceAfter(mi); + LiveIndex miIdx = makeSpaceAfter(mi); tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(next(mi)); MachineInstr *storeInst = &*storeInstItr; - unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM; + LiveIndex storeInstIdx = lis->getNextIndex(miIdx); assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && "Store inst index already in use."); @@ -107,15 +108,15 @@ protected: /// Insert a store of the given vreg to the given stack slot immediately /// before the given instructnion. Returns the base index of the inserted /// Instruction. 
- unsigned insertStoreBefore(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - unsigned miIdx = makeSpaceBefore(mi); + LiveIndex insertStoreBefore(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + LiveIndex miIdx = makeSpaceBefore(mi); tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(prior(mi)); MachineInstr *storeInst = &*storeInstItr; - unsigned storeInstIdx = miIdx - LiveInterval::InstrSlots::NUM; + LiveIndex storeInstIdx = lis->getPrevIndex(miIdx); assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && "Store inst index already in use."); @@ -130,14 +131,15 @@ protected: unsigned vreg, const TargetRegisterClass *trc) { - unsigned storeInstIdx = insertStoreAfter(mi, ss, vreg, trc); - unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)), - end = lis->getUseIndex(storeInstIdx); + LiveIndex storeInstIdx = insertStoreAfter(mi, ss, vreg, trc); + LiveIndex start = lis->getDefIndex(lis->getInstructionIndex(mi)), + end = lis->getUseIndex(storeInstIdx); VNInfo *vni = li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator()); - vni->kills.push_back(storeInstIdx); - DOUT << " Inserting store range: [" << start << ", " << end << ")\n"; + vni->addKill(storeInstIdx); + DEBUG(errs() << " Inserting store range: [" << start + << ", " << end << ")\n"); LiveRange lr(start, end, vni); li->addRange(lr); @@ -147,18 +149,18 @@ protected: /// after the given instruction. Returns the base index of the inserted /// instruction. The caller is responsibel for adding/removing an appropriate /// range vreg's LiveInterval. 
- unsigned insertLoadAfter(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { + LiveIndex insertLoadAfter(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { MachineBasicBlock::iterator nextInstItr(next(mi)); - unsigned miIdx = makeSpaceAfter(mi); + LiveIndex miIdx = makeSpaceAfter(mi); tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(next(mi)); MachineInstr *loadInst = &*loadInstItr; - unsigned loadInstIdx = miIdx + LiveInterval::InstrSlots::NUM; + LiveIndex loadInstIdx = lis->getNextIndex(miIdx); assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && "Store inst index already in use."); @@ -172,15 +174,15 @@ protected: /// before the given instruction. Returns the base index of the inserted /// instruction. The caller is responsible for adding an appropriate /// LiveInterval to the LiveIntervals analysis. - unsigned insertLoadBefore(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - unsigned miIdx = makeSpaceBefore(mi); + LiveIndex insertLoadBefore(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + LiveIndex miIdx = makeSpaceBefore(mi); tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(prior(mi)); MachineInstr *loadInst = &*loadInstItr; - unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM; + LiveIndex loadInstIdx = lis->getPrevIndex(miIdx); assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && "Load inst index already in use."); @@ -195,14 +197,15 @@ protected: unsigned vreg, const TargetRegisterClass *trc) { - unsigned loadInstIdx = insertLoadBefore(mi, ss, vreg, trc); - unsigned start = lis->getDefIndex(loadInstIdx), - end = lis->getUseIndex(lis->getInstructionIndex(mi)); + LiveIndex loadInstIdx = insertLoadBefore(mi, ss, vreg, trc); + LiveIndex start = lis->getDefIndex(loadInstIdx), + end = 
lis->getUseIndex(lis->getInstructionIndex(mi)); VNInfo *vni = li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator()); - vni->kills.push_back(lis->getInstructionIndex(mi)); - DOUT << " Intserting load range: [" << start << ", " << end << ")\n"; + vni->addKill(lis->getInstructionIndex(mi)); + DEBUG(errs() << " Intserting load range: [" << start + << ", " << end << ")\n"); LiveRange lr(start, end, vni); li->addRange(lr); @@ -214,7 +217,7 @@ protected: /// immediately before each use, and stores after each def. No folding is /// attempted. std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) { - DOUT << "Spilling everywhere " << *li << "\n"; + DEBUG(errs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && "Attempting to spill already spilled value."); @@ -222,7 +225,7 @@ protected: assert(!li->isStackSlot() && "Trying to spill a stack slot."); - DOUT << "Trivial spill everywhere of reg" << li->reg << "\n"; + DEBUG(errs() << "Trivial spill everywhere of reg" << li->reg << "\n"); std::vector<LiveInterval*> added; @@ -234,7 +237,7 @@ protected: MachineInstr *mi = &*regItr; - DOUT << " Processing " << *mi; + DEBUG(errs() << " Processing " << *mi); do { ++regItr; @@ -318,23 +321,21 @@ public: vrm->assignVirt2StackSlot(li->reg, ss); MachineInstr *mi = 0; - unsigned storeIdx = 0; + LiveIndex storeIdx = LiveIndex(); if (valno->isDefAccurate()) { // If we have an accurate def we can just grab an iterator to the instr // after the def. mi = lis->getInstructionFromIndex(valno->def); - storeIdx = insertStoreAfter(mi, ss, li->reg, trc) + - LiveInterval::InstrSlots::DEF; + storeIdx = lis->getDefIndex(insertStoreAfter(mi, ss, li->reg, trc)); } else { // if we get here we have a PHI def. 
mi = &lis->getMBBFromIndex(valno->def)->front(); - storeIdx = insertStoreBefore(mi, ss, li->reg, trc) + - LiveInterval::InstrSlots::DEF; + storeIdx = lis->getDefIndex(insertStoreBefore(mi, ss, li->reg, trc)); } MachineBasicBlock *defBlock = mi->getParent(); - unsigned loadIdx = 0; + LiveIndex loadIdx = LiveIndex(); // Now we need to find the load... MachineBasicBlock::iterator useItr(mi); @@ -342,13 +343,11 @@ public: if (useItr != defBlock->end()) { MachineInstr *loadInst = useItr; - loadIdx = insertLoadBefore(loadInst, ss, li->reg, trc) + - LiveInterval::InstrSlots::USE; + loadIdx = lis->getUseIndex(insertLoadBefore(loadInst, ss, li->reg, trc)); } else { MachineInstr *loadInst = &defBlock->back(); - loadIdx = insertLoadAfter(loadInst, ss, li->reg, trc) + - LiveInterval::InstrSlots::USE; + loadIdx = lis->getUseIndex(insertLoadAfter(loadInst, ss, li->reg, trc)); } li->removeRange(storeIdx, loadIdx, true); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index c179f1e..350bc6e 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -148,7 +148,8 @@ bool StackProtector::InsertStackProtectors() { // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) // - PointerType *PtrTy = PointerType::getUnqual(Type::Int8Ty); + PointerType *PtrTy = PointerType::getUnqual( + Type::getInt8Ty(RI->getContext())); StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); BasicBlock &Entry = F->getEntryBlock(); @@ -201,7 +202,7 @@ bool StackProtector::InsertStackProtectors() { // Generate the stack protector instructions in the old basic block. 
LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); LoadInst *LI2 = new LoadInst(AI, "", true, BB); - ICmpInst *Cmp = new ICmpInst(CmpInst::ICMP_EQ, LI1, LI2, "", BB); + ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, ""); BranchInst::Create(NewBB, FailBB, Cmp, BB); } @@ -215,10 +216,12 @@ bool StackProtector::InsertStackProtectors() { /// CreateFailBB - Create a basic block to jump to when the stack protector /// check fails. BasicBlock *StackProtector::CreateFailBB() { - BasicBlock *FailBB = BasicBlock::Create("CallStackCheckFailBlk", F); + BasicBlock *FailBB = BasicBlock::Create(F->getContext(), + "CallStackCheckFailBlk", F); Constant *StackChkFail = - M->getOrInsertFunction("__stack_chk_fail", Type::VoidTy, NULL); + M->getOrInsertFunction("__stack_chk_fail", + Type::getVoidTy(F->getContext()), NULL); CallInst::Create(StackChkFail, "", FailBB); - new UnreachableInst(FailBB); + new UnreachableInst(F->getContext(), FailBB); return FailBB; } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 5824644..fad0808 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" @@ -97,6 +98,7 @@ namespace { MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<LiveStacks>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); @@ -197,7 +199,7 @@ void StackSlotColoring::InitializeSlots() { Assignments.resize(LastFI); // Gather all spill slots into a list. 
- DOUT << "Spill slot intervals:\n"; + DEBUG(errs() << "Spill slot intervals:\n"); for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { LiveInterval &li = i->second; DEBUG(li.dump()); @@ -209,7 +211,7 @@ void StackSlotColoring::InitializeSlots() { OrigSizes[FI] = MFI->getObjectSize(FI); AllColors.set(FI); } - DOUT << '\n'; + DEBUG(errs() << '\n'); // Sort them by weight. std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); @@ -241,7 +243,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, return false; bool Changed = false; - DOUT << "Assigning unused registers to spill slots:\n"; + DEBUG(errs() << "Assigning unused registers to spill slots:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -271,7 +273,8 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, AllColored = false; continue; } else { - DOUT << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n'; + DEBUG(errs() << "Assigning fi#" << RSS << " to " + << TRI->getName(Reg) << '\n'); ColoredRegs.push_back(Reg); SlotMapping[RSS] = Reg; SlotIsReg.set(RSS); @@ -298,7 +301,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, ++NumEliminated; } } - DOUT << '\n'; + DEBUG(errs() << '\n'); return Changed; } @@ -333,7 +336,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Record the assignment. Assignments[Color].push_back(li); int FI = li->getStackSlotIndex(); - DOUT << "Assigning fi#" << FI << " to fi#" << Color << "\n"; + DEBUG(errs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); // Change size and alignment of the allocated slot. 
If there are multiple // objects sharing the same slot, then make sure the size and alignment @@ -357,7 +360,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); - DOUT << "Color spill slot intervals:\n"; + DEBUG(errs() << "Color spill slot intervals:\n"); bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; @@ -371,7 +374,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { Changed |= (SS != NewSS); } - DOUT << "\nSpill slots after coloring:\n"; + DEBUG(errs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -383,7 +386,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { #ifndef NDEBUG for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) DEBUG(SSIntervals[i]->dump()); - DOUT << '\n'; + DEBUG(errs() << '\n'); #endif // Can we "color" a stack slot with a unused register? @@ -415,7 +418,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { // Delete unused stack slots. while (NextColor != -1) { - DOUT << "Removing unused stack object fi#" << NextColor << "\n"; + DEBUG(errs() << "Removing unused stack object fi#" << NextColor << "\n"); MFI->RemoveStackObject(NextColor); NextColor = AllColors.find_next(NextColor); } @@ -449,6 +452,7 @@ bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) { /// to old frame index with new one. void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, MachineFunction &MF) { + // Update the operands. for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) @@ -459,22 +463,15 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, MO.setIndex(NewFI); } - // Update the MachineMemOperand for the new memory location. 
- // FIXME: We need a better method of managing these too. - SmallVector<MachineMemOperand, 2> MMOs(MI->memoperands_begin(), - MI->memoperands_end()); - MI->clearMemOperands(MF); + // Update the memory references. This changes the MachineMemOperands + // directly. They may be in use by multiple instructions, however all + // instructions using OldFI are being rewritten to use NewFI. const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI); - for (unsigned i = 0, ee = MMOs.size(); i != ee; ++i) { - if (MMOs[i].getValue() != OldSV) - MI->addMemOperand(MF, MMOs[i]); - else { - MachineMemOperand MMO(PseudoSourceValue::getFixedStack(NewFI), - MMOs[i].getFlags(), MMOs[i].getOffset(), - MMOs[i].getSize(), MMOs[i].getAlignment()); - MI->addMemOperand(MF, MMO); - } - } + const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) + if ((*I)->getValue() == OldSV) + (*I)->setValue(NewSV); } /// PropagateBackward - Traverse backward and look for the definition of @@ -503,7 +500,16 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, if (Reg == OldReg) { if (MO.isImplicit()) return false; - const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i); + + // Abort the use is actually a sub-register def. We don't have enough + // information to figure out if it is really legal. 
+ if (MO.getSubReg() || + TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + TID.getOpcode() == TargetInstrInfo::INSERT_SUBREG || + TID.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + return false; + + const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); if (RC && !RC->contains(NewReg)) return false; @@ -547,7 +553,6 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, SmallVector<MachineOperand*, 4> Uses; while (++MII != MBB->end()) { - bool FoundUse = false; bool FoundKill = false; const TargetInstrDesc &TID = MII->getDesc(); for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { @@ -561,12 +566,18 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, if (MO.isDef() || MO.isImplicit()) return false; - const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i); + // Abort the use is actually a sub-register use. We don't have enough + // information to figure out if it is really legal. + if (MO.getSubReg() || + TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + return false; + + const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); if (RC && !RC->contains(NewReg)) return false; - FoundUse = true; if (MO.isKill()) FoundKill = true; + Uses.push_back(&MO); } else if (TRI->regsOverlap(Reg, NewReg) || TRI->regsOverlap(Reg, OldReg)) @@ -593,7 +604,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, MachineBasicBlock *MBB = MI->getParent(); if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { if (PropagateForward(MI, MBB, DstReg, Reg)) { - DOUT << "Eliminated load: "; + DEBUG(errs() << "Eliminated load: "); DEBUG(MI->dump()); ++NumLoadElim; } else { @@ -609,7 +620,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, } } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DOUT << "Eliminated store: "; + DEBUG(errs() << 
"Eliminated store: "); DEBUG(MI->dump()); ++NumStoreElim; } else { @@ -687,7 +698,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { - DOUT << "********** Stack Slot Coloring **********\n"; + DEBUG(errs() << "********** Stack Slot Coloring **********\n"); MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index ca99528..48d6dc1 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -71,6 +71,7 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<LiveIntervals>(); @@ -294,7 +295,7 @@ StrongPHIElimination::computeDomForest( static bool isLiveIn(unsigned r, MachineBasicBlock* MBB, LiveIntervals& LI) { LiveInterval& I = LI.getOrCreateInterval(r); - unsigned idx = LI.getMBBStartIdx(MBB); + LiveIndex idx = LI.getMBBStartIdx(MBB); return I.liveAt(idx); } @@ -427,7 +428,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { } LiveInterval& PI = LI.getOrCreateInterval(DestReg); - unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P)); + LiveIndex pIdx = LI.getDefIndex(LI.getInstructionIndex(P)); VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno; PhiValueNumber.insert(std::make_pair(DestReg, PVN->id)); @@ -553,8 +554,8 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { // Add the renaming set for this PHI node to our overall renaming information for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(), QE = PHIUnion.end(); QI != QE; ++QI) { - DOUT << "Adding Renaming: " << QI->first << " -> " - << P->getOperand(0).getReg() << "\n"; + DEBUG(errs() << "Adding Renaming: " << QI->first << " -> " + << P->getOperand(0).getReg() << "\n"); } 
RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion)); @@ -696,7 +697,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*PI->getParent(), PI, t, curr.second, RC, RC); - DOUT << "Inserted copy from " << curr.second << " to " << t << "\n"; + DEBUG(errs() << "Inserted copy from " << curr.second << " to " << t + << "\n"); // Push temporary on Stacks Stacks[curr.second].push_back(t); @@ -712,8 +714,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, map[curr.first], RC, RC); map[curr.first] = curr.second; - DOUT << "Inserted copy from " << curr.first << " to " - << curr.second << "\n"; + DEBUG(errs() << "Inserted copy from " << curr.first << " to " + << curr.second << "\n"); // Push this copy onto InsertedPHICopies so we can // update LiveIntervals with it. @@ -746,7 +748,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, LiveInterval& I = LI.getInterval(curr.second); MachineBasicBlock::iterator term = MBB->getFirstTerminator(); - unsigned endIdx = 0; + LiveIndex endIdx = LiveIndex(); if (term != MBB->end()) endIdx = LI.getInstructionIndex(term); else @@ -782,16 +784,15 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) { if (RegHandled.insert(I->first).second) { LiveInterval& Int = LI.getOrCreateInterval(I->first); - unsigned instrIdx = LI.getInstructionIndex(I->second); - if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx))) - Int.removeRange(LiveIntervals::getDefIndex(instrIdx), - LI.getMBBEndIdx(I->second->getParent())+1, + LiveIndex instrIdx = LI.getInstructionIndex(I->second); + if (Int.liveAt(LI.getDefIndex(instrIdx))) + Int.removeRange(LI.getDefIndex(instrIdx), + LI.getNextSlot(LI.getMBBEndIdx(I->second->getParent())), true); LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second); - R.valno->copy = I->second; - 
R.valno->def = - LiveIntervals::getDefIndex(LI.getInstructionIndex(I->second)); + R.valno->setCopy(I->second); + R.valno->def = LI.getDefIndex(LI.getInstructionIndex(I->second)); } } } @@ -817,7 +818,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, // Remove the live range for the old vreg. LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg()); LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining( - LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); + LI.getUseIndex(LI.getInstructionIndex(I))); if (OldLR != OldInt.end()) OldInt.removeRange(*OldLR, true); @@ -829,11 +830,11 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, VNInfo* FirstVN = *Int.vni_begin(); FirstVN->setHasPHIKill(false); if (I->getOperand(i).isKill()) - FirstVN->kills.push_back( - LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); + FirstVN->addKill( + LI.getUseIndex(LI.getInstructionIndex(I))); LiveRange LR (LI.getMBBStartIdx(I->getParent()), - LiveIntervals::getUseIndex(LI.getInstructionIndex(I))+1, + LI.getNextSlot(LI.getUseIndex(LI.getInstructionIndex(I))), FirstVN); Int.addRange(LR); @@ -868,8 +869,8 @@ bool StrongPHIElimination::mergeLiveIntervals(unsigned primary, for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { LiveRange R = *I; - unsigned Start = R.start; - unsigned End = R.end; + LiveIndex Start = R.start; + LiveIndex End = R.end; if (LHS.getLiveRangeContaining(Start)) return false; @@ -927,7 +928,8 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { unsigned reg = OI->first; ++OI; I->second.erase(reg); - DOUT << "Removing Renaming: " << reg << " -> " << I->first << "\n"; + DEBUG(errs() << "Removing Renaming: " << reg << " -> " << I->first + << "\n"); } } } @@ -944,7 +946,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { while (I->second.size()) { std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin(); - DOUT << "Renaming: " << 
SI->first << " -> " << I->first << "\n"; + DEBUG(errs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); if (SI->first != I->first) { if (mergeLiveIntervals(I->first, SI->first)) { @@ -965,19 +967,19 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { LI.computeNumbering(); LiveInterval& Int = LI.getOrCreateInterval(I->first); - unsigned instrIdx = + LiveIndex instrIdx = LI.getInstructionIndex(--SI->second->getFirstTerminator()); - if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx))) - Int.removeRange(LiveIntervals::getDefIndex(instrIdx), - LI.getMBBEndIdx(SI->second)+1, true); + if (Int.liveAt(LI.getDefIndex(instrIdx))) + Int.removeRange(LI.getDefIndex(instrIdx), + LI.getNextSlot(LI.getMBBEndIdx(SI->second)), true); LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, --SI->second->getFirstTerminator()); - R.valno->copy = --SI->second->getFirstTerminator(); - R.valno->def = LiveIntervals::getDefIndex(instrIdx); + R.valno->setCopy(--SI->second->getFirstTerminator()); + R.valno->def = LI.getDefIndex(instrIdx); - DOUT << "Renaming failed: " << SI->first << " -> " - << I->first << "\n"; + DEBUG(errs() << "Renaming failed: " << SI->first << " -> " + << I->first << "\n"); } } @@ -1009,7 +1011,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { if (PI.containsOneValue()) { LI.removeInterval(DestReg); } else { - unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); + LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); PI.removeRange(*PI.getLiveRangeContaining(idx), true); } } else { @@ -1023,8 +1025,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { LiveInterval& InputI = LI.getInterval(reg); if (MBB != PInstr->getParent() && InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) && - InputI.expiredAt(LI.getInstructionIndex(PInstr) + - LiveInterval::InstrSlots::NUM)) + InputI.expiredAt(LI.getNextIndex(LI.getInstructionIndex(PInstr)))) 
InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()), LI.getInstructionIndex(PInstr), true); @@ -1032,7 +1033,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { // If the PHI is not dead, then the valno defined by the PHI // now has an unknown def. - unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); + LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); const LiveRange* PLR = PI.getLiveRangeContaining(idx); PLR->valno->setIsPHIDef(true); LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index b759599..c646869 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -13,21 +13,35 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // commuteInstruction - The default implementation of this method just exchanges -// operand 1 and 2. +// the two operands returned by findCommutedOpIndices. MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool NewMI) const { const TargetInstrDesc &TID = MI->getDesc(); bool HasDef = TID.getNumDefs(); - unsigned Idx1 = HasDef ? 1 : 0; - unsigned Idx2 = HasDef ? 2 : 1; + if (HasDef && !MI->getOperand(0).isReg()) + // No idea how to commute this instruction. Target should implement its own. 
+ return 0; + unsigned Idx1, Idx2; + if (!findCommutedOpIndices(MI, Idx1, Idx2)) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Don't know how to commute: " << *MI; + llvm_report_error(Msg.str()); + } assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && "This only knows how to commute register operands so far"); @@ -70,26 +84,24 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, return MI; } -/// CommuteChangesDestination - Return true if commuting the specified -/// instruction will also changes the destination operand. Also return the -/// current operand index of the would be new destination register by -/// reference. This can happen when the commutable instruction is also a -/// two-address instruction. -bool TargetInstrInfoImpl::CommuteChangesDestination(MachineInstr *MI, - unsigned &OpIdx) const{ +/// findCommutedOpIndices - If specified MI is commutable, return the two +/// operand indices that would swap value. Return true if the instruction +/// is not in a form which this routine understands. +bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.getNumDefs()) + if (!TID.isCommutable()) return false; - assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && - "This only knows how to commute register operands so far"); - if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { - // Must be two address instruction! - assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && - "Expecting a two-address instruction!"); - OpIdx = 2; - return true; - } - return false; + // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this + // is not true, then the target must implement this. + SrcOpIdx1 = TID.getNumDefs(); + SrcOpIdx2 = SrcOpIdx1 + 1; + if (!MI->getOperand(SrcOpIdx1).isReg() || + !MI->getOperand(SrcOpIdx2).isReg()) + // No idea. 
+ return false; + return true; } @@ -122,9 +134,12 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, + unsigned SubIdx, const MachineInstr *Orig) const { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); + MachineOperand &MO = MI->getOperand(0); + MO.setReg(DestReg); + MO.setSubReg(SubIdx); MBB.insert(I, MI); } @@ -171,11 +186,11 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FrameIndex) != -1); - MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FrameIndex), - Flags, - MFI.getObjectOffset(FrameIndex), - MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex), + Flags, /*Offset=*/0, + MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); NewMI->addMemOperand(MF, MMO); return NewMI; @@ -200,9 +215,93 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, if (!NewMI) return 0; // Copy the memoperands from the load to the folded instruction. - for (std::list<MachineMemOperand>::iterator I = LoadMI->memoperands_begin(), - E = LoadMI->memoperands_end(); I != E; ++I) - NewMI->addMemOperand(MF, *I); + NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); return NewMI; } + +bool +TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * + MI, + AliasAnalysis * + AA) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetMachine &TM = MF.getTarget(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + // A load from a fixed stack slot can be rematerialized. 
This may be + // redundant with subsequent checks, but it's target-independent, + // simple, and a common case. + int FrameIdx = 0; + if (TII.isLoadFromStackSlot(MI, FrameIdx) && + MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) + return true; + + const TargetInstrDesc &TID = MI->getDesc(); + + // Avoid instructions obviously unsafe for remat. + if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() || + TID.mayStore()) + return false; + + // Avoid instructions which load from potentially varying memory. + if (TID.mayLoad() && !MI->isInvariantLoad(AA)) + return false; + + // If any of the registers accessed are non-constant, conservatively assume + // the instruction is not rematerializable. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + + // Check for a well-behaved physical register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI.def_empty(Reg)) + return false; + BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0); + if (AllocatableRegs.test(Reg)) + return false; + // Check for a def among the register's aliases too. + for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!MRI.def_empty(AliasReg)) + return false; + if (AllocatableRegs.test(AliasReg)) + return false; + } + } else { + // A physreg def. We can't remat it. + return false; + } + continue; + } + + // Only allow one virtual-register def, and that in the first operand. + if (MO.isDef() != (i == 0)) + return false; + + // For the def, it should be the only def of that register. 
+ if (MO.isDef() && (next(MRI.def_begin(Reg)) != MRI.def_end() || + MRI.isLiveIn(Reg))) + return false; + + // Don't allow any virtual-register uses. Rematting an instruction with + // virtual register uses would length the live ranges of the uses, which + // is not necessarily a good idea, certainly not "trivial". + if (MO.isUse()) + return false; + } + + // Everything checked out. + return true; +} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 3c40404..a5a0f5b 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -62,6 +63,7 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; LiveVariables *LV; + AliasAnalysis *AA; // DistanceMap - Keep track the distance of a MI from the start of the // current basic block. 
@@ -106,13 +108,31 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegB, unsigned Dist); + typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; + bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, + SmallVector<NewKill, 4> &NewKills, + MachineBasicBlock *MBB, unsigned Dist); + bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned regB, unsigned regBIdx, unsigned Dist); + + bool TryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist); + void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &Processed); + public: static char ID; // Pass identification, replacement for typeid TwoAddressInstructionPass() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); @@ -143,7 +163,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MachineBasicBlock::iterator OldPos) { // Check if it's safe to move this instruction. bool SeenStore = true; // Be conservative. 
- if (!MI->isSafeToMove(TII, SeenStore)) + if (!MI->isSafeToMove(TII, SeenStore, AA)) return false; unsigned DefReg = 0; @@ -556,15 +576,15 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned RegB, unsigned RegC, unsigned Dist) { MachineInstr *MI = mi; - DOUT << "2addr: COMMUTING : " << *MI; + DEBUG(errs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI == 0) { - DOUT << "2addr: COMMUTING FAILED!\n"; + DEBUG(errs() << "2addr: COMMUTING FAILED!\n"); return false; } - DOUT << "2addr: COMMUTED TO: " << *NewMI; + DEBUG(errs() << "2addr: COMMUTED TO: " << *NewMI); // If the instruction changed to commute it, update livevar. if (NewMI != MI) { if (LV) @@ -611,8 +631,8 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, unsigned RegB, unsigned Dist) { MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); if (NewMI) { - DOUT << "2addr: CONVERTING 2-ADDR: " << *mi; - DOUT << "2addr: TO 3-ADDR: " << *NewMI; + DEBUG(errs() << "2addr: CONVERTING 2-ADDR: " << *mi); + DEBUG(errs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; if (NewMI->findRegisterUseOperand(RegB, false, TRI)) @@ -734,25 +754,174 @@ static bool isSafeToDelete(MachineInstr *MI, unsigned Reg, return true; } +/// canUpdateDeletedKills - Check if all the registers listed in Kills are +/// killed by instructions in MBB preceding the current instruction at +/// position Dist. If so, return true and record information about the +/// preceding kills in NewKills. 
+bool TwoAddressInstructionPass:: +canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, + SmallVector<NewKill, 4> &NewKills, + MachineBasicBlock *MBB, unsigned Dist) { + while (!Kills.empty()) { + unsigned Kill = Kills.back(); + Kills.pop_back(); + if (TargetRegisterInfo::isPhysicalRegister(Kill)) + return false; + + MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); + if (!LastKill) + return false; + + bool isModRef = LastKill->modifiesRegister(Kill); + NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), + LastKill)); + } + return true; +} + +/// DeleteUnusedInstr - If an instruction with a tied register operand can +/// be safely deleted, just delete it. +bool +TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned regB, unsigned regBIdx, + unsigned Dist) { + // Check if the instruction has no side effects and if all its defs are dead. + SmallVector<unsigned, 4> Kills; + if (!isSafeToDelete(mi, regB, TII, Kills)) + return false; + + // If this instruction kills some virtual registers, we need to + // update the kill information. If it's not possible to do so, + // then bail out. + SmallVector<NewKill, 4> NewKills; + if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist)) + return false; + + if (LV) { + while (!NewKills.empty()) { + MachineInstr *NewKill = NewKills.back().second; + unsigned Kill = NewKills.back().first.first; + bool isDead = NewKills.back().first.second; + NewKills.pop_back(); + if (LV->removeVirtualRegisterKilled(Kill, mi)) { + if (isDead) + LV->addVirtualRegisterDead(Kill, NewKill); + else + LV->addVirtualRegisterKilled(Kill, NewKill); + } + } + + // If regB was marked as a kill, update its Kills list. + if (mi->getOperand(regBIdx).isKill()) + LV->removeVirtualRegisterKilled(regB, mi); + } + + mbbi->erase(mi); // Nuke the old inst. 
+ mi = nmi; + return true; +} + +/// TryInstructionTransform - For the case where an instruction has a single +/// pair of tied register operands, attempt some transformations that may +/// either eliminate the tied operands or improve the opportunities for +/// coalescing away the register copy. Returns true if the tied operands +/// are eliminated altogether. +bool TwoAddressInstructionPass:: +TryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { + const TargetInstrDesc &TID = mi->getDesc(); + unsigned regA = mi->getOperand(DstIdx).getReg(); + unsigned regB = mi->getOperand(SrcIdx).getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(regB) && + "cannot make instruction into two-address form"); + + // If regA is dead and the instruction can be deleted, just delete + // it so it doesn't clobber regB. + bool regBKilled = isKilled(*mi, regB, MRI, TII); + if (!regBKilled && mi->getOperand(DstIdx).isDead() && + DeleteUnusedInstr(mi, nmi, mbbi, regB, SrcIdx, Dist)) { + ++NumDeletes; + return true; // Done with this instruction. + } + + // Check if it is profitable to commute the operands. + unsigned SrcOp1, SrcOp2; + unsigned regC = 0; + unsigned regCIdx = ~0U; + bool TryCommute = false; + bool AggressiveCommute = false; + if (TID.isCommutable() && mi->getNumOperands() >= 3 && + TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (SrcIdx == SrcOp1) + regCIdx = SrcOp2; + else if (SrcIdx == SrcOp2) + regCIdx = SrcOp1; + + if (regCIdx != ~0U) { + regC = mi->getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + // If C dies but B does not, swap the B and C operands. + // This makes the live ranges of A and C joinable. 
+ TryCommute = true; + else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + TryCommute = true; + AggressiveCommute = true; + } + } + } + + // If it's profitable to commute, try to do so. + if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return false; + } + + if (TID.isConvertibleTo3Addr()) { + // This instruction is potentially convertible to a true + // three-address instruction. Check if it is profitable. + if (!regBKilled || isProfitableToConv3Addr(regA)) { + // Try to convert it. + if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { + ++NumConvertedTo3Addr; + return true; // Done with this instruction. + } + } + } + return false; +} + /// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DOUT << "Machine Function\n"; + DEBUG(errs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); LV = getAnalysisIfAvailable<LiveVariables>(); + AA = &getAnalysis<AliasAnalysis>(); bool MadeChange = false; - DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + DEBUG(errs() << "********** Function: " + << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose def's are remat'ed. 
BitVector ReMatRegs; ReMatRegs.resize(MRI->getLastVirtReg()+1); + typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > + TiedOperandMap; + TiedOperandMap TiedOperands(4); + SmallPtrSet<MachineInstr*, 8> Processed; for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); mbbi != mbbe; ++mbbi) { @@ -771,175 +940,102 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { ProcessCopy(&*mi, &*mbbi, Processed); + // First scan through all the tied register uses in this instruction + // and record a list of pairs of tied operands for each register. unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM) ? mi->getNumOperands() : TID.getNumOperands(); - for (unsigned si = 0; si < NumOps; ++si) { - unsigned ti = 0; - if (!mi->isRegTiedToDefOperand(si, &ti)) + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) continue; if (FirstTied) { + FirstTied = false; ++NumTwoAddressInstrs; - DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM)); + DEBUG(errs() << '\t' << *mi); } - FirstTied = false; + assert(mi->getOperand(SrcIdx).isReg() && + mi->getOperand(SrcIdx).getReg() && + mi->getOperand(SrcIdx).isUse() && + "two address instruction invalid"); - assert(mi->getOperand(si).isReg() && mi->getOperand(si).getReg() && - mi->getOperand(si).isUse() && "two address instruction invalid"); + unsigned regB = mi->getOperand(SrcIdx).getReg(); + TiedOperandMap::iterator OI = TiedOperands.find(regB); + if (OI == TiedOperands.end()) { + SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair; + OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; + } + OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); + } - // If the two operands are the same we just remove the use - // and mark the def as def&use, otherwise we have to insert a copy. 
- if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) { - // Rewrite: - // a = b op c - // to: - // a = b - // a = a op c - unsigned regA = mi->getOperand(ti).getReg(); - unsigned regB = mi->getOperand(si).getReg(); + // Now iterate over the information collected above. + for (TiedOperandMap::iterator OI = TiedOperands.begin(), + OE = TiedOperands.end(); OI != OE; ++OI) { + SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { + unsigned SrcIdx = TiedPairs[0].first; + unsigned DstIdx = TiedPairs[0].second; + + // If the registers are already equal, nothing needs to be done. + if (mi->getOperand(SrcIdx).getReg() == + mi->getOperand(DstIdx).getReg()) + break; // Done with this instruction. + + if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) + break; // The tied operands have been eliminated. + } + + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned regB = OI->first; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + unsigned regA = mi->getOperand(DstIdx).getReg(); + // Grab regB from the instruction because it may have changed if the + // instruction was commuted. + regB = mi->getOperand(SrcIdx).getReg(); + + if (regA == regB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. 
+ AllUsesCopied = false; + continue; + } + LastCopiedReg = regA; assert(TargetRegisterInfo::isVirtualRegister(regB) && - "cannot update physical register live information"); + "cannot make instruction into two-address form"); #ifndef NDEBUG - // First, verify that we don't have a use of a in the instruction (a = - // b + a for example) because our transformation will not work. This - // should never occur because we are in SSA form. + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. for (unsigned i = 0; i != mi->getNumOperands(); ++i) - assert(i == ti || + assert(i == DstIdx || !mi->getOperand(i).isReg() || mi->getOperand(i).getReg() != regA); #endif - // If this instruction is not the killing user of B, see if we can - // rearrange the code to make it so. Making it the killing user will - // allow us to coalesce A and B together, eliminating the copy we are - // about to insert. - if (!isKilled(*mi, regB, MRI, TII)) { - // If regA is dead and the instruction can be deleted, just delete - // it so it doesn't clobber regB. - SmallVector<unsigned, 4> Kills; - if (mi->getOperand(ti).isDead() && - isSafeToDelete(mi, regB, TII, Kills)) { - SmallVector<std::pair<std::pair<unsigned, bool> - ,MachineInstr*>, 4> NewKills; - bool ReallySafe = true; - // If this instruction kills some virtual registers, we need - // update the kill information. If it's not possible to do so, - // then bail out. 
- while (!Kills.empty()) { - unsigned Kill = Kills.back(); - Kills.pop_back(); - if (TargetRegisterInfo::isPhysicalRegister(Kill)) { - ReallySafe = false; - break; - } - MachineInstr *LastKill = FindLastUseInMBB(Kill, &*mbbi, Dist); - if (LastKill) { - bool isModRef = LastKill->modifiesRegister(Kill); - NewKills.push_back(std::make_pair(std::make_pair(Kill,isModRef), - LastKill)); - } else { - ReallySafe = false; - break; - } - } - - if (ReallySafe) { - if (LV) { - while (!NewKills.empty()) { - MachineInstr *NewKill = NewKills.back().second; - unsigned Kill = NewKills.back().first.first; - bool isDead = NewKills.back().first.second; - NewKills.pop_back(); - if (LV->removeVirtualRegisterKilled(Kill, mi)) { - if (isDead) - LV->addVirtualRegisterDead(Kill, NewKill); - else - LV->addVirtualRegisterKilled(Kill, NewKill); - } - } - } - - // We're really going to nuke the old inst. If regB was marked - // as a kill we need to update its Kills list. - if (mi->getOperand(si).isKill()) - LV->removeVirtualRegisterKilled(regB, mi); - - mbbi->erase(mi); // Nuke the old inst. - mi = nmi; - ++NumDeletes; - break; // Done with this instruction. - } - } - - // If this instruction is commutative, check to see if C dies. If - // so, swap the B and C operands. This makes the live ranges of A - // and C joinable. - // FIXME: This code also works for A := B op C instructions. - if (TID.isCommutable() && mi->getNumOperands() >= 3) { - assert(mi->getOperand(3-si).isReg() && - "Not a proper commutative instruction!"); - unsigned regC = mi->getOperand(3-si).getReg(); - if (isKilled(*mi, regC, MRI, TII)) { - if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) { - ++NumCommuted; - regB = regC; - goto InstructionRearranged; - } - } - } - - // If this instruction is potentially convertible to a true - // three-address instruction, - if (TID.isConvertibleTo3Addr()) { - // FIXME: This assumes there are no more operands which are tied - // to another register. 
-#ifndef NDEBUG - for (unsigned i = si + 1, e = TID.getNumOperands(); i < e; ++i) - assert(TID.getOperandConstraint(i, TOI::TIED_TO) == -1); -#endif - - if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { - ++NumConvertedTo3Addr; - break; // Done with this instruction. - } - } - } - - // If it's profitable to commute the instruction, do so. - if (TID.isCommutable() && mi->getNumOperands() >= 3) { - unsigned regC = mi->getOperand(3-si).getReg(); - if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) - if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) { - ++NumAggrCommuted; - ++NumCommuted; - regB = regC; - goto InstructionRearranged; - } - } - - // If it's profitable to convert the 2-address instruction to a - // 3-address one, do so. - if (TID.isConvertibleTo3Addr() && isProfitableToConv3Addr(regA)) { - if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { - ++NumConvertedTo3Addr; - break; // Done with this instruction. - } - } - - InstructionRearranged: - const TargetRegisterClass* rc = MRI->getRegClass(regB); + // Emit a copy or rematerialize the definition. + const TargetRegisterClass *rc = MRI->getRegClass(regB); MachineInstr *DefMI = MRI->getVRegDef(regB); // If it's safe and profitable, remat the definition instead of // copying it. if (DefMI && DefMI->getDesc().isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, regB) && + DefMI->isSafeToReMat(TII, regB, AA) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(cerr << "2addr: REMATTING : " << *DefMI << "\n"); - TII->reMaterialize(*mbbi, mi, regA, DefMI); + DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n"); + unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI); ReMatRegs.set(regB); ++NumReMats; } else { @@ -953,32 +1049,57 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DistanceMap.insert(std::make_pair(prevMI, Dist)); DistanceMap[mi] = ++Dist; - // Update live variables for regB. 
- if (LV) { - if (LV->removeVirtualRegisterKilled(regB, mi)) - LV->addVirtualRegisterKilled(regB, prevMI); + DEBUG(errs() << "\t\tprepend:\t" << *prevMI); - if (LV->removeVirtualRegisterDead(regB, mi)) - LV->addVirtualRegisterDead(regB, prevMI); + MachineOperand &MO = mi->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; } + MO.setReg(regA); + } - DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM)); - - // Replace all occurences of regB with regA. + if (AllUsesCopied) { + // Replace other (un-tied) uses of regB with LastCopiedReg. for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - if (mi->getOperand(i).isReg() && - mi->getOperand(i).getReg() == regB) - mi->getOperand(i).setReg(regA); + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } } - } - assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse()); - mi->getOperand(ti).setReg(mi->getOperand(si).getReg()); + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) + LV->addVirtualRegisterKilled(regB, prior(mi)); + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. 
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } + MadeChange = true; - DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM)); + DEBUG(errs() << "\t\trewrite to:\t" << *mi); } + // Clear TiedOperands here instead of at the top of the loop + // since most instructions do not have tied operands. + TiedOperands.clear(); mi = nmi; } } diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index c3b213c..e7c3412 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -26,8 +26,11 @@ #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Type.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" @@ -42,6 +45,10 @@ namespace { public: static char ID; // Pass identification, replacement for typeid UnreachableBlockElim() : FunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<ProfileInfo>(); + } }; } char UnreachableBlockElim::ID = 0; @@ -77,8 +84,11 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { } // Actually remove the blocks now. 
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>(); + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + if (PI) PI->removeBlock(DeadBlocks[i]); DeadBlocks[i]->eraseFromParent(); + } return DeadBlocks.size(); } @@ -88,6 +98,7 @@ namespace { class VISIBILITY_HIDDEN UnreachableMachineBlockElim : public MachineFunctionPass { virtual bool runOnMachineFunction(MachineFunction &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; MachineModuleInfo *MMI; public: static char ID; // Pass identification, replacement for typeid @@ -102,10 +113,18 @@ Y("unreachable-mbb-elimination", const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y; +void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { SmallPtrSet<MachineBasicBlock*, 8> Reachable; MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); // Mark all reachable blocks. for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > @@ -123,6 +142,10 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { if (!Reachable.count(BB)) { DeadBlocks.push_back(BB); + // Update dominator and loop info. 
+ if (MLI) MLI->removeBlock(BB); + if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB); + while (BB->succ_begin() != BB->succ_end()) { MachineBasicBlock* succ = *BB->succ_begin(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 4d3417f..c78f35b 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -258,7 +259,7 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { return AnyUnused; } -void VirtRegMap::print(std::ostream &OS, const Module* M) const { +void VirtRegMap::print(raw_ostream &OS, const Module* M) const { const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); OS << "********** REGISTER MAP **********\n"; @@ -277,5 +278,5 @@ void VirtRegMap::print(std::ostream &OS, const Module* M) const { } void VirtRegMap::dump() const { - print(cerr); + print(errs()); } diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index fe767b7..bdc2d1f 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -18,13 +18,13 @@ #define LLVM_CODEGEN_VIRTREGMAP_H #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Streams.h" #include <map> namespace llvm { @@ -34,6 +34,7 @@ namespace llvm { class MachineRegisterInfo; class TargetInstrInfo; class TargetRegisterInfo; + class raw_ostream; class VirtRegMap : public MachineFunctionPass { public: @@ -79,7 +80,7 @@ namespace llvm { /// Virt2SplitKillMap - This is splitted virtual register to its last use /// (kill) index 
mapping. - IndexedMap<unsigned> Virt2SplitKillMap; + IndexedMap<LiveIndex> Virt2SplitKillMap; /// ReMatMap - This is virtual register to re-materialized instruction /// mapping. Each virtual register whose definition is going to be @@ -141,7 +142,7 @@ namespace llvm { VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG), Virt2StackSlotMap(NO_STACK_SLOT), Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), - Virt2SplitKillMap(0), ReMatMap(NULL), + Virt2SplitKillMap(LiveIndex()), ReMatMap(NULL), ReMatId(MAX_STACK_SLOT+1), LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -265,17 +266,17 @@ namespace llvm { } /// @brief record the last use (kill) of a split virtual register. - void addKillPoint(unsigned virtReg, unsigned index) { + void addKillPoint(unsigned virtReg, LiveIndex index) { Virt2SplitKillMap[virtReg] = index; } - unsigned getKillPoint(unsigned virtReg) const { + LiveIndex getKillPoint(unsigned virtReg) const { return Virt2SplitKillMap[virtReg]; } /// @brief remove the last use (kill) of a split virtual register. void removeKillPoint(unsigned virtReg) { - Virt2SplitKillMap[virtReg] = 0; + Virt2SplitKillMap[virtReg] = LiveIndex(); } /// @brief returns true if the specified MachineInstr is a spill point. 
@@ -481,16 +482,11 @@ namespace llvm { return 0; } - void print(std::ostream &OS, const Module* M = 0) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS, const Module* M = 0) const; void dump() const; }; - inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) { - VRM.print(OS); - return OS; - } - inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) { + inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) { VRM.print(OS); return OS; } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index be0b016..401bcb6 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -9,10 +9,19 @@ #define DEBUG_TYPE "virtregrewriter" #include "VirtRegRewriter.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include <algorithm> using namespace llvm; @@ -45,10 +54,15 @@ RewriterOpt("rewriter", clEnumValEnd), cl::init(local)); +static cl::opt<bool> +ScheduleSpills("schedule-spills", + cl::desc("Schedule spill code"), + cl::init(false)); + VirtRegRewriter::~VirtRegRewriter() {} +namespace { - /// This class is intended for use with the new spilling framework only. It /// rewrites vreg def/uses to use the assigned preg, but does not insert any /// spill code. 
@@ -56,8 +70,13 @@ struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter { bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, LiveIntervals* LIs) { - DOUT << "********** REWRITE MACHINE CODE **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + DEBUG(errs() << "********** REWRITE MACHINE CODE **********\n"); + DEBUG(errs() << "********** Function: " + << MF.getFunction()->getName() << '\n'); + DEBUG(errs() << "**** Machine Instrs" + << "(NOTE! Does not include spills and reloads!) ****\n"); + DEBUG(MF.dump()); + MachineRegisterInfo *mri = &MF.getRegInfo(); bool changed = false; @@ -79,14 +98,22 @@ struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter { } } } + + + DEBUG(errs() << "**** Post Machine Instrs ****\n"); + DEBUG(MF.dump()); return changed; } }; +} + // ************************************************************************ // +namespace { + /// AvailableSpills - As the local rewriter is scanning and rewriting an MBB /// from top down, keep track of which spill slots or remat are available in /// each register. 
@@ -154,10 +181,11 @@ public: (unsigned)CanClobber; if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DOUT << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Remembering RM#" + << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Remembering SS#" << SlotOrReMat; - DOUT << " in physreg " << TRI->getName(Reg) << "\n"; + DEBUG(errs() << "Remembering SS#" << SlotOrReMat); + DEBUG(errs() << " in physreg " << TRI->getName(Reg) << "\n"); } /// canClobberPhysRegForSS - Return true if the spiller is allowed to change @@ -209,8 +237,82 @@ public: std::vector<MachineOperand*> &KillOps); }; +} + // ************************************************************************ // +// Given a location where a reload of a spilled register or a remat of +// a constant is to be inserted, attempt to find a safe location to +// insert the load at an earlier point in the basic-block, to hide +// latency of the load and to avoid address-generation interlock +// issues. +static MachineBasicBlock::iterator +ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc, + MachineBasicBlock::iterator const Begin, + unsigned PhysReg, + const TargetRegisterInfo *TRI, + bool DoReMat, + int SSorRMId, + const TargetInstrInfo *TII, + const MachineFunction &MF) +{ + if (!ScheduleSpills) + return InsertLoc; + + // Spill backscheduling is of primary interest to addresses, so + // don't do anything if the register isn't in the register class + // used for pointers. + + const TargetLowering *TL = MF.getTarget().getTargetLowering(); + + if (!TL->isTypeLegal(TL->getPointerTy())) + // Believe it or not, this is true on PIC16. + return InsertLoc; + + const TargetRegisterClass *ptrRegClass = + TL->getRegClassFor(TL->getPointerTy()); + if (!ptrRegClass->contains(PhysReg)) + return InsertLoc; + + // Scan upwards through the preceding instructions. 
If an instruction doesn't + // reference the stack slot or the register we're loading, we can + // backschedule the reload up past it. + MachineBasicBlock::iterator NewInsertLoc = InsertLoc; + while (NewInsertLoc != Begin) { + MachineBasicBlock::iterator Prev = prior(NewInsertLoc); + for (unsigned i = 0; i < Prev->getNumOperands(); ++i) { + MachineOperand &Op = Prev->getOperand(i); + if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId) + goto stop; + } + if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 || + Prev->findRegisterDefOperand(PhysReg)) + goto stop; + for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias) + if (Prev->findRegisterUseOperandIdx(*Alias) != -1 || + Prev->findRegisterDefOperand(*Alias)) + goto stop; + NewInsertLoc = Prev; + } +stop:; + + // If we made it to the beginning of the block, turn around and move back + // down just past any existing reloads. They're likely to be reloads/remats + // for instructions earlier than what our current reload/remat is for, so + // they should be scheduled earlier. + if (NewInsertLoc == Begin) { + int FrameIdx; + while (InsertLoc != NewInsertLoc && + (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) || + TII->isTriviallyReMaterializable(NewInsertLoc))) + ++NewInsertLoc; + } + + return NewInsertLoc; +} + +namespace { + // ReusedOp - For each reused operand, we keep track of a bit of information, // in case we need to rollback upon processing a new operand. See comments // below. @@ -276,7 +378,8 @@ public: /// GetRegForReload - We are about to emit a reload into PhysReg. If there /// is some other operand that is using the specified register, either pick /// a new register to use, or evict the previous reload and use this reg. 
- unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI, + unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg, + MachineFunction &MF, MachineInstr *MI, AvailableSpills &Spills, std::vector<MachineInstr*> &MaybeDeadStores, SmallSet<unsigned, 8> &Rejected, @@ -295,18 +398,21 @@ public: /// sees r1 is taken by t2, tries t2's reload register r0 /// sees r0 is taken by t3, tries t3's reload register r1 /// sees r1 is taken by t2, tries t2's reload register r0 ... - unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI, + unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI, AvailableSpills &Spills, std::vector<MachineInstr*> &MaybeDeadStores, BitVector &RegKills, std::vector<MachineOperand*> &KillOps, VirtRegMap &VRM) { SmallSet<unsigned, 8> Rejected; - return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected, - RegKills, KillOps, VRM); + MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg); + return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); } }; +} // ****************** // // Utility Functions // @@ -489,7 +595,14 @@ static void ReMaterialize(MachineBasicBlock &MBB, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, VirtRegMap &VRM) { - TII->reMaterialize(MBB, MII, DestReg, VRM.getReMaterializedMI(Reg)); + MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); +#ifndef NDEBUG + const TargetInstrDesc &TID = ReMatDefMI->getDesc(); + assert(TID.getNumDefs() == 1 && + "Don't know how to remat instructions that define > 1 values!"); +#endif + TII->reMaterialize(MBB, MII, DestReg, + ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI); MachineInstr *NewMI = prior(MII); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -538,8 +651,8 @@ void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { 
assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DOUT << "PhysReg " << TRI->getName(PhysReg) - << " copied, it is available for use but can no longer be modified\n"; + DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + << " copied, it is available for use but can no longer be modified\n"); } } @@ -563,12 +676,12 @@ void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DOUT << "PhysReg " << TRI->getName(PhysReg) - << " clobbered, invalidating "; + DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + << " clobbered, invalidating "); if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DOUT << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 << "\n"; + DEBUG(errs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); else - DOUT << "SS#" << SlotOrReMat << "\n"; + DEBUG(errs() << "SS#" << SlotOrReMat << "\n"); } } @@ -650,15 +763,17 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { /// GetRegForReload - We are about to emit a reload into PhysReg. If there /// is some other operand that is using the specified register, either pick /// a new register to use, or evict the previous reload and use this reg. 
-unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, - AvailableSpills &Spills, +unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, + unsigned PhysReg, + MachineFunction &MF, + MachineInstr *MI, AvailableSpills &Spills, std::vector<MachineInstr*> &MaybeDeadStores, SmallSet<unsigned, 8> &Rejected, BitVector &RegKills, std::vector<MachineOperand*> &KillOps, VirtRegMap &VRM) { - const TargetInstrInfo* TII = MI->getParent()->getParent()->getTarget() - .getInstrInfo(); + const TargetInstrInfo* TII = MF.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Spills.getRegInfo(); if (Reuses.empty()) return PhysReg; // This is most often empty. @@ -670,19 +785,19 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, // considered and subsequently rejected because it has also been reused // by another operand. if (Op.PhysRegReused == PhysReg && - Rejected.count(Op.AssignedPhysReg) == 0) { + Rejected.count(Op.AssignedPhysReg) == 0 && + RC->contains(Op.AssignedPhysReg)) { // Yup, use the reload register that we didn't use before. unsigned NewReg = Op.AssignedPhysReg; Rejected.insert(PhysReg); - return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected, + return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, Rejected, RegKills, KillOps, VRM); } else { // Otherwise, we might also have a problem if a previously reused - // value aliases the new register. If so, codegen the previous reload + // value aliases the new register. If so, codegen the previous reload // and use this one. unsigned PRRU = Op.PhysRegReused; - const TargetRegisterInfo *TRI = Spills.getRegInfo(); - if (TRI->areAliases(PRRU, PhysReg)) { + if (TRI->regsOverlap(PRRU, PhysReg)) { // Okay, we found out that an alias of a reused register // was used. This isn't good because it means we have // to undo a previous reuse. 
@@ -695,21 +810,45 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, ReusedOp NewOp = Op; Reuses.erase(Reuses.begin()+ro); + // MI may be using only a sub-register of PhysRegUsed. + unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg(); + unsigned SubIdx = 0; + assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) && + "A reuse cannot be a virtual register"); + if (PRRU != RealPhysRegUsed) { + // What was the sub-register index? + unsigned SubReg; + for (SubIdx = 1; (SubReg = TRI->getSubReg(PRRU, SubIdx)); SubIdx++) + if (SubReg == RealPhysRegUsed) + break; + assert(SubReg == RealPhysRegUsed && + "Operand physreg is not a sub-register of PhysRegUsed"); + } + // Ok, we're going to try to reload the assigned physreg into the // slot that we were supposed to in the first place. However, that // register could hold a reuse. Check to see if it conflicts or // would prefer us to use a different register. - unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg, - MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - - MachineBasicBlock::iterator MII = MI; - if (NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT) { - ReMaterialize(*MBB, MII, NewPhysReg, NewOp.VirtReg, TII, TRI,VRM); - } else { - TII->loadRegFromStackSlot(*MBB, MII, NewPhysReg, + unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg, + MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); + + bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; + int SSorRMId = DoReMat + ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat; + + // Back-schedule reloads and remats. 
+ MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, + DoReMat, SSorRMId, TII, MF); + + if (DoReMat) { + ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII, + TRI, VRM); + } else { + TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, NewOp.StackSlotOrReMat, AliasRC); - MachineInstr *LoadMI = prior(MII); + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); // Any stores to this stack slot are not dead anymore. MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; @@ -718,17 +857,15 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, Spills.ClobberPhysReg(NewPhysReg); Spills.ClobberPhysReg(NewOp.PhysRegReused); - unsigned SubIdx = MI->getOperand(NewOp.Operand).getSubReg(); unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg; MI->getOperand(NewOp.Operand).setReg(RReg); MI->getOperand(NewOp.Operand).setSubReg(0); Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); - --MII; - UpdateKills(*MII, TRI, RegKills, KillOps); - DOUT << '\t' << *MII; + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(errs() << '\t' << *prior(InsertLoc)); - DOUT << "Reuse undone!\n"; + DEBUG(errs() << "Reuse undone!\n"); --NumReused; // Finally, PhysReg is now available, go ahead and use it. @@ -856,6 +993,8 @@ namespace { // Local Spiller Implementation // // ***************************** // +namespace { + class VISIBILITY_HIDDEN LocalRewriter : public VirtRegRewriter { MachineRegisterInfo *RegInfo; const TargetRegisterInfo *TRI; @@ -870,10 +1009,10 @@ public: TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); AllocatableRegs = TRI->getAllocatableSet(MF); - DOUT << "\n**** Local spiller rewriting function '" - << MF.getFunction()->getName() << "':\n"; - DOUT << "**** Machine Instrs (NOTE! 
Does not include spills and reloads!)" - " ****\n"; + DEBUG(errs() << "\n**** Local spiller rewriting function '" + << MF.getFunction()->getName() << "':\n"); + DEBUG(errs() << "**** Machine Instrs (NOTE! Does not include spills and" + " reloads!) ****\n"); DEBUG(MF.dump()); // Spills - Keep track of which spilled values are available in physregs @@ -924,7 +1063,7 @@ public: Spills.clear(); } - DOUT << "**** Post Machine Instrs ****\n"; + DEBUG(errs() << "**** Post Machine Instrs ****\n"); DEBUG(MF.dump()); // Mark unused spill slots. @@ -988,6 +1127,9 @@ private: if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM)) return false; + // Back-schedule reloads and remats. + ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, false, SS, TII, MF); + // Load from SS to the spare physical register. TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC); // This invalidates Phys. @@ -999,7 +1141,7 @@ private: // Unfold current MI. SmallVector<MachineInstr*, 4> NewMIs; if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) - assert(0 && "Unable unfold the load / store folding instruction!"); + llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); VRM.transferRestorePts(&MI, NewMIs[0]); @@ -1015,7 +1157,7 @@ private: NextMII = next(NextMII); NewMIs.clear(); if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) - assert(0 && "Unable unfold the load / store folding instruction!"); + llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); VRM.transferRestorePts(&NextMI, NewMIs[0]); @@ -1157,6 +1299,32 @@ private: return false; } + /// CommuteChangesDestination - We are looking for r0 = op r1, r2 and + /// where SrcReg is r1 and it is tied to r0. Return true if after + /// commuting this instruction it will be r0 = op r2, r1. 
+ static bool CommuteChangesDestination(MachineInstr *DefMI, + const TargetInstrDesc &TID, + unsigned SrcReg, + const TargetInstrInfo *TII, + unsigned &DstIdx) { + if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3) + return false; + if (!DefMI->getOperand(1).isReg() || + DefMI->getOperand(1).getReg() != SrcReg) + return false; + unsigned DefIdx; + if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0) + return false; + unsigned SrcIdx1, SrcIdx2; + if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2)) + return false; + if (SrcIdx1 == 1 && SrcIdx2 == 2) { + DstIdx = 2; + return true; + } + return false; + } + /// CommuteToFoldReload - /// Look for /// r1 = load fi#1 @@ -1185,7 +1353,7 @@ private: unsigned NewDstIdx; if (DefMII != MBB.begin() && TID.isCommutable() && - TII->CommuteChangesDestination(DefMI, NewDstIdx)) { + CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) { MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) @@ -1266,11 +1434,11 @@ private: TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC); MachineInstr *StoreMI = next(MII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DOUT << "Store:\t" << *StoreMI; + DEBUG(errs() << "Store:\t" << *StoreMI); // If there is a dead store to this stack slot, nuke it now. if (LastStore) { - DOUT << "Removed dead store:\t" << *LastStore; + DEBUG(errs() << "Removed dead store:\t" << *LastStore); ++NumDSE; SmallVector<unsigned, 2> KillRegs; InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); @@ -1310,6 +1478,29 @@ private: ++NumStores; } + /// isSafeToDelete - Return true if this instruction doesn't produce any side + /// effect and all of its defs are dead. 
+ static bool isSafeToDelete(MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() || + TID.isCall() || TID.isBarrier() || TID.isReturn() || + TID.hasUnmodeledSideEffects()) + return false; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && !MO.isDead()) + return false; + if (MO.isUse() && MO.isKill()) + // FIXME: We can't remove kill markers or else the scavenger will assert. + // An alternative is to add a ADD pseudo instruction to replace kill + // markers. + return false; + } + return true; + } + /// TransferDeadness - A identity copy definition is dead and it's being /// removed. Find the last def or use and mark it as dead / kill. void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist, @@ -1351,9 +1542,7 @@ private: if (LastUD->isDef()) { // If the instruction has no side effect, delete it and propagate // backward further. Otherwise, mark is dead and we are done. 
- const TargetInstrDesc &TID = LastUDMI->getDesc(); - if (TID.mayStore() || TID.isCall() || TID.isTerminator() || - TID.hasUnmodeledSideEffects()) { + if (!isSafeToDelete(*LastUDMI)) { LastUD->setIsDead(); break; } @@ -1375,8 +1564,8 @@ private: AvailableSpills &Spills, BitVector &RegKills, std::vector<MachineOperand*> &KillOps) { - DOUT << "\n**** Local spiller rewriting MBB '" - << MBB.getBasicBlock()->getName() << "':\n"; + DEBUG(errs() << "\n**** Local spiller rewriting MBB '" + << MBB.getBasicBlock()->getName() << "':\n"); MachineFunction &MF = *MBB.getParent(); @@ -1425,15 +1614,23 @@ private: assert(RC && "Unable to determine register class!"); int SS = VRM.getEmergencySpillSlot(RC); if (UsedSS.count(SS)) - assert(0 && "Need to spill more than one physical registers!"); + llvm_unreachable("Need to spill more than one physical registers!"); UsedSS.insert(SS); TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC); MachineInstr *StoreMI = prior(MII); VRM.addSpillSlotUse(SS, StoreMI); - TII->loadRegFromStackSlot(MBB, next(MII), PhysReg, SS, RC); - MachineInstr *LoadMI = next(MII); + + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(next(MII), MBB.begin(), PhysReg, TRI, false, + SS, TII, MF); + + TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC); + + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(SS, LoadMI); ++NumPSpills; + DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } NextMII = next(MII); } @@ -1467,28 +1664,36 @@ private: // If the value is already available in the expected register, save // a reload / remat. 
if (SSorRMId) - DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << SSorRMId; - DOUT << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(Phys) << "\n"; + DEBUG(errs() << "Reusing SS#" << SSorRMId); + DEBUG(errs() << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(Phys) << '\n'); ++NumOmitted; continue; } else if (InReg && InReg != Phys) { if (SSorRMId) - DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << SSorRMId; - DOUT << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" by copying it into physreg " - << TRI->getName(Phys) << "\n"; + DEBUG(errs() << "Reusing SS#" << SSorRMId); + DEBUG(errs() << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" by copying it into physreg " + << TRI->getName(Phys) << '\n'); // If the reloaded / remat value is available in another register, // copy it to the desired register. - TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC); + + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat, + SSorRMId, TII, MF); + + TII->copyRegToReg(MBB, InsertLoc, Phys, InReg, RC, RC); // This invalidates Phys. Spills.ClobberPhysReg(Phys); @@ -1496,24 +1701,30 @@ private: Spills.addAvailable(SSorRMId, Phys); // Mark is killed. 
- MachineInstr *CopyMI = prior(MII); + MachineInstr *CopyMI = prior(InsertLoc); MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); - DOUT << '\t' << *CopyMI; + DEBUG(errs() << '\t' << *CopyMI); ++NumCopified; continue; } + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat, + SSorRMId, TII, MF); + if (VRM.isReMaterialized(VirtReg)) { - ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM); + ReMaterialize(MBB, InsertLoc, Phys, VirtReg, TII, TRI, VRM); } else { const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); - TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC); - MachineInstr *LoadMI = prior(MII); + TII->loadRegFromStackSlot(MBB, InsertLoc, Phys, SSorRMId, RC); + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(SSorRMId, LoadMI); ++NumLoads; + DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } // This invalidates Phys. @@ -1521,8 +1732,8 @@ private: // Remember it's available. Spills.addAvailable(SSorRMId, Phys); - UpdateKills(*prior(MII), TRI, RegKills, KillOps); - DOUT << '\t' << *prior(MII); + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(errs() << '\t' << *prior(MII)); } } @@ -1541,7 +1752,7 @@ private: TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC); MachineInstr *StoreMI = next(MII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DOUT << "Store:\t" << *StoreMI; + DEBUG(errs() << "Store:\t" << *StoreMI); VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); } NextMII = next(MII); @@ -1660,13 +1871,14 @@ private: if (CanReuse) { // If this stack slot value is already available, reuse it! 
if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << ReuseSlot; - DOUT << " from physreg " - << TRI->getName(PhysReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(VRM.getPhys(VirtReg)) << "\n"; + DEBUG(errs() << "Reusing SS#" << ReuseSlot); + DEBUG(errs() << " from physreg " + << TRI->getName(PhysReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(VRM.getPhys(VirtReg)) << '\n'); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); @@ -1733,20 +1945,22 @@ private: // available. If this occurs, use the register indicated by the // reuser. if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); + DesignatedReg = ReusedOperands.GetRegForReload(VirtReg, + DesignatedReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); // If the mapped designated register is actually the physreg we have // incoming, we don't need to inserted a dead copy. if (DesignatedReg == PhysReg) { // If this stack slot value is already available, reuse it! if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << ReuseSlot; - DOUT << " from physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg - << " instead of reloading into same physreg.\n"; + DEBUG(errs() << "Reusing SS#" << ReuseSlot); + DEBUG(errs() << " from physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg + << " instead of reloading into same physreg.\n"); unsigned RReg = SubIdx ? 
TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); @@ -1758,9 +1972,15 @@ private: const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); RegInfo->setPhysRegUsed(DesignatedReg); ReusedOperands.markClobbered(DesignatedReg); - TII->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC, RC); - MachineInstr *CopyMI = prior(MII); + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(&MI, MBB.begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, MF); + + TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC); + + MachineInstr *CopyMI = prior(InsertLoc); UpdateKills(*CopyMI, TRI, RegKills, KillOps); // This invalidates DesignatedReg. @@ -1771,7 +1991,7 @@ private: SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); - DOUT << '\t' << *prior(MII); + DEBUG(errs() << '\t' << *prior(MII)); ++NumReused; continue; } // if (PhysReg) @@ -1785,22 +2005,28 @@ private: // available. If this occurs, use the register indicated by the // reuser. if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); + PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); RegInfo->setPhysRegUsed(PhysReg); ReusedOperands.markClobbered(PhysReg); if (AvoidReload) ++NumAvoided; else { + // Back-schedule reloads and remats. 
+ MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, MF); + if (DoReMat) { - ReMaterialize(MBB, MII, PhysReg, VirtReg, TII, TRI, VRM); + ReMaterialize(MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, VRM); } else { const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); - TII->loadRegFromStackSlot(MBB, &MI, PhysReg, SSorRMId, RC); - MachineInstr *LoadMI = prior(MII); + TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SSorRMId, RC); + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(SSorRMId, LoadMI); ++NumLoads; + DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } // This invalidates PhysReg. Spills.ClobberPhysReg(PhysReg); @@ -1817,8 +2043,8 @@ private: KilledMIRegs.insert(VirtReg); } - UpdateKills(*prior(MII), TRI, RegKills, KillOps); - DOUT << '\t' << *prior(MII); + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(errs() << '\t' << *prior(InsertLoc)); } unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); @@ -1832,7 +2058,7 @@ private: int PDSSlot = PotentialDeadStoreSlots[j]; MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; if (DeadStore) { - DOUT << "Removed dead store:\t" << *DeadStore; + DEBUG(errs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -1842,7 +2068,7 @@ private: } - DOUT << '\t' << MI; + DEBUG(errs() << '\t' << MI); // If we have folded references to memory operands, make sure we clear all @@ -1852,7 +2078,7 @@ private: for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) { unsigned VirtReg = I->second.first; VirtRegMap::ModRef MR = I->second.second; - DOUT << "Folded vreg: " << VirtReg << " MR: " << MR; + DEBUG(errs() << "Folded vreg: " << VirtReg << " MR: " << MR); // MI2VirtMap be can updated which invalidate the iterator. // Increment the iterator first. 
@@ -1861,7 +2087,7 @@ private: if (SS == VirtRegMap::NO_STACK_SLOT) continue; FoldedSS.insert(SS); - DOUT << " - StackSlot: " << SS << "\n"; + DEBUG(errs() << " - StackSlot: " << SS << "\n"); // If this folded instruction is just a use, check to see if it's a // straight load from the virt reg slot. @@ -1872,7 +2098,7 @@ private: // If this spill slot is available, turn it into a copy (or nothing) // instead of leaving it as a load! if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DOUT << "Promoted Load To Copy: " << MI; + DEBUG(errs() << "Promoted Load To Copy: " << MI); if (DestReg != InReg) { const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC); @@ -1895,7 +2121,7 @@ private: BackTracked = true; } else { - DOUT << "Removing now-noop copy: " << MI; + DEBUG(errs() << "Removing now-noop copy: " << MI); // Unset last kill since it's being reused. InvalidateKill(InReg, TRI, RegKills, KillOps); Spills.disallowClobberPhysReg(InReg); @@ -1965,7 +2191,7 @@ private: if (isDead) { // Previous store is dead. // If we get here, the store is dead, nuke it now. - DOUT << "Removed dead store:\t" << *DeadStore; + DEBUG(errs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -2036,7 +2262,7 @@ private: if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && !MI.findRegisterUseOperand(Src)->isUndef()) { ++NumDCE; - DOUT << "Removing now-noop copy: " << MI; + DEBUG(errs() << "Removing now-noop copy: " << MI); SmallVector<unsigned, 2> KillRegs; InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); if (MO.isDead() && !KillRegs.empty()) { @@ -2100,8 +2326,8 @@ private: if (ReusedOperands.isClobbered(PhysReg)) { // Another def has taken the assigned physreg. It must have been a // use&def which got it due to reuse. Undo the reuse! 
- PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); + PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); } } @@ -2124,7 +2350,7 @@ private: unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { ++NumDCE; - DOUT << "Removing now-noop copy: " << MI; + DEBUG(errs() << "Removing now-noop copy: " << MI); InvalidateKills(MI, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(&MI); MBB.erase(&MI); @@ -2136,7 +2362,15 @@ private: } } ProcessNextInst: - DistanceMap.insert(std::make_pair(&MI, Dist++)); + // Delete dead instructions without side effects. + if (!Erased && !BackTracked && isSafeToDelete(MI)) { + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + } + if (!Erased) + DistanceMap.insert(std::make_pair(&MI, Dist++)); if (!Erased && !BackTracked) { for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II) UpdateKills(*II, TRI, RegKills, KillOps); @@ -2148,9 +2382,11 @@ private: }; +} + llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { switch (RewriterOpt) { - default: assert(0 && "Unreachable!"); + default: llvm_unreachable("Unreachable!"); case local: return new LocalRewriter(); case trivial: diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h index f9d7fbb..44f9df6 100644 --- a/lib/CodeGen/VirtRegRewriter.h +++ b/lib/CodeGen/VirtRegRewriter.h @@ -10,27 +10,9 @@ #ifndef LLVM_CODEGEN_VIRTREGREWRITER_H #define LLVM_CODEGEN_VIRTREGREWRITER_H -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Streams.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include 
"llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" #include "VirtRegMap.h" -#include <map> namespace llvm { |