diff options
Diffstat (limited to 'contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp')
-rw-r--r-- | contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp | 594 |
1 files changed, 450 insertions, 144 deletions
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp index ed55c91..8373f0c 100644 --- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/FaultMaps.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" @@ -36,6 +37,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -60,6 +62,7 @@ #include <cstring> #include <system_error> #include <utility> +#include <unordered_map> using namespace llvm; using namespace object; @@ -185,9 +188,29 @@ cl::opt<bool> PrintFaultMaps("fault-map-section", cl::opt<DIDumpType> llvm::DwarfDumpType( "dwarf", cl::init(DIDT_Null), cl::desc("Dump of dwarf debug sections:"), - cl::values(clEnumValN(DIDT_Frames, "frames", ".debug_frame"), - clEnumValEnd)); + cl::values(clEnumValN(DIDT_Frames, "frames", ".debug_frame"))); +cl::opt<bool> PrintSource( + "source", + cl::desc( + "Display source inlined with disassembly. Implies disassmble object")); + +cl::alias PrintSourceShort("S", cl::desc("Alias for -source"), + cl::aliasopt(PrintSource)); + +cl::opt<bool> PrintLines("line-numbers", + cl::desc("Display source line numbers with " + "disassembly. Implies disassemble object")); + +cl::alias PrintLinesShort("l", cl::desc("Alias for -line-numbers"), + cl::aliasopt(PrintLines)); + +cl::opt<unsigned long long> + StartAddress("start-address", cl::desc("Disassemble beginning at address"), + cl::value_desc("address"), cl::init(0)); +cl::opt<unsigned long long> + StopAddress("stop-address", cl::desc("Stop disassembly at address"), + cl::value_desc("address"), cl::init(UINT64_MAX)); static StringRef ToolName; namespace { @@ -240,18 +263,17 @@ private: llvm::object::ObjectFile const &Object; }; SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O) { - return SectionFilter([](llvm::object::SectionRef const &S) { - if(FilterSections.empty()) - return true; - llvm::StringRef String; - std::error_code error = S.getName(String); - if (error) - return false; - return std::find(FilterSections.begin(), - FilterSections.end(), - String) != FilterSections.end(); - }, - O); + return SectionFilter( + [](llvm::object::SectionRef const &S) { + if (FilterSections.empty()) + return true; + llvm::StringRef String; + std::error_code error = S.getName(String); + if (error) + return false; + return is_contained(FilterSections, String); + }, + O); } } @@ -271,6 +293,12 @@ LLVM_ATTRIBUTE_NORETURN void llvm::error(Twine Message) { } LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File, + Twine Message) { + errs() << ToolName << ": '" << File << "': " << Message << ".\n"; + exit(1); +} + +LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File, std::error_code EC) { assert(EC); errs() << ToolName << ": '" << File << "': " << EC.message() << ".\n"; @@ -297,14 +325,14 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName, if (ArchiveName != "") errs() << ArchiveName << "(" << FileName << ")"; else - errs() << FileName; + errs() << "'" << FileName << "'"; if (!ArchitectureName.empty()) errs() << " (for architecture " << ArchitectureName << ")"; std::string Buf; raw_string_ostream OS(Buf); logAllUnhandledErrors(std::move(E), OS, ""); OS.flush(); - errs() << " " << Buf; + errs() << ": " << Buf; exit(1); } @@ -312,13 +340,14 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName, const object::Archive::Child &C, llvm::Error E, StringRef ArchitectureName) { - ErrorOr<StringRef> NameOrErr = C.getName(); + Expected<StringRef> NameOrErr = C.getName(); // TODO: if we have a error getting the name then it would be nice to print // the index of which archive member this is and or its offset in the // archive instead of "???" as the name. - if (NameOrErr.getError()) + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); llvm::report_error(ArchiveName, "???", std::move(E), ArchitectureName); - else + } else llvm::report_error(ArchiveName, NameOrErr.get(), std::move(E), ArchitectureName); } @@ -347,8 +376,12 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) { std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, Error); - if (!TheTarget) - report_fatal_error("can't find target: " + Error); + if (!TheTarget) { + if (Obj) + report_error(Obj->getFileName(), "can't find target: " + Error); + else + error("can't find target: " + Error); + } // Update the triple name and return the found target. TripleName = TheTriple.getTriple(); @@ -360,13 +393,104 @@ bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) { } namespace { +class SourcePrinter { +protected: + DILineInfo OldLineInfo; + const ObjectFile *Obj; + std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer; + // File name to file contents of source + std::unordered_map<std::string, std::unique_ptr<MemoryBuffer>> SourceCache; + // Mark the line endings of the cached source + std::unordered_map<std::string, std::vector<StringRef>> LineCache; + +private: + bool cacheSource(std::string File); + +public: + virtual ~SourcePrinter() {} + SourcePrinter() : Obj(nullptr), Symbolizer(nullptr) {} + SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) { + symbolize::LLVMSymbolizer::Options SymbolizerOpts( + DILineInfoSpecifier::FunctionNameKind::None, true, false, false, + DefaultArch); + Symbolizer.reset(new symbolize::LLVMSymbolizer(SymbolizerOpts)); + } + virtual void printSourceLine(raw_ostream &OS, uint64_t Address, + StringRef Delimiter = "; "); +}; + +bool SourcePrinter::cacheSource(std::string File) { + auto BufferOrError = MemoryBuffer::getFile(File); + if (!BufferOrError) + return false; + // Chomp the file to get lines + size_t BufferSize = (*BufferOrError)->getBufferSize(); + const char *BufferStart = (*BufferOrError)->getBufferStart(); + for (const char *Start = BufferStart, *End = BufferStart; + End < BufferStart + BufferSize; End++) + if (*End == '\n' || End == BufferStart + BufferSize - 1 || + (*End == '\r' && *(End + 1) == '\n')) { + LineCache[File].push_back(StringRef(Start, End - Start)); + if (*End == '\r') + End++; + Start = End + 1; + } + SourceCache[File] = std::move(*BufferOrError); + return true; +} + +void SourcePrinter::printSourceLine(raw_ostream &OS, uint64_t Address, + StringRef Delimiter) { + if (!Symbolizer) + return; + DILineInfo LineInfo = DILineInfo(); + auto ExpectecLineInfo = + Symbolizer->symbolizeCode(Obj->getFileName(), Address); + if (!ExpectecLineInfo) + consumeError(ExpectecLineInfo.takeError()); + else + LineInfo = *ExpectecLineInfo; + + if ((LineInfo.FileName == "<invalid>") || OldLineInfo.Line == LineInfo.Line || + LineInfo.Line == 0) + return; + + if (PrintLines) + OS << Delimiter << LineInfo.FileName << ":" << LineInfo.Line << "\n"; + if (PrintSource) { + if (SourceCache.find(LineInfo.FileName) == SourceCache.end()) + if (!cacheSource(LineInfo.FileName)) + return; + auto FileBuffer = SourceCache.find(LineInfo.FileName); + if (FileBuffer != SourceCache.end()) { + auto LineBuffer = LineCache.find(LineInfo.FileName); + if (LineBuffer != LineCache.end()) + // Vector begins at 0, line numbers are non-zero + OS << Delimiter << LineBuffer->second[LineInfo.Line - 1].ltrim() + << "\n"; + } + } + OldLineInfo = LineInfo; +} + +static bool isArmElf(const ObjectFile *Obj) { + return (Obj->isELF() && + (Obj->getArch() == Triple::aarch64 || + Obj->getArch() == Triple::aarch64_be || + Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb || + Obj->getArch() == Triple::thumb || + Obj->getArch() == Triple::thumbeb)); +} + class PrettyPrinter { public: virtual ~PrettyPrinter(){} virtual void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &OS, StringRef Annot, - MCSubtargetInfo const &STI) { + MCSubtargetInfo const &STI, SourcePrinter *SP) { + if (SP && (PrintSource || PrintLines)) + SP->printSourceLine(OS, Address); OS << format("%8" PRIx64 ":", Address); if (!NoShowRawInsn) { OS << "\t"; @@ -392,10 +516,11 @@ public: OS << format("%08" PRIx32, opcode); } } - void printInst(MCInstPrinter &IP, const MCInst *MI, - ArrayRef<uint8_t> Bytes, uint64_t Address, - raw_ostream &OS, StringRef Annot, - MCSubtargetInfo const &STI) override { + void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, + uint64_t Address, raw_ostream &OS, StringRef Annot, + MCSubtargetInfo const &STI, SourcePrinter *SP) override { + if (SP && (PrintSource || PrintLines)) + SP->printSourceLine(OS, Address, ""); if (!MI) { printLead(Bytes, Address, OS); OS << " <unknown>"; @@ -416,6 +541,8 @@ public: while(!HeadTail.first.empty()) { OS << Separator; Separator = "\n"; + if (SP && (PrintSource || PrintLines)) + SP->printSourceLine(OS, Address, ""); printLead(Bytes, Address, OS); OS << Preamble; Preamble = " "; @@ -440,13 +567,9 @@ HexagonPrettyPrinter HexagonPrettyPrinterInst; class AMDGCNPrettyPrinter : public PrettyPrinter { public: - void printInst(MCInstPrinter &IP, - const MCInst *MI, - ArrayRef<uint8_t> Bytes, - uint64_t Address, - raw_ostream &OS, - StringRef Annot, - MCSubtargetInfo const &STI) override { + void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, + uint64_t Address, raw_ostream &OS, StringRef Annot, + MCSubtargetInfo const &STI, SourcePrinter *SP) override { if (!MI) { OS << " <unknown>"; return; @@ -471,6 +594,26 @@ public: }; AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst; +class BPFPrettyPrinter : public PrettyPrinter { +public: + void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, + uint64_t Address, raw_ostream &OS, StringRef Annot, + MCSubtargetInfo const &STI, SourcePrinter *SP) override { + if (SP && (PrintSource || PrintLines)) + SP->printSourceLine(OS, Address); + OS << format("%8" PRId64 ":", Address / 8); + if (!NoShowRawInsn) { + OS << "\t"; + dumpBytes(Bytes, OS); + } + if (MI) + IP.printInst(MI, OS, "", STI); + else + OS << " <unknown>"; + } +}; +BPFPrettyPrinter BPFPrettyPrinterInst; + PrettyPrinter &selectPrettyPrinter(Triple const &Triple) { switch(Triple.getArch()) { default: @@ -479,6 +622,9 @@ PrettyPrinter &selectPrettyPrinter(Triple const &Triple) { return HexagonPrettyPrinterInst; case Triple::amdgcn: return AMDGCNPrettyPrinterInst; + case Triple::bpfel: + case Triple::bpfeb: + return BPFPrettyPrinterInst; } } } @@ -495,22 +641,22 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj, const ELFFile<ELFT> &EF = *Obj->getELFFile(); - ErrorOr<const Elf_Shdr *> SecOrErr = EF.getSection(Rel.d.a); - if (std::error_code EC = SecOrErr.getError()) - return EC; + auto SecOrErr = EF.getSection(Rel.d.a); + if (!SecOrErr) + return errorToErrorCode(SecOrErr.takeError()); const Elf_Shdr *Sec = *SecOrErr; - ErrorOr<const Elf_Shdr *> SymTabOrErr = EF.getSection(Sec->sh_link); - if (std::error_code EC = SymTabOrErr.getError()) - return EC; + auto SymTabOrErr = EF.getSection(Sec->sh_link); + if (!SymTabOrErr) + return errorToErrorCode(SymTabOrErr.takeError()); const Elf_Shdr *SymTab = *SymTabOrErr; assert(SymTab->sh_type == ELF::SHT_SYMTAB || SymTab->sh_type == ELF::SHT_DYNSYM); - ErrorOr<const Elf_Shdr *> StrTabSec = EF.getSection(SymTab->sh_link); - if (std::error_code EC = StrTabSec.getError()) - return EC; - ErrorOr<StringRef> StrTabOrErr = EF.getStringTable(*StrTabSec); - if (std::error_code EC = StrTabOrErr.getError()) - return EC; + auto StrTabSec = EF.getSection(SymTab->sh_link); + if (!StrTabSec) + return errorToErrorCode(StrTabSec.takeError()); + auto StrTabOrErr = EF.getStringTable(*StrTabSec); + if (!StrTabOrErr) + return errorToErrorCode(StrTabOrErr.takeError()); StringRef StrTab = *StrTabOrErr; uint8_t type = RelRef.getType(); StringRef res; @@ -536,9 +682,9 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj, if (!SymSI) return errorToErrorCode(SymSI.takeError()); const Elf_Shdr *SymSec = Obj->getSection((*SymSI)->getRawDataRefImpl()); - ErrorOr<StringRef> SecName = EF.getSectionName(SymSec); - if (std::error_code EC = SecName.getError()) - return EC; + auto SecName = EF.getSectionName(SymSec); + if (!SecName) + return errorToErrorCode(SecName.takeError()); Target = *SecName; } else { Expected<StringRef> SymName = symb->getName(StrTab); @@ -574,6 +720,7 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj, } break; case ELF::EM_LANAI: + case ELF::EM_AVR: case ELF::EM_AARCH64: { std::string fmtbuf; raw_string_ostream fmt(fmtbuf); @@ -590,6 +737,7 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj, case ELF::EM_HEXAGON: case ELF::EM_MIPS: case ELF::EM_BPF: + case ELF::EM_RISCV: res = Target; break; case ELF::EM_WEBASSEMBLY: @@ -657,23 +805,13 @@ static void printRelocationTargetName(const MachOObjectFile *O, for (const SymbolRef &Symbol : O->symbols()) { std::error_code ec; Expected<uint64_t> Addr = Symbol.getAddress(); - if (!Addr) { - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(Addr.takeError(), OS, ""); - OS.flush(); - report_fatal_error(Buf); - } + if (!Addr) + report_error(O->getFileName(), Addr.takeError()); if (*Addr != Val) continue; Expected<StringRef> Name = Symbol.getName(); - if (!Name) { - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(Name.takeError(), OS, ""); - OS.flush(); - report_fatal_error(Buf); - } + if (!Name) + report_error(O->getFileName(), Name.takeError()); fmt << *Name; return; } @@ -688,7 +826,7 @@ static void printRelocationTargetName(const MachOObjectFile *O, if (Addr != Val) continue; if ((ec = Section.getName(Name))) - report_fatal_error(ec.message()); + report_error(O->getFileName(), ec); fmt << Name; return; } @@ -705,7 +843,8 @@ static void printRelocationTargetName(const MachOObjectFile *O, symbol_iterator SI = O->symbol_begin(); advance(SI, Val); Expected<StringRef> SOrErr = SI->getName(); - error(errorToErrorCode(SOrErr.takeError())); + if (!SOrErr) + report_error(O->getFileName(), SOrErr.takeError()); S = *SOrErr; } else { section_iterator SI = O->section_begin(); @@ -756,8 +895,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj, // NOTE: Scattered relocations don't exist on x86_64. unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::X86_64_RELOC_UNSIGNED) - report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " - "X86_64_RELOC_SUBTRACTOR."); + report_error(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after " + "X86_64_RELOC_SUBTRACTOR."); // The X86_64_RELOC_UNSIGNED contains the minuend symbol; // X86_64_RELOC_SUBTRACTOR contains the subtrahend. @@ -805,8 +944,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj, unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::GENERIC_RELOC_PAIR) - report_fatal_error("Expected GENERIC_RELOC_PAIR after " - "GENERIC_RELOC_SECTDIFF."); + report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " + "GENERIC_RELOC_SECTDIFF."); printRelocationTargetName(Obj, RE, fmt); fmt << "-"; @@ -826,8 +965,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj, // GENERIC_RELOC_PAIR. unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::GENERIC_RELOC_PAIR) - report_fatal_error("Expected GENERIC_RELOC_PAIR after " - "GENERIC_RELOC_LOCAL_SECTDIFF."); + report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " + "GENERIC_RELOC_LOCAL_SECTDIFF."); printRelocationTargetName(Obj, RE, fmt); fmt << "-"; @@ -866,8 +1005,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj, // ARM_RELOC_PAIR. unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::ARM_RELOC_PAIR) - report_fatal_error("Expected ARM_RELOC_PAIR after " - "ARM_RELOC_HALF"); + report_error(Obj->getFileName(), "Expected ARM_RELOC_PAIR after " + "ARM_RELOC_HALF"); // NOTE: The half of the target virtual address is stashed in the // address field of the secondary relocation, but we can't reverse @@ -939,7 +1078,23 @@ static bool getHidden(RelocationRef RelRef) { return false; } +static uint8_t getElfSymbolType(const ObjectFile *Obj, const SymbolRef &Sym) { + assert(Obj->isELF()); + if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj)) + return Elf32LEObj->getSymbol(Sym.getRawDataRefImpl())->getType(); + if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj)) + return Elf64LEObj->getSymbol(Sym.getRawDataRefImpl())->getType(); + if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj)) + return Elf32BEObj->getSymbol(Sym.getRawDataRefImpl())->getType(); + if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj)) + return Elf64BEObj->getSymbol(Sym.getRawDataRefImpl())->getType(); + llvm_unreachable("Unsupported binary format"); +} + static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { + if (StartAddress > StopAddress) + error("Start address should be less than stop address"); + const Target *TheTarget = getTarget(Obj); // Package up features to be passed to target/subtarget @@ -952,27 +1107,34 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { std::unique_ptr<const MCRegisterInfo> MRI( TheTarget->createMCRegInfo(TripleName)); if (!MRI) - report_fatal_error("error: no register info for target " + TripleName); + report_error(Obj->getFileName(), "no register info for target " + + TripleName); // Set up disassembler. std::unique_ptr<const MCAsmInfo> AsmInfo( TheTarget->createMCAsmInfo(*MRI, TripleName)); if (!AsmInfo) - report_fatal_error("error: no assembly info for target " + TripleName); + report_error(Obj->getFileName(), "no assembly info for target " + + TripleName); std::unique_ptr<const MCSubtargetInfo> STI( TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); if (!STI) - report_fatal_error("error: no subtarget info for target " + TripleName); + report_error(Obj->getFileName(), "no subtarget info for target " + + TripleName); std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); if (!MII) - report_fatal_error("error: no instruction info for target " + TripleName); - std::unique_ptr<const MCObjectFileInfo> MOFI(new MCObjectFileInfo); - MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get()); + report_error(Obj->getFileName(), "no instruction info for target " + + TripleName); + MCObjectFileInfo MOFI; + MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI); + // FIXME: for now initialize MCObjectFileInfo with default values + MOFI.InitMCObjectFileInfo(Triple(TripleName), false, CodeModel::Default, Ctx); std::unique_ptr<MCDisassembler> DisAsm( TheTarget->createMCDisassembler(*STI, Ctx)); if (!DisAsm) - report_fatal_error("error: no disassembler for target " + TripleName); + report_error(Obj->getFileName(), "no disassembler for target " + + TripleName); std::unique_ptr<const MCInstrAnalysis> MIA( TheTarget->createMCInstrAnalysis(MII.get())); @@ -981,14 +1143,16 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI)); if (!IP) - report_fatal_error("error: no instruction printer for target " + - TripleName); + report_error(Obj->getFileName(), "no instruction printer for target " + + TripleName); IP->setPrintImmHex(PrintImmHex); PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName)); StringRef Fmt = Obj->getBytesInAddress() > 4 ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": "; + SourcePrinter SP(Obj, TheTarget->getName()); + // Create a mapping, RelocSecs = SectionRelocMap[S], where sections // in RelocSecs contain the relocations for section S. std::error_code EC; @@ -1001,25 +1165,33 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { // Create a mapping from virtual address to symbol name. This is used to // pretty print the symbols while disassembling. - typedef std::vector<std::pair<uint64_t, StringRef>> SectionSymbolsTy; + typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy; std::map<SectionRef, SectionSymbolsTy> AllSymbols; for (const SymbolRef &Symbol : Obj->symbols()) { Expected<uint64_t> AddressOrErr = Symbol.getAddress(); - error(errorToErrorCode(AddressOrErr.takeError())); + if (!AddressOrErr) + report_error(Obj->getFileName(), AddressOrErr.takeError()); uint64_t Address = *AddressOrErr; Expected<StringRef> Name = Symbol.getName(); - error(errorToErrorCode(Name.takeError())); + if (!Name) + report_error(Obj->getFileName(), Name.takeError()); if (Name->empty()) continue; Expected<section_iterator> SectionOrErr = Symbol.getSection(); - error(errorToErrorCode(SectionOrErr.takeError())); + if (!SectionOrErr) + report_error(Obj->getFileName(), SectionOrErr.takeError()); section_iterator SecI = *SectionOrErr; if (SecI == Obj->section_end()) continue; - AllSymbols[*SecI].emplace_back(Address, *Name); + uint8_t SymbolType = ELF::STT_NOTYPE; + if (Obj->isELF()) + SymbolType = getElfSymbolType(Obj, Symbol); + + AllSymbols[*SecI].emplace_back(Address, *Name, SymbolType); + } // Create a mapping from virtual address to section. @@ -1051,7 +1223,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { Sec = SectionAddresses.end(); if (Sec != SectionAddresses.end()) - AllSymbols[Sec->second].emplace_back(VA, Name); + AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE); } } @@ -1073,20 +1245,36 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { SectionSymbolsTy &Symbols = AllSymbols[Section]; std::vector<uint64_t> DataMappingSymsAddr; std::vector<uint64_t> TextMappingSymsAddr; - if (Obj->isELF() && Obj->getArch() == Triple::aarch64) { + if (isArmElf(Obj)) { for (const auto &Symb : Symbols) { - uint64_t Address = Symb.first; - StringRef Name = Symb.second; + uint64_t Address = std::get<0>(Symb); + StringRef Name = std::get<1>(Symb); if (Name.startswith("$d")) DataMappingSymsAddr.push_back(Address - SectionAddr); if (Name.startswith("$x")) TextMappingSymsAddr.push_back(Address - SectionAddr); + if (Name.startswith("$a")) + TextMappingSymsAddr.push_back(Address - SectionAddr); + if (Name.startswith("$t")) + TextMappingSymsAddr.push_back(Address - SectionAddr); } } std::sort(DataMappingSymsAddr.begin(), DataMappingSymsAddr.end()); std::sort(TextMappingSymsAddr.begin(), TextMappingSymsAddr.end()); + if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { + // AMDGPU disassembler uses symbolizer for printing labels + std::unique_ptr<MCRelocationInfo> RelInfo( + TheTarget->createMCRelocationInfo(TripleName, Ctx)); + if (RelInfo) { + std::unique_ptr<MCSymbolizer> Symbolizer( + TheTarget->createMCSymbolizer( + TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo))); + DisAsm->setSymbolizer(std::move(Symbolizer)); + } + } + // Make a list of all the relocations for this section. std::vector<RelocationRef> Rels; if (InlineRelocs) { @@ -1107,14 +1295,22 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { } StringRef name; error(Section.getName(name)); + + if ((SectionAddr <= StopAddress) && + (SectionAddr + SectSize) >= StartAddress) { outs() << "Disassembly of section "; if (!SegmentName.empty()) outs() << SegmentName << ","; outs() << name << ':'; + } // If the section has no symbol at the start, just insert a dummy one. - if (Symbols.empty() || Symbols[0].first != 0) - Symbols.insert(Symbols.begin(), std::make_pair(SectionAddr, name)); + if (Symbols.empty() || std::get<0>(Symbols[0]) != 0) { + Symbols.insert(Symbols.begin(), + std::make_tuple(SectionAddr, name, Section.isText() + ? ELF::STT_FUNC + : ELF::STT_OBJECT)); + } SmallString<40> Comments; raw_svector_ostream CommentStream(Comments); @@ -1131,12 +1327,11 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { std::vector<RelocationRef>::const_iterator rel_end = Rels.end(); // Disassemble symbol by symbol. for (unsigned si = 0, se = Symbols.size(); si != se; ++si) { - - uint64_t Start = Symbols[si].first - SectionAddr; + uint64_t Start = std::get<0>(Symbols[si]) - SectionAddr; // The end is either the section end or the beginning of the next // symbol. uint64_t End = - (si == se - 1) ? SectSize : Symbols[si + 1].first - SectionAddr; + (si == se - 1) ? SectSize : std::get<0>(Symbols[si + 1]) - SectionAddr; // Don't try to disassemble beyond the end of section contents. if (End > SectSize) End = SectSize; @@ -1144,19 +1339,37 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { if (Start >= End) continue; + // Check if we need to skip symbol + // Skip if the symbol's data is not between StartAddress and StopAddress + if (End + SectionAddr < StartAddress || + Start + SectionAddr > StopAddress) { + continue; + } + + // Stop disassembly at the stop address specified + if (End + SectionAddr > StopAddress) + End = StopAddress - SectionAddr; + if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { // make size 4 bytes folded End = Start + ((End - Start) & ~0x3ull); - Start += 256; // add sizeof(amd_kernel_code_t) - // cut trailing zeroes - up to 256 bytes (align) - const uint64_t EndAlign = 256; - const auto Limit = End - (std::min)(EndAlign, End - Start); - while (End > Limit && - *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0) - End -= 4; + if (std::get<2>(Symbols[si]) == ELF::STT_AMDGPU_HSA_KERNEL) { + // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) + Start += 256; + } + if (si == se - 1 || + std::get<2>(Symbols[si + 1]) == ELF::STT_AMDGPU_HSA_KERNEL) { + // cut trailing zeroes at the end of kernel + // cut up to 256 bytes + const uint64_t EndAlign = 256; + const auto Limit = End - (std::min)(EndAlign, End - Start); + while (End > Limit && + *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0) + End -= 4; + } } - outs() << '\n' << Symbols[si].second << ":\n"; + outs() << '\n' << std::get<1>(Symbols[si]) << ":\n"; #ifndef NDEBUG raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); @@ -1167,10 +1380,18 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { for (Index = Start; Index < End; Index += Size) { MCInst Inst; + if (Index + SectionAddr < StartAddress || + Index + SectionAddr > StopAddress) { + // skip byte by byte till StartAddress is reached + Size = 1; + continue; + } // AArch64 ELF binaries can interleave data and text in the // same section. We rely on the markers introduced to - // understand what we need to dump. - if (Obj->isELF() && Obj->getArch() == Triple::aarch64) { + // understand what we need to dump. If the data marker is within a + // function, it is denoted as a word/short etc + if (isArmElf(Obj) && std::get<2>(Symbols[si]) != ELF::STT_OBJECT && + !DisassembleAll) { uint64_t Stride = 0; auto DAI = std::lower_bound(DataMappingSymsAddr.begin(), @@ -1183,15 +1404,41 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { if (Index + 4 <= End) { Stride = 4; dumpBytes(Bytes.slice(Index, 4), outs()); - outs() << "\t.word"; + outs() << "\t.word\t"; + uint32_t Data = 0; + if (Obj->isLittleEndian()) { + const auto Word = + reinterpret_cast<const support::ulittle32_t *>( + Bytes.data() + Index); + Data = *Word; + } else { + const auto Word = reinterpret_cast<const support::ubig32_t *>( + Bytes.data() + Index); + Data = *Word; + } + outs() << "0x" << format("%08" PRIx32, Data); } else if (Index + 2 <= End) { Stride = 2; dumpBytes(Bytes.slice(Index, 2), outs()); - outs() << "\t.short"; + outs() << "\t\t.short\t"; + uint16_t Data = 0; + if (Obj->isLittleEndian()) { + const auto Short = + reinterpret_cast<const support::ulittle16_t *>( + Bytes.data() + Index); + Data = *Short; + } else { + const auto Short = + reinterpret_cast<const support::ubig16_t *>(Bytes.data() + + Index); + Data = *Short; + } + outs() << "0x" << format("%04" PRIx16, Data); } else { Stride = 1; dumpBytes(Bytes.slice(Index, 1), outs()); - outs() << "\t.byte"; + outs() << "\t\t.byte\t"; + outs() << "0x" << format("%02" PRIx8, Bytes.slice(Index, 1)[0]); } Index += Stride; outs() << "\n"; @@ -1203,17 +1450,62 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { } } + // If there is a data symbol inside an ELF text section and we are only + // disassembling text (applicable all architectures), + // we are in a situation where we must print the data and not + // disassemble it. + if (Obj->isELF() && std::get<2>(Symbols[si]) == ELF::STT_OBJECT && + !DisassembleAll && Section.isText()) { + // print out data up to 8 bytes at a time in hex and ascii + uint8_t AsciiData[9] = {'\0'}; + uint8_t Byte; + int NumBytes = 0; + + for (Index = Start; Index < End; Index += 1) { + if (((SectionAddr + Index) < StartAddress) || + ((SectionAddr + Index) > StopAddress)) + continue; + if (NumBytes == 0) { + outs() << format("%8" PRIx64 ":", SectionAddr + Index); + outs() << "\t"; + } + Byte = Bytes.slice(Index)[0]; + outs() << format(" %02x", Byte); + AsciiData[NumBytes] = isprint(Byte) ? Byte : '.'; + + uint8_t IndentOffset = 0; + NumBytes++; + if (Index == End - 1 || NumBytes > 8) { + // Indent the space for less than 8 bytes data. + // 2 spaces for byte and one for space between bytes + IndentOffset = 3 * (8 - NumBytes); + for (int Excess = 8 - NumBytes; Excess < 8; Excess++) + AsciiData[Excess] = '\0'; + NumBytes = 8; + } + if (NumBytes == 8) { + AsciiData[8] = '\0'; + outs() << std::string(IndentOffset, ' ') << " "; + outs() << reinterpret_cast<char *>(AsciiData); + outs() << '\n'; + NumBytes = 0; + } + } + } if (Index >= End) break; + // Disassemble a real instruction or a data when disassemble all is + // provided bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), SectionAddr + Index, DebugOut, CommentStream); if (Size == 0) Size = 1; + PIP.printInst(*IP, Disassembled ? &Inst : nullptr, - Bytes.slice(Index, Size), - SectionAddr + Index, outs(), "", *STI); + Bytes.slice(Index, Size), SectionAddr + Index, outs(), "", + *STI, &SP); outs() << CommentStream.str(); Comments.clear(); @@ -1252,8 +1544,8 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { auto TargetSym = std::upper_bound( TargetSectionSymbols->begin(), TargetSectionSymbols->end(), Target, [](uint64_t LHS, - const std::pair<uint64_t, StringRef> &RHS) { - return LHS < RHS.first; + const std::tuple<uint64_t, StringRef, uint8_t> &RHS) { + return LHS < std::get<0>(RHS); }); if (TargetSym != TargetSectionSymbols->begin()) { --TargetSym; @@ -1278,7 +1570,10 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { SmallString<32> val; // If this relocation is hidden, skip it. - if (hidden) goto skip_print_rel; + if (hidden || ((SectionAddr + addr) < StartAddress)) { + ++rel_cur; + continue; + } // Stop when rel_cur's address is past the current instruction. if (addr >= Index + Size) break; @@ -1286,8 +1581,6 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { error(getRelocationValueString(*rel_cur, val)); outs() << format(Fmt.data(), SectionAddr + addr) << name << "\t" << val << "\n"; - - skip_print_rel: ++rel_cur; } } @@ -1314,7 +1607,7 @@ void llvm::PrintRelocations(const ObjectFile *Obj) { uint64_t address = Reloc.getOffset(); SmallString<32> relocname; SmallString<32> valuestr; - if (hidden) + if (address < StartAddress || address > StopAddress || hidden) continue; Reloc.getTypeName(relocname); error(getRelocationValueString(Reloc, valuestr)); @@ -1403,15 +1696,21 @@ void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName, for (const SymbolRef &Symbol : o->symbols()) { Expected<uint64_t> AddressOrError = Symbol.getAddress(); if (!AddressOrError) - report_error(ArchiveName, o->getFileName(), AddressOrError.takeError()); + report_error(ArchiveName, o->getFileName(), AddressOrError.takeError(), + ArchitectureName); uint64_t Address = *AddressOrError; + if ((Address < StartAddress) || (Address > StopAddress)) + continue; Expected<SymbolRef::Type> TypeOrError = Symbol.getType(); if (!TypeOrError) - report_error(ArchiveName, o->getFileName(), TypeOrError.takeError()); + report_error(ArchiveName, o->getFileName(), TypeOrError.takeError(), + ArchitectureName); SymbolRef::Type Type = *TypeOrError; uint32_t Flags = Symbol.getFlags(); Expected<section_iterator> SectionOrErr = Symbol.getSection(); - error(errorToErrorCode(SectionOrErr.takeError())); + if (!SectionOrErr) + report_error(ArchiveName, o->getFileName(), SectionOrErr.takeError(), + ArchitectureName); section_iterator Section = *SectionOrErr; StringRef Name; if (Type == SymbolRef::ST_Debug && Section != o->section_end()) { @@ -1629,27 +1928,20 @@ static void printFaultMaps(const ObjectFile *Obj) { outs() << FMP; } -static void printPrivateFileHeaders(const ObjectFile *o) { - if (o->isELF()) - printELFFileHeader(o); - else if (o->isCOFF()) - printCOFFFileHeader(o); - else if (o->isMachO()) { - printMachOFileHeader(o); - printMachOLoadCommands(o); - } else - report_fatal_error("Invalid/Unsupported object file format"); -} - -static void printFirstPrivateFileHeader(const ObjectFile *o) { +static void printPrivateFileHeaders(const ObjectFile *o, bool onlyFirst) { if (o->isELF()) - printELFFileHeader(o); - else if (o->isCOFF()) - printCOFFFileHeader(o); - else if (o->isMachO()) + return printELFFileHeader(o); + if (o->isCOFF()) + return printCOFFFileHeader(o); + if (o->isWasm()) + return printWasmFileHeader(o); + if (o->isMachO()) { printMachOFileHeader(o); - else - report_fatal_error("Invalid/Unsupported object file format"); + if (!onlyFirst) + printMachOLoadCommands(o); + return; + } + report_error(o->getFileName(), "Invalid/Unsupported object file format"); } static void DumpObject(const ObjectFile *o, const Archive *a = nullptr) { @@ -1676,10 +1968,8 @@ static void DumpObject(const ObjectFile *o, const Archive *a = nullptr) { PrintSymbolTable(o, ArchiveName); if (UnwindInfo) PrintUnwindInfo(o); - if (PrivateHeaders) - printPrivateFileHeaders(o); - if (FirstPrivateHeader) - printFirstPrivateFileHeader(o); + if (PrivateHeaders || FirstPrivateHeader) + printPrivateFileHeaders(o, FirstPrivateHeader); if (ExportsTrie) printExportsTrie(o); if (Rebase) @@ -1701,9 +1991,23 @@ static void DumpObject(const ObjectFile *o, const Archive *a = nullptr) { } } +static void DumpObject(const COFFImportFile *I, const Archive *A) { + StringRef ArchiveName = A ? A->getFileName() : ""; + + // Avoid other output when using a raw option. + if (!RawClangAST) + outs() << '\n' + << ArchiveName << "(" << I->getFileName() << ")" + << ":\tfile format COFF-import-file" + << "\n\n"; + + if (SymbolTable) + printCOFFSymbolTable(I); +} + /// @brief Dump each object file in \a a; static void DumpArchive(const Archive *a) { - Error Err; + Error Err = Error::success(); for (auto &C : a->children(Err)) { Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { @@ -1713,6 +2017,8 @@ static void DumpArchive(const Archive *a) { } if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get())) DumpObject(o, a); + else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get())) + DumpObject(I, a); else report_error(a->getFileName(), object_error::invalid_file_type); } @@ -1768,7 +2074,7 @@ int main(int argc, char **argv) { if (InputFilenames.size() == 0) InputFilenames.push_back("a.out"); - if (DisassembleAll) + if (DisassembleAll || PrintSource || PrintLines) Disassemble = true; if (!Disassemble && !Relocations |