diff options
Diffstat (limited to 'contrib/llvm/lib/Object/MachOObjectFile.cpp')
-rw-r--r-- | contrib/llvm/lib/Object/MachOObjectFile.cpp | 1108 |
1 files changed, 925 insertions, 183 deletions
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp index 5b01867..2e4da9f 100644 --- a/contrib/llvm/lib/Object/MachOObjectFile.cpp +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -1,4 +1,4 @@ -//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// +//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,32 +12,52 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/MachO.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" -#include <cctype> +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> #include <cstring> #include <limits> #include <list> +#include <memory> +#include <string> +#include <system_error> using namespace llvm; using namespace object; namespace { + struct section_base { char sectname[16]; char segname[16]; }; -} + +} // end anonymous namespace static Error malformedError(Twine Msg) { @@ -106,13 +126,6 @@ static StringRef parseSegmentOrSectionName(const char *P) { return StringRef(P, 16); } -// Helper to advance a section or symbol iterator multiple increments at a time. -template<class T> -static void advance(T &it, size_t Val) { - while (Val--) - ++it; -} - static unsigned getCPUType(const MachOObjectFile &O) { return O.getHeader().cputype; } @@ -368,7 +381,7 @@ static Error parseSegmentLoadCommand( CmdName + " extends past the end of the file"); if (S.vmsize != 0 && S.filesize > S.vmsize) return malformedError("load command " + Twine(LoadCommandIndex) + - " fileoff field in " + CmdName + + " filesize field in " + CmdName + " greater than vmsize field"); IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname); } else @@ -784,6 +797,52 @@ static Error checkVersCommand(const MachOObjectFile &Obj, return Error::success(); } +static Error checkNoteCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize != sizeof(MachO::note_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_NOTE has incorrect cmdsize"); + MachO::note_command Nt = getStruct<MachO::note_command>(Obj, Load.Ptr); + uint64_t FileSize = Obj.getData().size(); + if (Nt.offset > FileSize) + return malformedError("offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = Nt.offset; + BigSize += Nt.size; + if (BigSize > FileSize) + return malformedError("size field plus offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Nt.offset, Nt.size, + "LC_NOTE data")) + return Err; + return Error::success(); +} + +static Error +parseBuildVersionCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + SmallVectorImpl<const char*> &BuildTools, + uint32_t LoadCommandIndex) { + MachO::build_version_command BVC = + getStruct<MachO::build_version_command>(Obj, Load.Ptr); + if (Load.C.cmdsize != + sizeof(MachO::build_version_command) + + BVC.ntools * sizeof(MachO::build_tool_version)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_BUILD_VERSION_COMMAND has incorrect cmdsize"); + + auto Start = Load.Ptr + sizeof(MachO::build_version_command); + BuildTools.resize(BVC.ntools); + for (unsigned i = 0; i < BVC.ntools; ++i) + BuildTools[i] = Start + i * sizeof(MachO::build_tool_version); + + return Error::success(); +} + static Error checkRpathCommand(const MachOObjectFile &Obj, const MachOObjectFile::LoadCommandInfo &Load, uint32_t LoadCommandIndex) { @@ -931,7 +990,26 @@ static Error checkThreadCommand(const MachOObjectFile &Obj, sys::swapByteOrder(count); state += sizeof(uint32_t); - if (cputype == MachO::CPU_TYPE_X86_64) { + if (cputype == MachO::CPU_TYPE_I386) { + if (flavor == MachO::x86_THREAD_STATE32) { + if (count != MachO::x86_THREAD_STATE32_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE32_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE32 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE32 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_X86_64) { if (flavor == MachO::x86_THREAD_STATE64) { if (count != MachO::x86_THREAD_STATE64_COUNT) return malformedError("load command " + Twine(LoadCommandIndex) + @@ -1086,11 +1164,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64bits, Error &Err, uint32_t UniversalCputype, uint32_t UniversalIndex) - : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), - SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr), - DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr), - DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr), - HasPageZeroSegment(false) { + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) { ErrorAsOutParameter ErrAsOutParam(&Err); uint64_t SizeOfHeaders; uint32_t cputype; @@ -1280,6 +1354,12 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, "LC_VERSION_MIN_WATCHOS"))) return; + } else if (Load.C.cmd == MachO::LC_NOTE) { + if ((Err = checkNoteCommand(*this, Load, I, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_BUILD_VERSION) { + if ((Err = parseBuildVersionCommand(*this, Load, BuildTools, I))) + return; } else if (Load.C.cmd == MachO::LC_RPATH) { if ((Err = checkRpathCommand(*this, Load, I))) return; @@ -1740,6 +1820,10 @@ uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { return getSection(Sec).addr; } +uint64_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { // In the case if a malformed Mach-O file where the section offset is past // the end of the file or some part of the section size is past the end of @@ -1867,13 +1951,29 @@ MachOObjectFile::section_rel_end(DataRefImpl Sec) const { return relocation_iterator(RelocationRef(Ret, this)); } +relocation_iterator MachOObjectFile::extrel_begin() const { + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. + Ret.d.b = 0; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::extrel_end() const { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. + Ret.d.b = DysymtabLoadCmd.nextrel; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { ++Rel.d.b; } uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const { - assert(getHeader().filetype == MachO::MH_OBJECT && - "Only implemented for MH_OBJECT"); + assert((getHeader().filetype == MachO::MH_OBJECT || + getHeader().filetype == MachO::MH_KEXT_BUNDLE) && + "Only implemented for MH_OBJECT && MH_KEXT_BUNDLE"); MachO::any_relocation_info RE = getRelocation(Rel); return getAnyRelocationAddress(RE); } @@ -2201,6 +2301,10 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, return std::error_code(); } +uint32_t MachOObjectFile::getLibraryCount() const { + return Libraries.size(); +} + section_iterator MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { DataRefImpl Sec; @@ -2275,11 +2379,11 @@ StringRef MachOObjectFile::getFileFormatName() const { unsigned CPUType = getCPUType(*this); if (!is64Bit()) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return "Mach-O 32-bit i386"; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return "Mach-O arm"; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return "Mach-O 32-bit ppc"; default: return "Mach-O 32-bit unknown"; @@ -2287,11 +2391,11 @@ StringRef MachOObjectFile::getFileFormatName() const { } switch (CPUType) { - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return "Mach-O 64-bit x86-64"; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return "Mach-O arm64"; - case llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return "Mach-O 64-bit ppc64"; default: return "Mach-O 64-bit unknown"; @@ -2300,17 +2404,17 @@ StringRef MachOObjectFile::getFileFormatName() const { Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return Triple::x86; - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return Triple::x86_64; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return Triple::arm; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return Triple::aarch64; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return Triple::ppc; - case llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return Triple::ppc64; default: return Triple::UnknownArch; @@ -2383,6 +2487,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, *ArchFlag = "armv7em"; return Triple("thumbv7em-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7K: + if (McpuDefault) + *McpuDefault = "cortex-a7"; if (ArchFlag) *ArchFlag = "armv7k"; return Triple("armv7k-apple-darwin"); @@ -2393,6 +2499,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, *ArchFlag = "armv7m"; return Triple("thumbv7m-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7S: + if (McpuDefault) + *McpuDefault = "cortex-a7"; if (ArchFlag) *ArchFlag = "armv7s"; return Triple("armv7s-apple-darwin"); @@ -2402,6 +2510,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, case MachO::CPU_TYPE_ARM64: switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { case MachO::CPU_SUBTYPE_ARM64_ALL: + if (McpuDefault) + *McpuDefault = "cyclone"; if (ArchFlag) *ArchFlag = "arm64"; return Triple("arm64-apple-darwin"); @@ -2497,8 +2607,7 @@ dice_iterator MachOObjectFile::end_dices() const { return dice_iterator(DiceRef(DRI, this)); } -ExportEntry::ExportEntry(ArrayRef<uint8_t> T) - : Trie(T), Malformed(false), Done(false) {} +ExportEntry::ExportEntry(ArrayRef<uint8_t> T) : Trie(T) {} void ExportEntry::moveToFirst() { pushNode(0); @@ -2567,9 +2676,7 @@ uint32_t ExportEntry::nodeOffset() const { } ExportEntry::NodeState::NodeState(const uint8_t *Ptr) - : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), - ImportName(nullptr), ChildCount(0), NextChildIndex(0), - ParentStringLength(0), IsExportNode(false) {} + : Start(Ptr), Current(Ptr) {} void ExportEntry::pushNode(uint64_t offset) { const uint8_t *Ptr = Trie.begin() + offset; @@ -2659,7 +2766,7 @@ void ExportEntry::moveNext() { iterator_range<export_iterator> MachOObjectFile::exports(ArrayRef<uint8_t> Trie) { ExportEntry Start(Trie); - if (Trie.size() == 0) + if (Trie.empty()) Start.moveToEnd(); else Start.moveToFirst(); @@ -2674,10 +2781,10 @@ iterator_range<export_iterator> MachOObjectFile::exports() const { return exports(getDyldInfoExportsTrie()); } -MachORebaseEntry::MachORebaseEntry(ArrayRef<uint8_t> Bytes, bool is64Bit) - : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), - RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), - PointerSize(is64Bit ? 8 : 4), Malformed(false), Done(false) {} +MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4) {} void MachORebaseEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -2691,122 +2798,307 @@ void MachORebaseEntry::moveToEnd() { } void MachORebaseEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); // If in the middle of some loop, move to next rebasing in loop. SegmentOffset += AdvanceAmount; if (RemainingLoopCount) { --RemainingLoopCount; return; } + // REBASE_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. Therefore it is possible to reach the end without ever having + // seen REBASE_OPCODE_DONE. if (Ptr == Opcodes.end()) { Done = true; return; } bool More = true; - while (More && !Malformed) { + while (More) { // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; uint8_t Byte = *Ptr++; uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK; uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK; + uint32_t Count, Skip; + const char *error = nullptr; switch (Opcode) { case MachO::REBASE_OPCODE_DONE: More = false; Done = true; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n"); break; case MachO::REBASE_OPCODE_SET_TYPE_IMM: RebaseType = ImmValue; + if (RebaseType > MachO::REBASE_TYPE_TEXT_PCREL32) { + *E = malformedError("for REBASE_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)RebaseType) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " - << "RebaseType=" << (int) RebaseType << "\n"); + dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " + << "RebaseType=" << (int) RebaseType << "\n"); break; case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; - SegmentOffset = readULEB128(); + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: - SegmentOffset += readULEB128(); + SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } SegmentOffset += ImmValue * PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + false); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " + " (after adding immediate times the pointer size) " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } AdvanceAmount = PointerSize; - RemainingLoopCount = ImmValue - 1; + Skip = 0; + Count = ImmValue; + if (ImmValue != 0) + RemainingLoopCount = ImmValue - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } AdvanceAmount = PointerSize; - RemainingLoopCount = readULEB128() - 1; + Skip = 0; + Count = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: - AdvanceAmount = readULEB128() + PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Skip = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + Count = 1; RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: - RemainingLoopCount = readULEB128() - 1; - AdvanceAmount = readULEB128() + PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Count = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + Skip = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; default: - Malformed = true; + *E = malformedError("bad rebase info (bad opcode value 0x" + + utohexstr(Opcode) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; } } } -uint64_t MachORebaseEntry::readULEB128() { +uint64_t MachORebaseEntry::readULEB128(const char **error) { unsigned Count; - uint64_t Result = decodeULEB128(Ptr, &Count); + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); Ptr += Count; - if (Ptr > Opcodes.end()) { + if (Ptr > Opcodes.end()) Ptr = Opcodes.end(); - Malformed = true; - } return Result; } -uint32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } +int32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; } @@ -2822,6 +3114,24 @@ StringRef MachORebaseEntry::typeName() const { return "unknown"; } +// For use with the SegIndex of a checked Mach-O Rebase entry +// to get the segment name. +StringRef MachORebaseEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the section name. +StringRef MachORebaseEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the address. +uint64_t MachORebaseEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { #ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); @@ -2834,25 +3144,27 @@ bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { } iterator_range<rebase_iterator> -MachOObjectFile::rebaseTable(ArrayRef<uint8_t> Opcodes, bool is64) { - MachORebaseEntry Start(Opcodes, is64); +MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64) { + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O); + MachORebaseEntry Start(&Err, O, Opcodes, is64); Start.moveToFirst(); - MachORebaseEntry Finish(Opcodes, is64); + MachORebaseEntry Finish(&Err, O, Opcodes, is64); Finish.moveToEnd(); return make_range(rebase_iterator(Start), rebase_iterator(Finish)); } -iterator_range<rebase_iterator> MachOObjectFile::rebaseTable() const { - return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit()); +iterator_range<rebase_iterator> MachOObjectFile::rebaseTable(Error &Err) { + return rebaseTable(Err, this, getDyldInfoRebaseOpcodes(), is64Bit()); } -MachOBindEntry::MachOBindEntry(ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) - : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), - Ordinal(0), Flags(0), Addend(0), RemainingLoopCount(0), AdvanceAmount(0), - BindType(0), PointerSize(is64Bit ? 8 : 4), - TableKind(BK), Malformed(false), Done(false) {} +MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4), TableKind(BK) {} void MachOBindEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -2866,24 +3178,31 @@ void MachOBindEntry::moveToEnd() { } void MachOBindEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); // If in the middle of some loop, move to next binding in loop. SegmentOffset += AdvanceAmount; if (RemainingLoopCount) { --RemainingLoopCount; return; } + // BIND_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. Therefore it is possible to reach the end without ever having + // seen BIND_OPCODE_DONE. if (Ptr == Opcodes.end()) { Done = true; return; } bool More = true; - while (More && !Malformed) { + while (More) { // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; uint8_t Byte = *Ptr++; uint8_t ImmValue = Byte & MachO::BIND_IMMEDIATE_MASK; uint8_t Opcode = Byte & MachO::BIND_OPCODE_MASK; int8_t SignExtended; const uint8_t *SymStart; + uint32_t Count, Skip; + const char *error = nullptr; switch (Opcode) { case MachO::BIND_OPCODE_DONE: if (TableKind == Kind::Lazy) { @@ -2899,48 +3218,108 @@ void MachOBindEntry::moveNext() { break; } More = false; - Done = true; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-bind", dbgs() << "BIND_OPCODE_DONE\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } Ordinal = ImmValue; + LibraryOrdinalSet = true; + if (ImmValue > O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + Twine((int)ImmValue) + " (max " + + Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: - Ordinal = readULEB128(); + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Ordinal = readULEB128(&error); + LibraryOrdinalSet = true; + if (error) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Ordinal > (int)O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + Twine((int)Ordinal) + " (max " + + Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_SPECIAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } if (ImmValue) { SignExtended = MachO::BIND_OPCODE_MASK | ImmValue; Ordinal = SignExtended; + if (Ordinal < MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_SPECIAL_IMM unknown " + "special ordinal: " + Twine((int)Ordinal) + " for opcode at: " + "0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } } else Ordinal = 0; + LibraryOrdinalSet = true; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: Flags = ImmValue; SymStart = Ptr; - while (*Ptr) { + while (*Ptr && (Ptr < Opcodes.end())) { ++Ptr; } + if (Ptr == Opcodes.end()) { + *E = malformedError("for BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM " + "symbol name extends past opcodes for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } SymbolName = StringRef(reinterpret_cast<const char*>(SymStart), Ptr-SymStart); ++Ptr; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " - << "SymbolName=" << SymbolName << "\n"); + dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " + << "SymbolName=" << SymbolName << "\n"); if (TableKind == Kind::Weak) { if (ImmValue & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) return; @@ -2948,111 +3327,311 @@ void MachOBindEntry::moveNext() { break; case MachO::BIND_OPCODE_SET_TYPE_IMM: BindType = ImmValue; + if (ImmValue > MachO::BIND_TYPE_TEXT_PCREL32) { + *E = malformedError("for BIND_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)ImmValue) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " - << "BindType=" << (int)BindType << "\n"); + dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " + << "BindType=" << (int)BindType << "\n"); break; case MachO::BIND_OPCODE_SET_ADDEND_SLEB: - Addend = readSLEB128(); - if (TableKind == Kind::Lazy) - Malformed = true; + Addend = readSLEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_ADDEND_SLEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " - << "Addend=" << Addend << "\n"); + dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " + << "Addend=" << Addend << "\n"); break; case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; - SegmentOffset = readULEB128(); + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::BIND_OPCODE_ADD_ADDR_ULEB: - SegmentOffset += readULEB128(); + SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::BIND_OPCODE_DO_BIND: AdvanceAmount = PointerSize; RemainingLoopCount = 0; + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND " + Twine(error) + + " for opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: - AdvanceAmount = readULEB128() + PointerSize; + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB not allowed in " + "lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + "preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + "preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = readULEB128(&error) + PointerSize; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + // Note, this is not really an error until the next bind but make no sense + // for a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB to not be followed by another + // bind operation. + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset + + AdvanceAmount, false); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB (after adding " + "ULEB) " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } RemainingLoopCount = 0; - if (TableKind == Kind::Lazy) - Malformed = true; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED not " + "allowed in lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } AdvanceAmount = ImmValue * PointerSize + PointerSize; RemainingLoopCount = 0; - if (TableKind == Kind::Lazy) - Malformed = true; + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset + + AdvanceAmount, false); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + " (after adding immediate times the pointer size) " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: - RemainingLoopCount = readULEB128() - 1; - AdvanceAmount = readULEB128() + PointerSize; - if (TableKind == Kind::Lazy) - Malformed = true; + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB not " + "allowed in lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Count = readULEB128(&error); + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (count value) " + Twine(error) + " for opcode at" + ": 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Skip = readULEB128(&error); + AdvanceAmount = Skip + PointerSize; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (skip value) " + Twine(error) + " for opcode at" + ": 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; default: - Malformed = true; + *E = malformedError("bad bind info (bad opcode value 0x" + + utohexstr(Opcode) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; } } } -uint64_t MachOBindEntry::readULEB128() { +uint64_t MachOBindEntry::readULEB128(const char **error) { unsigned Count; - uint64_t Result = decodeULEB128(Ptr, &Count); + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); Ptr += Count; - if (Ptr > Opcodes.end()) { + if (Ptr > Opcodes.end()) Ptr = Opcodes.end(); - Malformed = true; - } return Result; } -int64_t MachOBindEntry::readSLEB128() { +int64_t MachOBindEntry::readSLEB128(const char **error) { unsigned Count; - int64_t Result = decodeSLEB128(Ptr, &Count); + int64_t Result = decodeSLEB128(Ptr, &Count, Opcodes.end(), error); Ptr += Count; - if (Ptr > Opcodes.end()) { + if (Ptr > Opcodes.end()) Ptr = Opcodes.end(); - Malformed = true; - } return Result; } -uint32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } +int32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; } @@ -3076,6 +3655,24 @@ uint32_t MachOBindEntry::flags() const { return Flags; } int MachOBindEntry::ordinal() const { return Ordinal; } +// For use with the SegIndex of a checked Mach-O Bind entry +// to get the segment name. +StringRef MachOBindEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the section name. +StringRef MachOBindEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the address. +uint64_t MachOBindEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { #ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); @@ -3087,30 +3684,149 @@ bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { (Done == Other.Done); } +// Build table of sections so SegIndex/SegOffset pairs can be translated. +BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) { + uint32_t CurSegIndex = Obj->hasPageZeroSegment() ? 1 : 0; + StringRef CurSegName; + uint64_t CurSegAddress; + for (const SectionRef &Section : Obj->sections()) { + SectionInfo Info; + Section.getName(Info.SectionName); + Info.Address = Section.getAddress(); + Info.Size = Section.getSize(); + Info.SegmentName = + Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl()); + if (!Info.SegmentName.equals(CurSegName)) { + ++CurSegIndex; + CurSegName = Info.SegmentName; + CurSegAddress = Info.Address; + } + Info.SegmentIndex = CurSegIndex - 1; + Info.OffsetInSegment = Info.Address - CurSegAddress; + Info.SegmentStartAddress = CurSegAddress; + Sections.push_back(Info); + } + MaxSegIndex = CurSegIndex; +} + +// For use with a SegIndex,SegOffset pair in MachOBindEntry::moveNext() to +// validate a MachOBindEntry or MachORebaseEntry. +const char * BindRebaseSegInfo::checkSegAndOffset(int32_t SegIndex, + uint64_t SegOffset, + bool endInvalid) { + if (SegIndex == -1) + return "missing preceding *_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB"; + if (SegIndex >= MaxSegIndex) + return "bad segIndex (too large)"; + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > SegOffset) + continue; + if (SegOffset > (SI.OffsetInSegment + SI.Size)) + continue; + if (endInvalid && SegOffset >= (SI.OffsetInSegment + SI.Size)) + continue; + return nullptr; + } + return "bad segOffset, too large"; +} + +// For use in MachOBindEntry::moveNext() to validate a MachOBindEntry for +// the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode and for use in +// MachORebaseEntry::moveNext() to validate a MachORebaseEntry for +// REBASE_OPCODE_DO_*_TIMES* opcodes. The SegIndex and SegOffset must have +// been already checked. +const char * BindRebaseSegInfo::checkCountAndSkip(uint32_t Count, uint32_t Skip, + uint8_t PointerSize, + int32_t SegIndex, + uint64_t SegOffset) { + const SectionInfo &SI = findSection(SegIndex, SegOffset); + uint64_t addr = SI.SegmentStartAddress + SegOffset; + if (addr >= SI.Address + SI.Size) + return "bad segOffset, too large"; + uint64_t i = 0; + if (Count > 1) + i = (Skip + PointerSize) * (Count - 1); + else if (Count == 1) + i = Skip + PointerSize; + if (addr + i >= SI.Address + SI.Size) { + // For rebase opcodes they can step from one section to another. + uint64_t TrailingSegOffset = (addr + i) - SI.SegmentStartAddress; + const char *error = checkSegAndOffset(SegIndex, TrailingSegOffset, false); + if (error) + return "bad count and skip, too large"; + } + return nullptr; +} + +// For use with the SegIndex of a checked Mach-O Bind or Rebase entry +// to get the segment name. +StringRef BindRebaseSegInfo::segmentName(int32_t SegIndex) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex == SegIndex) + return SI.SegmentName; + } + llvm_unreachable("invalid SegIndex"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// to get the SectionInfo. +const BindRebaseSegInfo::SectionInfo &BindRebaseSegInfo::findSection( + int32_t SegIndex, uint64_t SegOffset) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > SegOffset) + continue; + if (SegOffset >= (SI.OffsetInSegment + SI.Size)) + continue; + return SI; + } + llvm_unreachable("SegIndex and SegOffset not in any section"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the section name. +StringRef BindRebaseSegInfo::sectionName(int32_t SegIndex, + uint64_t SegOffset) { + return findSection(SegIndex, SegOffset).SectionName; +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the address. +uint64_t BindRebaseSegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) { + const SectionInfo &SI = findSection(SegIndex, OffsetInSeg); + return SI.SegmentStartAddress + OffsetInSeg; +} + iterator_range<bind_iterator> -MachOObjectFile::bindTable(ArrayRef<uint8_t> Opcodes, bool is64, +MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64, MachOBindEntry::Kind BKind) { - MachOBindEntry Start(Opcodes, is64, BKind); + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O); + MachOBindEntry Start(&Err, O, Opcodes, is64, BKind); Start.moveToFirst(); - MachOBindEntry Finish(Opcodes, is64, BKind); + MachOBindEntry Finish(&Err, O, Opcodes, is64, BKind); Finish.moveToEnd(); return make_range(bind_iterator(Start), bind_iterator(Finish)); } -iterator_range<bind_iterator> MachOObjectFile::bindTable() const { - return bindTable(getDyldInfoBindOpcodes(), is64Bit(), +iterator_range<bind_iterator> MachOObjectFile::bindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoBindOpcodes(), is64Bit(), MachOBindEntry::Kind::Regular); } -iterator_range<bind_iterator> MachOObjectFile::lazyBindTable() const { - return bindTable(getDyldInfoLazyBindOpcodes(), is64Bit(), +iterator_range<bind_iterator> MachOObjectFile::lazyBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoLazyBindOpcodes(), is64Bit(), MachOBindEntry::Kind::Lazy); } -iterator_range<bind_iterator> MachOObjectFile::weakBindTable() const { - return bindTable(getDyldInfoWeakBindOpcodes(), is64Bit(), +iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoWeakBindOpcodes(), is64Bit(), MachOBindEntry::Kind::Weak); } @@ -3289,6 +4005,21 @@ MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const { return getStruct<MachO::version_min_command>(*this, L.Ptr); } +MachO::note_command +MachOObjectFile::getNoteLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::note_command>(*this, L.Ptr); +} + +MachO::build_version_command +MachOObjectFile::getBuildVersionLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::build_version_command>(*this, L.Ptr); +} + +MachO::build_tool_version +MachOObjectFile::getBuildToolVersion(unsigned index) const { + return getStruct<MachO::build_tool_version>(*this, BuildTools[index]); +} + MachO::dylib_command MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const { return getStruct<MachO::dylib_command>(*this, L.Ptr); @@ -3371,15 +4102,20 @@ MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const { MachO::any_relocation_info MachOObjectFile::getRelocation(DataRefImpl Rel) const { - DataRefImpl Sec; - Sec.d.a = Rel.d.a; uint32_t Offset; - if (is64Bit()) { - MachO::section_64 Sect = getSection64(Sec); - Offset = Sect.reloff; + if (getHeader().filetype == MachO::MH_OBJECT) { + DataRefImpl Sec; + Sec.d.a = Rel.d.a; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.reloff; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.reloff; + } } else { - MachO::section Sect = getSection(Sec); - Offset = Sect.reloff; + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations } auto P = reinterpret_cast<const MachO::any_relocation_info *>( @@ -3599,3 +4335,9 @@ ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer, return make_error<GenericBinaryError>("Unrecognized MachO magic number", object_error::invalid_file_type); } + +StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const { + return StringSwitch<StringRef>(Name) + .Case("debug_str_offs", "debug_str_offsets") + .Default(Name); +} |