diff options
Diffstat (limited to 'contrib/llvm/lib/Object')
-rw-r--r-- | contrib/llvm/lib/Object/Archive.cpp | 636 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/ArchiveWriter.cpp | 84 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/Binary.cpp | 2 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/COFFObjectFile.cpp | 68 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/Decompressor.cpp | 102 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/ELF.cpp | 14 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/Error.cpp | 27 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/IRObjectFile.cpp | 276 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/MachOObjectFile.cpp | 1556 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/MachOUniversal.cpp | 94 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp | 59 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/ModuleSymbolTable.cpp | 189 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/ObjectFile.cpp | 10 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/RecordStreamer.cpp | 16 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/RecordStreamer.h | 3 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/SymbolSize.cpp | 18 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/SymbolicFile.cpp | 12 | ||||
-rw-r--r-- | contrib/llvm/lib/Object/WasmObjectFile.cpp | 313 |
18 files changed, 2748 insertions, 731 deletions
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp index daf301e..f2021f7 100644 --- a/contrib/llvm/lib/Object/Archive.cpp +++ b/contrib/llvm/lib/Object/Archive.cpp @@ -27,125 +27,380 @@ static const char *const ThinMagic = "!<thin>\n"; void Archive::anchor() { } -StringRef ArchiveMemberHeader::getName() const { +static Error +malformedError(Twine Msg) { + std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; + return make_error<GenericBinaryError>(std::move(StringMsg), + object_error::parse_failed); +} + +ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, + const char *RawHeaderPtr, + uint64_t Size, Error *Err) + : Parent(Parent), + ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) { + if (RawHeaderPtr == nullptr) + return; + ErrorAsOutParameter ErrAsOutParam(Err); + + if (Size < sizeof(ArMemHdrType)) { + if (Err) { + std::string Msg("remaining size of archive too small for next archive " + "member header "); + Expected<StringRef> NameOrErr = getName(Size); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - Parent->getData().data(); + *Err = malformedError(Msg + "at offset " + Twine(Offset)); + } else + *Err = malformedError(Msg + "for " + NameOrErr.get()); + } + return; + } + if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { + if (Err) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(llvm::StringRef(ArMemHdr->Terminator, + sizeof(ArMemHdr->Terminator))); + OS.flush(); + std::string Msg("terminator characters in archive member \"" + Buf + + "\" not the correct \"`\\n\" values for the archive " + "member header "); + Expected<StringRef> NameOrErr = getName(Size); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - Parent->getData().data(); + *Err = malformedError(Msg + "at offset " + Twine(Offset)); + } else + *Err = malformedError(Msg + "for " + NameOrErr.get()); + } + return; + } +} + +// This gets the raw name from the ArMemHdr->Name field and checks that it is +// valid for the kind of archive. If it is not valid it returns an Error. +Expected<StringRef> ArchiveMemberHeader::getRawName() const { char EndCond; - if (Name[0] == '/' || Name[0] == '#') + auto Kind = Parent->kind(); + if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { + if (ArMemHdr->Name[0] == ' ') { + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("name contains a leading space for archive member " + "header at offset " + Twine(Offset)); + } + EndCond = ' '; + } + else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') EndCond = ' '; else EndCond = '/'; llvm::StringRef::size_type end = - llvm::StringRef(Name, sizeof(Name)).find(EndCond); + llvm::StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); if (end == llvm::StringRef::npos) - end = sizeof(Name); - assert(end <= sizeof(Name) && end > 0); + end = sizeof(ArMemHdr->Name); + assert(end <= sizeof(ArMemHdr->Name) && end > 0); // Don't include the EndCond if there is one. - return llvm::StringRef(Name, end); + return llvm::StringRef(ArMemHdr->Name, end); } -ErrorOr<uint32_t> ArchiveMemberHeader::getSize() const { +// This gets the name looking up long names. Size is the size of the archive +// member including the header, so the size of any name following the header +// is checked to make sure it does not overflow. +Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { + + // This can be called from the ArchiveMemberHeader constructor when the + // archive header is truncated to produce an error message with the name. + // Make sure the name field is not truncated. + if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("archive header truncated before the name field " + "for archive member header at offset " + + Twine(ArchiveOffset)); + } + + // The raw name itself can be invalid. + Expected<StringRef> NameOrErr = getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + + // Check if it's a special name. + if (Name[0] == '/') { + if (Name.size() == 1) // Linker member. + return Name; + if (Name.size() == 2 && Name[1] == '/') // String table. + return Name; + // It's a long name. + // Get the string table offset. + std::size_t StringOffset; + if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(1).rtrim(' ')); + OS.flush(); + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name offset characters after the '/' are " + "not all decimal numbers: '" + Buf + "' for " + "archive member header at offset " + + Twine(ArchiveOffset)); + } + + // Verify it. + if (StringOffset >= Parent->getStringTable().size()) { + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name offset " + Twine(StringOffset) + " past " + "the end of the string table for archive member " + "header at offset " + Twine(ArchiveOffset)); + } + const char *addr = Parent->getStringTable().begin() + StringOffset; + + // GNU long file names end with a "/\n". + if (Parent->kind() == Archive::K_GNU || + Parent->kind() == Archive::K_MIPS64) { + StringRef::size_type End = StringRef(addr).find('\n'); + return StringRef(addr, End - 1); + } + return addr; + } + + if (Name.startswith("#1/")) { + uint64_t NameLength; + if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(3).rtrim(' ')); + OS.flush(); + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name length characters after the #1/ are " + "not all decimal numbers: '" + Buf + "' for " + "archive member header at offset " + + Twine(ArchiveOffset)); + } + if (getSizeOf() + NameLength > Size) { + uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("long name length: " + Twine(NameLength) + + " extends past the end of the member or archive " + "for archive member header at offset " + + Twine(ArchiveOffset)); + } + return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), + NameLength).rtrim('\0'); + } + + // It is not a long name so trim the blanks at the end of the name. + if (Name[Name.size() - 1] != '/') + return Name.rtrim(' '); + + // It's a simple name. + return Name.drop_back(1); +} + +Expected<uint32_t> ArchiveMemberHeader::getSize() const { uint32_t Ret; - if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret)) - return object_error::parse_failed; // Size is not a decimal number. + if (llvm::StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(llvm::StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ")); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in size field in archive header are not " + "all decimal numbers: '" + Buf + "' for archive " + "member header at offset " + Twine(Offset)); + } return Ret; } -sys::fs::perms ArchiveMemberHeader::getAccessMode() const { +Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const { unsigned Ret; - if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(' ').getAsInteger(8, Ret)) - llvm_unreachable("Access mode is not an octal number."); + if (StringRef(ArMemHdr->AccessMode, + sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(llvm::StringRef(ArMemHdr->AccessMode, + sizeof(ArMemHdr->AccessMode)).rtrim(" ")); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in AccessMode field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } return static_cast<sys::fs::perms>(Ret); } -sys::TimeValue ArchiveMemberHeader::getLastModified() const { +Expected<sys::TimePoint<std::chrono::seconds>> +ArchiveMemberHeader::getLastModified() const { unsigned Seconds; - if (StringRef(LastModified, sizeof(LastModified)).rtrim(' ') - .getAsInteger(10, Seconds)) - llvm_unreachable("Last modified time not a decimal number."); + if (StringRef(ArMemHdr->LastModified, + sizeof(ArMemHdr->LastModified)).rtrim(' ') + .getAsInteger(10, Seconds)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(llvm::StringRef(ArMemHdr->LastModified, + sizeof(ArMemHdr->LastModified)).rtrim(" ")); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in LastModified field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } - sys::TimeValue Ret; - Ret.fromEpochTime(Seconds); - return Ret; + return sys::toTimePoint(Seconds); } -unsigned ArchiveMemberHeader::getUID() const { +Expected<unsigned> ArchiveMemberHeader::getUID() const { unsigned Ret; - StringRef User = StringRef(UID, sizeof(UID)).rtrim(' '); + StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' '); if (User.empty()) return 0; - if (User.getAsInteger(10, Ret)) - llvm_unreachable("UID time not a decimal number."); + if (User.getAsInteger(10, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(User); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in UID field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } return Ret; } -unsigned ArchiveMemberHeader::getGID() const { +Expected<unsigned> ArchiveMemberHeader::getGID() const { unsigned Ret; - StringRef Group = StringRef(GID, sizeof(GID)).rtrim(' '); + StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' '); if (Group.empty()) return 0; - if (Group.getAsInteger(10, Ret)) - llvm_unreachable("GID time not a decimal number."); + if (Group.getAsInteger(10, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Group); + OS.flush(); + uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - + Parent->getData().data(); + return malformedError("characters in GID field in archive header " + "are not all decimal numbers: '" + Buf + "' for the " + "archive member header at offset " + Twine(Offset)); + } return Ret; } Archive::Child::Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile) - : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {} + : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr), + Data(Data), StartOfFile(StartOfFile) { +} -Archive::Child::Child(const Archive *Parent, const char *Start, - std::error_code *EC) - : Parent(Parent) { +Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) + : Parent(Parent), + Header(Parent, Start, + Parent + ? Parent->getData().size() - (Start - Parent->getData().data()) + : 0, Err) { if (!Start) return; - uint64_t Size = sizeof(ArchiveMemberHeader); + // If we are pointed to real data, Start is not a nullptr, then there must be + // a non-null Err pointer available to report malformed data on. Only in + // the case sentinel value is being constructed is Err is permitted to be a + // nullptr. + assert(Err && "Err can't be nullptr if Start is not a nullptr"); + + ErrorAsOutParameter ErrAsOutParam(Err); + + // If there was an error in the construction of the Header + // then just return with the error now set. + if (*Err) + return; + + uint64_t Size = Header.getSizeOf(); Data = StringRef(Start, Size); - if (!isThinMember()) { - ErrorOr<uint64_t> MemberSize = getRawSize(); - if ((*EC = MemberSize.getError())) + Expected<bool> isThinOrErr = isThinMember(); + if (!isThinOrErr) { + *Err = isThinOrErr.takeError(); + return; + } + bool isThin = isThinOrErr.get(); + if (!isThin) { + Expected<uint64_t> MemberSize = getRawSize(); + if (!MemberSize) { + *Err = MemberSize.takeError(); return; + } Size += MemberSize.get(); Data = StringRef(Start, Size); } // Setup StartOfFile and PaddingBytes. - StartOfFile = sizeof(ArchiveMemberHeader); + StartOfFile = Header.getSizeOf(); // Don't include attached name. - StringRef Name = getRawName(); + Expected<StringRef> NameOrErr = getRawName(); + if (!NameOrErr){ + *Err = NameOrErr.takeError(); + return; + } + StringRef Name = NameOrErr.get(); if (Name.startswith("#1/")) { uint64_t NameSize; - if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) - llvm_unreachable("Long name length is not an integer"); + if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(Name.substr(3).rtrim(' ')); + OS.flush(); + uint64_t Offset = Start - Parent->getData().data(); + *Err = malformedError("long name length characters after the #1/ are " + "not all decimal numbers: '" + Buf + "' for " + "archive member header at offset " + + Twine(Offset)); + return; + } StartOfFile += NameSize; } } -ErrorOr<uint64_t> Archive::Child::getSize() const { +Expected<uint64_t> Archive::Child::getSize() const { if (Parent->IsThin) { - ErrorOr<uint32_t> Size = getHeader()->getSize(); - if (std::error_code EC = Size.getError()) - return EC; + Expected<uint32_t> Size = Header.getSize(); + if (!Size) + return Size.takeError(); return Size.get(); } return Data.size() - StartOfFile; } -ErrorOr<uint64_t> Archive::Child::getRawSize() const { - ErrorOr<uint32_t> Size = getHeader()->getSize(); - if (std::error_code EC = Size.getError()) - return EC; - return Size.get(); +Expected<uint64_t> Archive::Child::getRawSize() const { + return Header.getSize(); } -bool Archive::Child::isThinMember() const { - StringRef Name = getHeader()->getName(); +Expected<bool> Archive::Child::isThinMember() const { + Expected<StringRef> NameOrErr = Header.getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); return Parent->IsThin && Name != "/" && Name != "//"; } -ErrorOr<std::string> Archive::Child::getFullName() const { - assert(isThinMember()); - ErrorOr<StringRef> NameOrErr = getName(); - if (std::error_code EC = NameOrErr.getError()) - return EC; +Expected<std::string> Archive::Child::getFullName() const { + Expected<bool> isThin = isThinMember(); + if (!isThin) + return isThin.takeError(); + assert(isThin.get()); + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) + return NameOrErr.takeError(); StringRef Name = *NameOrErr; if (sys::path::is_absolute(Name)) return Name; @@ -156,25 +411,29 @@ ErrorOr<std::string> Archive::Child::getFullName() const { return StringRef(FullName); } -ErrorOr<StringRef> Archive::Child::getBuffer() const { - if (!isThinMember()) { - ErrorOr<uint32_t> Size = getSize(); - if (std::error_code EC = Size.getError()) - return EC; +Expected<StringRef> Archive::Child::getBuffer() const { + Expected<bool> isThinOrErr = isThinMember(); + if (!isThinOrErr) + return isThinOrErr.takeError(); + bool isThin = isThinOrErr.get(); + if (!isThin) { + Expected<uint32_t> Size = getSize(); + if (!Size) + return Size.takeError(); return StringRef(Data.data() + StartOfFile, Size.get()); } - ErrorOr<std::string> FullNameOrEr = getFullName(); - if (std::error_code EC = FullNameOrEr.getError()) - return EC; - const std::string &FullName = *FullNameOrEr; + Expected<std::string> FullNameOrErr = getFullName(); + if (!FullNameOrErr) + return FullNameOrErr.takeError(); + const std::string &FullName = *FullNameOrErr; ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); if (std::error_code EC = Buf.getError()) - return EC; + return errorCodeToError(EC); Parent->ThinBuffers.push_back(std::move(*Buf)); return Parent->ThinBuffers.back()->getBuffer(); } -ErrorOr<Archive::Child> Archive::Child::getNext() const { +Expected<Archive::Child> Archive::Child::getNext() const { size_t SpaceToSkip = Data.size(); // If it's odd, add 1 to make it even. if (SpaceToSkip & 1) @@ -184,16 +443,25 @@ ErrorOr<Archive::Child> Archive::Child::getNext() const { // Check to see if this is at the end of the archive. if (NextLoc == Parent->Data.getBufferEnd()) - return Child(Parent, nullptr, nullptr); + return Child(nullptr, nullptr, nullptr); // Check to see if this is past the end of the archive. - if (NextLoc > Parent->Data.getBufferEnd()) - return object_error::parse_failed; + if (NextLoc > Parent->Data.getBufferEnd()) { + std::string Msg("offset to next archive member past the end of the archive " + "after member "); + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = Data.data() - Parent->getData().data(); + return malformedError(Msg + "at offset " + Twine(Offset)); + } else + return malformedError(Msg + NameOrErr.get()); + } - std::error_code EC; - Child Ret(Parent, NextLoc, &EC); - if (EC) - return EC; + Error Err = Error::success(); + Child Ret(Parent, NextLoc, &Err); + if (Err) + return std::move(Err); return Ret; } @@ -204,64 +472,34 @@ uint64_t Archive::Child::getChildOffset() const { return offset; } -ErrorOr<StringRef> Archive::Child::getName() const { - StringRef name = getRawName(); - // Check if it's a special name. - if (name[0] == '/') { - if (name.size() == 1) // Linker member. - return name; - if (name.size() == 2 && name[1] == '/') // String table. - return name; - // It's a long name. - // Get the offset. - std::size_t offset; - if (name.substr(1).rtrim(' ').getAsInteger(10, offset)) - llvm_unreachable("Long name offset is not an integer"); - - // Verify it. - if (offset >= Parent->StringTable.size()) - return object_error::parse_failed; - const char *addr = Parent->StringTable.begin() + offset; - - // GNU long file names end with a "/\n". - if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) { - StringRef::size_type End = StringRef(addr).find('\n'); - return StringRef(addr, End - 1); - } - return StringRef(addr); - } else if (name.startswith("#1/")) { - uint64_t name_size; - if (name.substr(3).rtrim(' ').getAsInteger(10, name_size)) - llvm_unreachable("Long name length is not an ingeter"); - return Data.substr(sizeof(ArchiveMemberHeader), name_size).rtrim('\0'); - } else { - // It is not a long name so trim the blanks at the end of the name. - if (name[name.size() - 1] != '/') { - return name.rtrim(' '); - } - } - // It's a simple name. - if (name[name.size() - 1] == '/') - return name.substr(0, name.size() - 1); - return name; +Expected<StringRef> Archive::Child::getName() const { + Expected<uint64_t> RawSizeOrErr = getRawSize(); + if (!RawSizeOrErr) + return RawSizeOrErr.takeError(); + uint64_t RawSize = RawSizeOrErr.get(); + Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + return Name; } -ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { - ErrorOr<StringRef> NameOrErr = getName(); - if (std::error_code EC = NameOrErr.getError()) - return EC; +Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { + Expected<StringRef> NameOrErr = getName(); + if (!NameOrErr) + return NameOrErr.takeError(); StringRef Name = NameOrErr.get(); - ErrorOr<StringRef> Buf = getBuffer(); - if (std::error_code EC = Buf.getError()) - return EC; + Expected<StringRef> Buf = getBuffer(); + if (!Buf) + return Buf.takeError(); return MemoryBufferRef(*Buf, Name); } Expected<std::unique_ptr<Binary>> Archive::Child::getAsBinary(LLVMContext *Context) const { - ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); - if (std::error_code EC = BuffOrErr.getError()) - return errorCodeToError(EC); + Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); + if (!BuffOrErr) + return BuffOrErr.takeError(); auto BinaryOrErr = createBinary(BuffOrErr.get(), Context); if (BinaryOrErr) @@ -270,7 +508,7 @@ Archive::Child::getAsBinary(LLVMContext *Context) const { } Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { - Error Err; + Error Err = Error::success(); std::unique_ptr<Archive> Ret(new Archive(Source, Err)); if (Err) return std::move(Err); @@ -284,7 +522,7 @@ void Archive::setFirstRegular(const Child &C) { Archive::Archive(MemoryBufferRef Source, Error &Err) : Binary(Binary::ID_Archive, Source) { - ErrorAsOutParameter ErrAsOutParam(Err); + ErrorAsOutParameter ErrAsOutParam(&Err); StringRef Buffer = Data.getBuffer(); // Check for sufficient magic. if (Buffer.startswith(ThinMagic)) { @@ -297,17 +535,20 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) return; } + // Make sure Format is initialized before any call to + // ArchiveMemberHeader::getName() is made. This could be a valid empty + // archive which is the same in all formats. So claiming it to be gnu to is + // fine if not totally correct before we look for a string table or table of + // contents. + Format = K_GNU; + // Get the special members. child_iterator I = child_begin(Err, false); if (Err) return; child_iterator E = child_end(); - // This is at least a valid empty archive. Since an empty archive is the - // same in all formats, just claim it to be gnu to make sure Format is - // initialized. - Format = K_GNU; - + // See if this is a valid empty archive and if so return. if (I == E) { Err = Error::success(); return; @@ -322,7 +563,12 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) return false; }; - StringRef Name = C->getRawName(); + Expected<StringRef> NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + StringRef Name = NameOrErr.get(); // Below is the pattern that is used to figure out the archive format // GNU archive format @@ -348,9 +594,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) Format = K_BSD; else // Name == "__.SYMDEF_64" Format = K_DARWIN64; - // We know that the symbol table is not an external file, so we just assert - // there is no error. - SymbolTable = *C->getBuffer(); + // We know that the symbol table is not an external file, but we still must + // check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); if (Increment()) return; setFirstRegular(*C); @@ -362,24 +613,34 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) if (Name.startswith("#1/")) { Format = K_BSD; // We know this is BSD, so getName will work since there is no string table. - ErrorOr<StringRef> NameOrErr = C->getName(); - if (auto ec = NameOrErr.getError()) { - Err = errorCodeToError(ec); + Expected<StringRef> NameOrErr = C->getName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); return; } Name = NameOrErr.get(); if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { - // We know that the symbol table is not an external file, so we just - // assert there is no error. - SymbolTable = *C->getBuffer(); + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); if (Increment()) return; } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") { Format = K_DARWIN64; - // We know that the symbol table is not an external file, so we just - // assert there is no error. - SymbolTable = *C->getBuffer(); + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); if (Increment()) return; } @@ -394,9 +655,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) bool has64SymTable = false; if (Name == "/" || Name == "/SYM64/") { - // We know that the symbol table is not an external file, so we just assert - // there is no error. - SymbolTable = *C->getBuffer(); + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); if (Name == "/SYM64/") has64SymTable = true; @@ -406,14 +672,24 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) Err = Error::success(); return; } - Name = C->getRawName(); + Expected<StringRef> NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); } if (Name == "//") { Format = has64SymTable ? K_MIPS64 : K_GNU; - // The string table is never an external member, so we just assert on the - // ErrorOr. - StringTable = *C->getBuffer(); + // The string table is never an external member, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + StringTable = BufOrErr.get(); if (Increment()) return; setFirstRegular(*C); @@ -434,9 +710,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) } Format = K_COFF; - // We know that the symbol table is not an external file, so we just assert - // there is no error. - SymbolTable = *C->getBuffer(); + // We know that the symbol table is not an external file, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + SymbolTable = BufOrErr.get(); if (Increment()) return; @@ -447,12 +728,22 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) return; } - Name = C->getRawName(); + NameOrErr = C->getRawName(); + if (!NameOrErr) { + Err = NameOrErr.takeError(); + return; + } + Name = NameOrErr.get(); if (Name == "//") { - // The string table is never an external member, so we just assert on the - // ErrorOr. - StringTable = *C->getBuffer(); + // The string table is never an external member, but we still + // must check any Expected<> return value. + Expected<StringRef> BufOrErr = C->getBuffer(); + if (!BufOrErr) { + Err = BufOrErr.takeError(); + return; + } + StringTable = BufOrErr.get(); if (Increment()) return; } @@ -463,7 +754,7 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) Archive::child_iterator Archive::child_begin(Error &Err, bool SkipInternal) const { - if (Data.getBufferSize() == 8) // empty archive. + if (isEmpty()) return child_end(); if (SkipInternal) @@ -472,25 +763,21 @@ Archive::child_iterator Archive::child_begin(Error &Err, &Err); const char *Loc = Data.getBufferStart() + strlen(Magic); - std::error_code EC; - Child C(this, Loc, &EC); - if (EC) { - ErrorAsOutParameter ErrAsOutParam(Err); - Err = errorCodeToError(EC); + Child C(this, Loc, &Err); + if (Err) return child_end(); - } return child_iterator(C, &Err); } Archive::child_iterator Archive::child_end() const { - return child_iterator(Child(this, nullptr, nullptr), nullptr); + return child_iterator(Child(nullptr, nullptr, nullptr), nullptr); } StringRef Archive::Symbol::getName() const { return Parent->getSymbolTable().begin() + StringIndex; } -ErrorOr<Archive::Child> Archive::Symbol::getMember() const { +Expected<Archive::Child> Archive::Symbol::getMember() const { const char *Buf = Parent->getSymbolTable().begin(); const char *Offsets = Buf; if (Parent->kind() == K_MIPS64 || Parent->kind() == K_DARWIN64) @@ -525,7 +812,7 @@ ErrorOr<Archive::Child> Archive::Symbol::getMember() const { uint32_t SymbolCount = read32le(Buf); if (SymbolIndex >= SymbolCount) - return object_error::parse_failed; + return errorCodeToError(object_error::parse_failed); // Skip SymbolCount to get to the indices table. const char *Indices = Buf + 4; @@ -537,16 +824,16 @@ ErrorOr<Archive::Child> Archive::Symbol::getMember() const { --OffsetIndex; if (OffsetIndex >= MemberCount) - return object_error::parse_failed; + return errorCodeToError(object_error::parse_failed); Offset = read32le(Offsets + OffsetIndex * 4); } const char *Loc = Parent->getData().begin() + Offset; - std::error_code EC; - Child C(Parent, Loc, &EC); - if (EC) - return EC; + Error Err = Error::success(); + Child C(Parent, Loc, &Err); + if (Err) + return std::move(Err); return C; } @@ -677,10 +964,13 @@ Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const { if (auto MemberOrErr = bs->getMember()) return Child(*MemberOrErr); else - return errorCodeToError(MemberOrErr.getError()); + return MemberOrErr.takeError(); } } return Optional<Child>(); } +// Returns true if archive file contains no member file. +bool Archive::isEmpty() const { return Data.getBufferSize() == 8; } + bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp index 5357326..f8e3c5a 100644 --- a/contrib/llvm/lib/Object/ArchiveWriter.cpp +++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp @@ -40,17 +40,30 @@ NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) Expected<NewArchiveMember> NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, bool Deterministic) { - ErrorOr<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); + Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); if (!BufOrErr) - return errorCodeToError(BufOrErr.getError()); + return BufOrErr.takeError(); NewArchiveMember M; + assert(M.IsNew == false); M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); if (!Deterministic) { - M.ModTime = OldMember.getLastModified(); - M.UID = OldMember.getUID(); - M.GID = OldMember.getGID(); - M.Perms = OldMember.getAccessMode(); + auto ModTimeOrErr = OldMember.getLastModified(); + if (!ModTimeOrErr) + return ModTimeOrErr.takeError(); + M.ModTime = ModTimeOrErr.get(); + Expected<unsigned> UIDOrErr = OldMember.getUID(); + if (!UIDOrErr) + return UIDOrErr.takeError(); + M.UID = UIDOrErr.get(); + Expected<unsigned> GIDOrErr = OldMember.getGID(); + if (!GIDOrErr) + return GIDOrErr.takeError(); + M.GID = GIDOrErr.get(); + Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); + if (!AccessModeOrErr) + return AccessModeOrErr.takeError(); + M.Perms = AccessModeOrErr.get(); } return std::move(M); } @@ -81,9 +94,11 @@ Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, return errorCodeToError(std::error_code(errno, std::generic_category())); NewArchiveMember M; + M.IsNew = true; M.Buf = std::move(*MemberBufferOrErr); if (!Deterministic) { - M.ModTime = Status.getLastModificationTime(); + M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( + Status.getLastModificationTime()); M.UID = Status.getUser(); M.GID = Status.getGroup(); M.Perms = Status.permissions(); @@ -115,11 +130,10 @@ static void print32(raw_ostream &Out, object::Archive::Kind Kind, support::endian::Writer<support::little>(Out).write(Val); } -static void printRestOfMemberHeader(raw_fd_ostream &Out, - const sys::TimeValue &ModTime, unsigned UID, - unsigned GID, unsigned Perms, - unsigned Size) { - printWithSpacePadding(Out, ModTime.toEpochTime(), 12); +static void printRestOfMemberHeader( + raw_fd_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { + printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); printWithSpacePadding(Out, UID, 6, true); printWithSpacePadding(Out, GID, 6, true); printWithSpacePadding(Out, format("%o", Perms), 8); @@ -127,17 +141,20 @@ static void printRestOfMemberHeader(raw_fd_ostream &Out, Out << "`\n"; } -static void printGNUSmallMemberHeader(raw_fd_ostream &Out, StringRef Name, - const sys::TimeValue &ModTime, - unsigned UID, unsigned GID, - unsigned Perms, unsigned Size) { +static void +printGNUSmallMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, + unsigned Size) { printWithSpacePadding(Out, Twine(Name) + "/", 16); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); } -static void printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, - const sys::TimeValue &ModTime, unsigned UID, - unsigned GID, unsigned Perms, unsigned Size) { +static void +printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, + unsigned Size) { uint64_t PosAfterHeader = Out.tell() + 60 + Name.size(); // Pad so that even 64 bit object files are aligned. unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); @@ -159,8 +176,8 @@ static void printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin, StringRef Name, std::vector<unsigned>::iterator &StringMapIndexIter, - const sys::TimeValue &ModTime, unsigned UID, unsigned GID, - unsigned Perms, unsigned Size) { + const sys::TimePoint<std::chrono::seconds> &ModTime, + unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { if (Kind == object::Archive::K_BSD) return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); if (!useStringTable(Thin, Name)) @@ -190,6 +207,12 @@ static std::string computeRelativePath(StringRef From, StringRef To) { for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI) sys::path::append(Relative, *ToI); +#ifdef LLVM_ON_WIN32 + // Replace backslashes with slashes so that the path is portable between *nix + // and Windows. + std::replace(Relative.begin(), Relative.end(), '\\', '/'); +#endif + return Relative.str(); } @@ -210,9 +233,12 @@ static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, } StringMapIndexes.push_back(Out.tell() - StartOffset); - if (Thin) - Out << computeRelativePath(ArcName, Path); - else + if (Thin) { + if (M.IsNew) + Out << computeRelativePath(ArcName, Path); + else + Out << M.Buf->getBufferIdentifier(); + } else Out << Name; Out << "/\n"; @@ -227,12 +253,12 @@ static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, Out.seek(Pos); } -static sys::TimeValue now(bool Deterministic) { +static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { + using namespace std::chrono; + if (!Deterministic) - return sys::TimeValue::now(); - sys::TimeValue TV; - TV.fromEpochTime(0); - return TV; + return time_point_cast<seconds>(system_clock::now()); + return sys::TimePoint<seconds>(); } // Returns the offset of the first reference to a member offset. diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp index ec051fe..8467d34 100644 --- a/contrib/llvm/lib/Object/Binary.cpp +++ b/contrib/llvm/lib/Object/Binary.cpp @@ -63,10 +63,12 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer, case sys::fs::file_magic::coff_import_library: case sys::fs::file_magic::pecoff_executable: case sys::fs::file_magic::bitcode: + case sys::fs::file_magic::wasm_object: return ObjectFile::createSymbolicFile(Buffer, Type, Context); case sys::fs::file_magic::macho_universal_binary: return MachOUniversalBinary::create(Buffer); case sys::fs::file_magic::unknown: + case sys::fs::file_magic::coff_cl_gl_object: case sys::fs::file_magic::windows_resource: // Unrecognized object file format. return errorCodeToError(object_error::invalid_file_type); diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index 0f79008..a2d8f12 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -157,6 +157,13 @@ uint64_t COFFObjectFile::getSymbolValueImpl(DataRefImpl Ref) const { return getCOFFSymbol(Ref).getValue(); } +uint32_t COFFObjectFile::getSymbolAlignment(DataRefImpl Ref) const { + // MSVC/link.exe seems to align symbols to the next-power-of-2 + // up to 32 bytes. + COFFSymbolRef Symb = getCOFFSymbol(Ref); + return std::min(uint64_t(32), PowerOf2Ceil(Symb.getValue())); +} + Expected<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const { uint64_t Result = getSymbolValue(Ref); COFFSymbolRef Symb = getCOFFSymbol(Ref); @@ -487,17 +494,18 @@ std::error_code COFFObjectFile::getHintName(uint32_t Rva, uint16_t &Hint, return std::error_code(); } -std::error_code COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir, - const debug_pdb_info *&PDBInfo, - StringRef &PDBFileName) const { +std::error_code +COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir, + const codeview::DebugInfo *&PDBInfo, + StringRef &PDBFileName) const { ArrayRef<uint8_t> InfoBytes; if (std::error_code EC = getRvaAndSizeAsBytes( DebugDir->AddressOfRawData, DebugDir->SizeOfData, InfoBytes)) return EC; - if (InfoBytes.size() < sizeof(debug_pdb_info) + 1) + if (InfoBytes.size() < sizeof(*PDBInfo) + 1) return object_error::parse_failed; - PDBInfo = reinterpret_cast<const debug_pdb_info *>(InfoBytes.data()); - InfoBytes = InfoBytes.drop_front(sizeof(debug_pdb_info)); + PDBInfo = reinterpret_cast<const codeview::DebugInfo *>(InfoBytes.data()); + InfoBytes = InfoBytes.drop_front(sizeof(*PDBInfo)); PDBFileName = StringRef(reinterpret_cast<const char *>(InfoBytes.data()), InfoBytes.size()); // Truncate the name at the first null byte. Ignore any padding. @@ -505,8 +513,9 @@ std::error_code COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir, return std::error_code(); } -std::error_code COFFObjectFile::getDebugPDBInfo(const debug_pdb_info *&PDBInfo, - StringRef &PDBFileName) const { +std::error_code +COFFObjectFile::getDebugPDBInfo(const codeview::DebugInfo *&PDBInfo, + StringRef &PDBFileName) const { for (const debug_directory &D : debug_directories()) if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW) return getDebugPDBInfo(&D, PDBInfo, PDBFileName); @@ -538,7 +547,7 @@ std::error_code COFFObjectFile::initImportTablePtr() { if (std::error_code EC = checkOffset(Data, IntPtr, DataEntry->Size)) return EC; ImportDirectory = reinterpret_cast< - const import_directory_table_entry *>(IntPtr); + const coff_import_directory_table_entry *>(IntPtr); return std::error_code(); } @@ -716,17 +725,23 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) } if ((EC = getObject(DataDirectory, Data, DataDirAddr, DataDirSize))) return; - CurPtr += COFFHeader->SizeOfOptionalHeader; } + if (COFFHeader) + CurPtr += COFFHeader->SizeOfOptionalHeader; + if ((EC = getObject(SectionTable, Data, base() + CurPtr, (uint64_t)getNumberOfSections() * sizeof(coff_section)))) return; // Initialize the pointer to the symbol table. if (getPointerToSymbolTable() != 0) { - if ((EC = initSymbolTablePtr())) - return; + if ((EC = initSymbolTablePtr())) { + SymbolTable16 = nullptr; + SymbolTable32 = nullptr; + StringTable = nullptr; + StringTableSize = 0; + } } else { // We had better not have any symbols if we don't have a symbol table. if (getNumberOfSymbols() != 0) { @@ -756,13 +771,13 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) EC = std::error_code(); } -basic_symbol_iterator COFFObjectFile::symbol_begin_impl() const { +basic_symbol_iterator COFFObjectFile::symbol_begin() const { DataRefImpl Ret; Ret.p = getSymbolTable(); return basic_symbol_iterator(SymbolRef(Ret, this)); } -basic_symbol_iterator COFFObjectFile::symbol_end_impl() const { +basic_symbol_iterator COFFObjectFile::symbol_end() const { // The symbol table ends where the string table begins. DataRefImpl Ret; Ret.p = reinterpret_cast<uintptr_t>(StringTable); @@ -772,7 +787,7 @@ basic_symbol_iterator COFFObjectFile::symbol_end_impl() const { import_directory_iterator COFFObjectFile::import_directory_begin() const { if (!ImportDirectory) return import_directory_end(); - if (ImportDirectory[0].ImportLookupTableRVA == 0) + if (ImportDirectory->isNull()) return import_directory_end(); return import_directory_iterator( ImportDirectoryEntryRef(ImportDirectory, 0, this)); @@ -1201,14 +1216,14 @@ operator==(const ImportDirectoryEntryRef &Other) const { void ImportDirectoryEntryRef::moveNext() { ++Index; - if (ImportTable[Index].ImportLookupTableRVA == 0) { + if (ImportTable[Index].isNull()) { Index = -1; ImportTable = nullptr; } } std::error_code ImportDirectoryEntryRef::getImportTableEntry( - const import_directory_table_entry *&Result) const { + const coff_import_directory_table_entry *&Result) const { return getObject(Result, OwningObject->Data, ImportTable + Index); } @@ -1250,13 +1265,13 @@ importedSymbolEnd(uint32_t RVA, const COFFObjectFile *Object) { imported_symbol_iterator ImportDirectoryEntryRef::imported_symbol_begin() const { - return importedSymbolBegin(ImportTable[Index].ImportLookupTableRVA, + return importedSymbolBegin(ImportTable[Index].ImportAddressTableRVA, OwningObject); } imported_symbol_iterator ImportDirectoryEntryRef::imported_symbol_end() const { - return importedSymbolEnd(ImportTable[Index].ImportLookupTableRVA, + return importedSymbolEnd(ImportTable[Index].ImportAddressTableRVA, OwningObject); } @@ -1265,6 +1280,21 @@ ImportDirectoryEntryRef::imported_symbols() const { return make_range(imported_symbol_begin(), imported_symbol_end()); } +imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_begin() const { + return importedSymbolBegin(ImportTable[Index].ImportLookupTableRVA, + OwningObject); +} + +imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_end() const { + return importedSymbolEnd(ImportTable[Index].ImportLookupTableRVA, + OwningObject); +} + +iterator_range<imported_symbol_iterator> +ImportDirectoryEntryRef::lookup_table_symbols() const { + return make_range(lookup_table_begin(), lookup_table_end()); +} + std::error_code ImportDirectoryEntryRef::getName(StringRef &Result) const { uintptr_t IntPtr = 0; if (std::error_code EC = diff --git a/contrib/llvm/lib/Object/Decompressor.cpp b/contrib/llvm/lib/Object/Decompressor.cpp new file mode 100644 index 0000000..bca41fd --- /dev/null +++ b/contrib/llvm/lib/Object/Decompressor.cpp @@ -0,0 +1,102 @@ +//===-- Decompressor.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Decompressor.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; +using namespace llvm::support::endian; +using namespace object; + +Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data, + bool IsLE, bool Is64Bit) { + if (!zlib::isAvailable()) + return createError("zlib is not available"); + + Decompressor D(Data); + Error Err = isGnuStyle(Name) ? D.consumeCompressedGnuHeader() + : D.consumeCompressedZLibHeader(Is64Bit, IsLE); + if (Err) + return std::move(Err); + return D; +} + +Decompressor::Decompressor(StringRef Data) + : SectionData(Data), DecompressedSize(0) {} + +Error Decompressor::consumeCompressedGnuHeader() { + if (!SectionData.startswith("ZLIB")) + return createError("corrupted compressed section header"); + + SectionData = SectionData.substr(4); + + // Consume uncompressed section size (big-endian 8 bytes). + if (SectionData.size() < 8) + return createError("corrupted uncompressed section size"); + DecompressedSize = read64be(SectionData.data()); + SectionData = SectionData.substr(8); + + return Error::success(); +} + +Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit, + bool IsLittleEndian) { + using namespace ELF; + uint64_t HdrSize = Is64Bit ? sizeof(Elf64_Chdr) : sizeof(Elf32_Chdr); + if (SectionData.size() < HdrSize) + return createError("corrupted compressed section header"); + + DataExtractor Extractor(SectionData, IsLittleEndian, 0); + uint32_t Offset = 0; + if (Extractor.getUnsigned(&Offset, Is64Bit ? sizeof(Elf64_Word) + : sizeof(Elf32_Word)) != + ELFCOMPRESS_ZLIB) + return createError("unsupported compression type"); + + // Skip Elf64_Chdr::ch_reserved field. + if (Is64Bit) + Offset += sizeof(Elf64_Word); + + DecompressedSize = Extractor.getUnsigned( + &Offset, Is64Bit ? sizeof(Elf64_Xword) : sizeof(Elf32_Word)); + SectionData = SectionData.substr(HdrSize); + return Error::success(); +} + +bool Decompressor::isGnuStyle(StringRef Name) { + return Name.startswith(".zdebug"); +} + +bool Decompressor::isCompressed(const object::SectionRef &Section) { + StringRef Name; + if (Section.getName(Name)) + return false; + return Section.isCompressed() || isGnuStyle(Name); +} + +bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { + return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name); +} + +Error Decompressor::decompress(SmallString<32> &Out) { + Out.resize(DecompressedSize); + return decompress({Out.data(), (size_t)DecompressedSize}); +} + +Error Decompressor::decompress(MutableArrayRef<char> Buffer) { + size_t Size = Buffer.size(); + zlib::Status Status = zlib::uncompress(SectionData, Buffer.data(), Size); + if (Status != zlib::StatusOK) + return createError("decompression failed"); + return Error::success(); +} diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp index 2dde18a..23682e1 100644 --- a/contrib/llvm/lib/Object/ELF.cpp +++ b/contrib/llvm/lib/Object/ELF.cpp @@ -54,6 +54,13 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { break; } break; + case ELF::EM_AVR: + switch (Type) { +#include "llvm/Support/ELFRelocs/AVR.def" + default: + break; + } + break; case ELF::EM_HEXAGON: switch (Type) { #include "llvm/Support/ELFRelocs/Hexagon.def" @@ -82,6 +89,13 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { break; } break; + case ELF::EM_RISCV: + switch (Type) { +#include "llvm/Support/ELFRelocs/RISCV.def" + default: + break; + } + break; case ELF::EM_S390: switch (Type) { #include "llvm/Support/ELFRelocs/SystemZ.def" diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp index 2357526..7d43a84 100644 --- a/contrib/llvm/lib/Object/Error.cpp +++ b/contrib/llvm/lib/Object/Error.cpp @@ -24,12 +24,12 @@ namespace { // deal with the Error value directly, rather than converting to error_code. class _object_error_category : public std::error_category { public: - const char* name() const LLVM_NOEXCEPT override; + const char* name() const noexcept override; std::string message(int ev) const override; }; } -const char *_object_error_category::name() const LLVM_NOEXCEPT { +const char *_object_error_category::name() const noexcept { return "llvm.object"; } @@ -50,6 +50,8 @@ std::string _object_error_category::message(int EV) const { return "Invalid section index"; case object_error::bitcode_section_not_found: return "Bitcode section not found in object file"; + case object_error::invalid_symbol_index: + return "Invalid symbol index"; } llvm_unreachable("An enumerator of object_error does not have a message " "defined."); @@ -77,18 +79,17 @@ const std::error_category &object::object_category() { llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) { if (auto Err2 = - handleErrors(std::move(Err), - [](std::unique_ptr<ECError> M) { - // Try to handle 'M'. If successful, return a success value from - // the handler. - if (M->convertToErrorCode() == object_error::invalid_file_type) - return Error::success(); + handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error { + // Try to handle 'M'. If successful, return a success value from + // the handler. + if (M->convertToErrorCode() == object_error::invalid_file_type) + return Error::success(); - // We failed to handle 'M' - return it from the handler. - // This value will be passed back from catchErrors and - // wind up in Err2, where it will be returned from this function. - return Error(std::move(M)); - })) + // We failed to handle 'M' - return it from the handler. + // This value will be passed back from catchErrors and + // wind up in Err2, where it will be returned from this function. + return Error(std::move(M)); + })) return Err2; return Err; } diff --git a/contrib/llvm/lib/Object/IRObjectFile.cpp b/contrib/llvm/lib/Object/IRObjectFile.cpp index 42c8ecd..adbf0de 100644 --- a/contrib/llvm/lib/Object/IRObjectFile.cpp +++ b/contrib/llvm/lib/Object/IRObjectFile.cpp @@ -14,7 +14,7 @@ #include "llvm/Object/IRObjectFile.h" #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Mangler.h" @@ -35,246 +35,52 @@ using namespace llvm; using namespace object; -IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod) - : SymbolicFile(Binary::ID_IR, Object), M(std::move(Mod)) { - Mang.reset(new Mangler()); - CollectAsmUndefinedRefs(Triple(M->getTargetTriple()), M->getModuleInlineAsm(), - [this](StringRef Name, BasicSymbolRef::Flags Flags) { - AsmSymbols.emplace_back(Name, std::move(Flags)); - }); +IRObjectFile::IRObjectFile(MemoryBufferRef Object, + std::vector<std::unique_ptr<Module>> Mods) + : SymbolicFile(Binary::ID_IR, Object), Mods(std::move(Mods)) { + for (auto &M : this->Mods) + SymTab.addModule(M.get()); } -// Parse inline ASM and collect the list of symbols that are not defined in -// the current module. This is inspired from IRObjectFile. -void IRObjectFile::CollectAsmUndefinedRefs( - const Triple &TT, StringRef InlineAsm, - function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmUndefinedRefs) { - if (InlineAsm.empty()) - return; +IRObjectFile::~IRObjectFile() {} - std::string Err; - const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); - if (!T) - return; - - std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str())); - if (!MRI) - return; - - std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str())); - if (!MAI) - return; - - std::unique_ptr<MCSubtargetInfo> STI( - T->createMCSubtargetInfo(TT.str(), "", "")); - if (!STI) - return; - - std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo()); - if (!MCII) - return; - - MCObjectFileInfo MOFI; - MCContext MCCtx(MAI.get(), MRI.get(), &MOFI); - MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, CodeModel::Default, MCCtx); - std::unique_ptr<RecordStreamer> Streamer(new RecordStreamer(MCCtx)); - T->createNullTargetStreamer(*Streamer); - - std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm)); - SourceMgr SrcMgr; - SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); - std::unique_ptr<MCAsmParser> Parser( - createMCAsmParser(SrcMgr, MCCtx, *Streamer, *MAI)); - - MCTargetOptions MCOptions; - std::unique_ptr<MCTargetAsmParser> TAP( - T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); - if (!TAP) - return; - - Parser->setTargetParser(*TAP); - if (Parser->Run(false)) - return; - - for (auto &KV : *Streamer) { - StringRef Key = KV.first(); - RecordStreamer::State Value = KV.second; - uint32_t Res = BasicSymbolRef::SF_None; - switch (Value) { - case RecordStreamer::NeverSeen: - llvm_unreachable("foo"); - case RecordStreamer::DefinedGlobal: - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::Defined: - break; - case RecordStreamer::Global: - case RecordStreamer::Used: - Res |= BasicSymbolRef::SF_Undefined; - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::GlobalWeak: - Res |= BasicSymbolRef::SF_Weak; - Res |= BasicSymbolRef::SF_Global; - break; - } - AsmUndefinedRefs(Key, BasicSymbolRef::Flags(Res)); - } -} - -IRObjectFile::~IRObjectFile() { - } - -static GlobalValue *getGV(DataRefImpl &Symb) { - if ((Symb.p & 3) == 3) - return nullptr; - - return reinterpret_cast<GlobalValue*>(Symb.p & ~uintptr_t(3)); -} - -static uintptr_t skipEmpty(Module::const_alias_iterator I, const Module &M) { - if (I == M.alias_end()) - return 3; - const GlobalValue *GV = &*I; - return reinterpret_cast<uintptr_t>(GV) | 2; -} - -static uintptr_t skipEmpty(Module::const_global_iterator I, const Module &M) { - if (I == M.global_end()) - return skipEmpty(M.alias_begin(), M); - const GlobalValue *GV = &*I; - return reinterpret_cast<uintptr_t>(GV) | 1; -} - -static uintptr_t skipEmpty(Module::const_iterator I, const Module &M) { - if (I == M.end()) - return skipEmpty(M.global_begin(), M); - const GlobalValue *GV = &*I; - return reinterpret_cast<uintptr_t>(GV) | 0; -} - -static unsigned getAsmSymIndex(DataRefImpl Symb) { - assert((Symb.p & uintptr_t(3)) == 3); - uintptr_t Index = Symb.p & ~uintptr_t(3); - Index >>= 2; - return Index; +static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb) { + return *reinterpret_cast<ModuleSymbolTable::Symbol *>(Symb.p); } void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const { - const GlobalValue *GV = getGV(Symb); - uintptr_t Res; - - switch (Symb.p & 3) { - case 0: { - Module::const_iterator Iter(static_cast<const Function*>(GV)); - ++Iter; - Res = skipEmpty(Iter, *M); - break; - } - case 1: { - Module::const_global_iterator Iter(static_cast<const GlobalVariable*>(GV)); - ++Iter; - Res = skipEmpty(Iter, *M); - break; - } - case 2: { - Module::const_alias_iterator Iter(static_cast<const GlobalAlias*>(GV)); - ++Iter; - Res = skipEmpty(Iter, *M); - break; - } - case 3: { - unsigned Index = getAsmSymIndex(Symb); - assert(Index < AsmSymbols.size()); - ++Index; - Res = (Index << 2) | 3; - break; - } - default: - llvm_unreachable("unreachable case"); - } - - Symb.p = Res; + Symb.p += sizeof(ModuleSymbolTable::Symbol); } std::error_code IRObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const { - const GlobalValue *GV = getGV(Symb); - if (!GV) { - unsigned Index = getAsmSymIndex(Symb); - assert(Index <= AsmSymbols.size()); - OS << AsmSymbols[Index].first; - return std::error_code(); - } - - if (GV->hasDLLImportStorageClass()) - OS << "__imp_"; - - if (Mang) - Mang->getNameWithPrefix(OS, GV, false); - else - OS << GV->getName(); - + SymTab.printSymbolName(OS, getSym(Symb)); return std::error_code(); } uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { - const GlobalValue *GV = getGV(Symb); - - if (!GV) { - unsigned Index = getAsmSymIndex(Symb); - assert(Index <= AsmSymbols.size()); - return AsmSymbols[Index].second; - } - - uint32_t Res = BasicSymbolRef::SF_None; - if (GV->isDeclarationForLinker()) - Res |= BasicSymbolRef::SF_Undefined; - else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage()) - Res |= BasicSymbolRef::SF_Hidden; - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { - if (GVar->isConstant()) - Res |= BasicSymbolRef::SF_Const; - } - if (GV->hasPrivateLinkage()) - Res |= BasicSymbolRef::SF_FormatSpecific; - if (!GV->hasLocalLinkage()) - Res |= BasicSymbolRef::SF_Global; - if (GV->hasCommonLinkage()) - Res |= BasicSymbolRef::SF_Common; - if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || - GV->hasExternalWeakLinkage()) - Res |= BasicSymbolRef::SF_Weak; - - if (GV->getName().startswith("llvm.")) - Res |= BasicSymbolRef::SF_FormatSpecific; - else if (auto *Var = dyn_cast<GlobalVariable>(GV)) { - if (Var->getSection() == "llvm.metadata") - Res |= BasicSymbolRef::SF_FormatSpecific; - } - - return Res; + return SymTab.getSymbolFlags(getSym(Symb)); } -GlobalValue *IRObjectFile::getSymbolGV(DataRefImpl Symb) { return getGV(Symb); } - -std::unique_ptr<Module> IRObjectFile::takeModule() { return std::move(M); } - -basic_symbol_iterator IRObjectFile::symbol_begin_impl() const { - Module::const_iterator I = M->begin(); +basic_symbol_iterator IRObjectFile::symbol_begin() const { DataRefImpl Ret; - Ret.p = skipEmpty(I, *M); + Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data()); return basic_symbol_iterator(BasicSymbolRef(Ret, this)); } -basic_symbol_iterator IRObjectFile::symbol_end_impl() const { +basic_symbol_iterator IRObjectFile::symbol_end() const { DataRefImpl Ret; - uint64_t NumAsm = AsmSymbols.size(); - NumAsm <<= 2; - Ret.p = 3 | NumAsm; + Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data() + + SymTab.symbols().size()); return basic_symbol_iterator(BasicSymbolRef(Ret, this)); } +StringRef IRObjectFile::getTargetTriple() const { + // Each module must have the same target triple, so we arbitrarily access the + // first one. + return Mods[0]->getTargetTriple(); +} + ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) { for (const SectionRef &Sec : Obj.sections()) { if (Sec.isBitcode()) { @@ -307,22 +113,28 @@ ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Ob } } -ErrorOr<std::unique_ptr<IRObjectFile>> -llvm::object::IRObjectFile::create(MemoryBufferRef Object, - LLVMContext &Context) { +Expected<std::unique_ptr<IRObjectFile>> +IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); if (!BCOrErr) - return BCOrErr.getError(); - - std::unique_ptr<MemoryBuffer> Buff = - MemoryBuffer::getMemBuffer(BCOrErr.get(), false); - - ErrorOr<std::unique_ptr<Module>> MOrErr = - getLazyBitcodeModule(std::move(Buff), Context, - /*ShouldLazyLoadMetadata*/ true); - if (std::error_code EC = MOrErr.getError()) - return EC; + return errorCodeToError(BCOrErr.getError()); + + Expected<std::vector<BitcodeModule>> BMsOrErr = + getBitcodeModuleList(*BCOrErr); + if (!BMsOrErr) + return BMsOrErr.takeError(); + + std::vector<std::unique_ptr<Module>> Mods; + for (auto BM : *BMsOrErr) { + Expected<std::unique_ptr<Module>> MOrErr = + BM.getLazyModule(Context, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Mods.push_back(std::move(*MOrErr)); + } - std::unique_ptr<Module> &M = MOrErr.get(); - return llvm::make_unique<IRObjectFile>(Object, std::move(M)); + return std::unique_ptr<IRObjectFile>( + new IRObjectFile(*BCOrErr, std::move(Mods))); } diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp index 563236f..5b01867 100644 --- a/contrib/llvm/lib/Object/MachOObjectFile.cpp +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -27,6 +27,7 @@ #include <cctype> #include <cstring> #include <limits> +#include <list> using namespace llvm; using namespace object; @@ -47,37 +48,37 @@ malformedError(Twine Msg) { // FIXME: Replace all uses of this function with getStructOrErr. template <typename T> -static T getStruct(const MachOObjectFile *O, const char *P) { +static T getStruct(const MachOObjectFile &O, const char *P) { // Don't read before the beginning or past the end of the file - if (P < O->getData().begin() || P + sizeof(T) > O->getData().end()) + if (P < O.getData().begin() || P + sizeof(T) > O.getData().end()) report_fatal_error("Malformed MachO file."); T Cmd; memcpy(&Cmd, P, sizeof(T)); - if (O->isLittleEndian() != sys::IsLittleEndianHost) + if (O.isLittleEndian() != sys::IsLittleEndianHost) MachO::swapStruct(Cmd); return Cmd; } template <typename T> -static Expected<T> getStructOrErr(const MachOObjectFile *O, const char *P) { +static Expected<T> getStructOrErr(const MachOObjectFile &O, const char *P) { // Don't read before the beginning or past the end of the file - if (P < O->getData().begin() || P + sizeof(T) > O->getData().end()) + if (P < O.getData().begin() || P + sizeof(T) > O.getData().end()) return malformedError("Structure read out-of-range"); T Cmd; memcpy(&Cmd, P, sizeof(T)); - if (O->isLittleEndian() != sys::IsLittleEndianHost) + if (O.isLittleEndian() != sys::IsLittleEndianHost) MachO::swapStruct(Cmd); return Cmd; } static const char * -getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L, +getSectionPtr(const MachOObjectFile &O, MachOObjectFile::LoadCommandInfo L, unsigned Sec) { uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr); - bool Is64 = O->is64Bit(); + bool Is64 = O.is64Bit(); unsigned SegmentLoadSize = Is64 ? sizeof(MachO::segment_command_64) : sizeof(MachO::segment_command); unsigned SectionSize = Is64 ? sizeof(MachO::section_64) : @@ -87,12 +88,12 @@ getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L, return reinterpret_cast<const char*>(SectionAddr); } -static const char *getPtr(const MachOObjectFile *O, size_t Offset) { - return O->getData().substr(Offset, 1).data(); +static const char *getPtr(const MachOObjectFile &O, size_t Offset) { + return O.getData().substr(Offset, 1).data(); } static MachO::nlist_base -getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) { +getSymbolTableEntryBase(const MachOObjectFile &O, DataRefImpl DRI) { const char *P = reinterpret_cast<const char *>(DRI.p); return getStruct<MachO::nlist_base>(O, P); } @@ -112,8 +113,8 @@ static void advance(T &it, size_t Val) { ++it; } -static unsigned getCPUType(const MachOObjectFile *O) { - return O->getHeader().cputype; +static unsigned getCPUType(const MachOObjectFile &O) { + return O.getHeader().cputype; } static uint32_t @@ -126,22 +127,21 @@ getScatteredRelocationAddress(const MachO::any_relocation_info &RE) { return RE.r_word0 & 0xffffff; } -static bool getPlainRelocationPCRel(const MachOObjectFile *O, +static bool getPlainRelocationPCRel(const MachOObjectFile &O, const MachO::any_relocation_info &RE) { - if (O->isLittleEndian()) + if (O.isLittleEndian()) return (RE.r_word1 >> 24) & 1; return (RE.r_word1 >> 7) & 1; } static bool -getScatteredRelocationPCRel(const MachOObjectFile *O, - const MachO::any_relocation_info &RE) { +getScatteredRelocationPCRel(const MachO::any_relocation_info &RE) { return (RE.r_word0 >> 30) & 1; } -static unsigned getPlainRelocationLength(const MachOObjectFile *O, +static unsigned getPlainRelocationLength(const MachOObjectFile &O, const MachO::any_relocation_info &RE) { - if (O->isLittleEndian()) + if (O.isLittleEndian()) return (RE.r_word1 >> 25) & 3; return (RE.r_word1 >> 5) & 3; } @@ -151,25 +151,25 @@ getScatteredRelocationLength(const MachO::any_relocation_info &RE) { return (RE.r_word0 >> 28) & 3; } -static unsigned getPlainRelocationType(const MachOObjectFile *O, +static unsigned getPlainRelocationType(const MachOObjectFile &O, const MachO::any_relocation_info &RE) { - if (O->isLittleEndian()) + if (O.isLittleEndian()) return RE.r_word1 >> 28; return RE.r_word1 & 0xf; } -static uint32_t getSectionFlags(const MachOObjectFile *O, +static uint32_t getSectionFlags(const MachOObjectFile &O, DataRefImpl Sec) { - if (O->is64Bit()) { - MachO::section_64 Sect = O->getSection64(Sec); + if (O.is64Bit()) { + MachO::section_64 Sect = O.getSection64(Sec); return Sect.flags; } - MachO::section Sect = O->getSection(Sec); + MachO::section Sect = O.getSection(Sec); return Sect.flags; } static Expected<MachOObjectFile::LoadCommandInfo> -getLoadCommandInfo(const MachOObjectFile *Obj, const char *Ptr, +getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr, uint32_t LoadCommandIndex) { if (auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr)) { if (CmdOrErr->cmdsize < 8) @@ -181,31 +181,31 @@ getLoadCommandInfo(const MachOObjectFile *Obj, const char *Ptr, } static Expected<MachOObjectFile::LoadCommandInfo> -getFirstLoadCommandInfo(const MachOObjectFile *Obj) { - unsigned HeaderSize = Obj->is64Bit() ? sizeof(MachO::mach_header_64) - : sizeof(MachO::mach_header); - if (sizeof(MachOObjectFile::LoadCommandInfo) > Obj->getHeader().sizeofcmds) +getFirstLoadCommandInfo(const MachOObjectFile &Obj) { + unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64) + : sizeof(MachO::mach_header); + if (sizeof(MachO::load_command) > Obj.getHeader().sizeofcmds) return malformedError("load command 0 extends past the end all load " "commands in the file"); return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize), 0); } static Expected<MachOObjectFile::LoadCommandInfo> -getNextLoadCommandInfo(const MachOObjectFile *Obj, uint32_t LoadCommandIndex, +getNextLoadCommandInfo(const MachOObjectFile &Obj, uint32_t LoadCommandIndex, const MachOObjectFile::LoadCommandInfo &L) { - unsigned HeaderSize = Obj->is64Bit() ? sizeof(MachO::mach_header_64) - : sizeof(MachO::mach_header); - if (L.Ptr + L.C.cmdsize + sizeof(MachOObjectFile::LoadCommandInfo) > - Obj->getData().data() + HeaderSize + Obj->getHeader().sizeofcmds) + unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64) + : sizeof(MachO::mach_header); + if (L.Ptr + L.C.cmdsize + sizeof(MachO::load_command) > + Obj.getData().data() + HeaderSize + Obj.getHeader().sizeofcmds) return malformedError("load command " + Twine(LoadCommandIndex + 1) + " extends past the end all load commands in the file"); return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize, LoadCommandIndex + 1); } template <typename T> -static void parseHeader(const MachOObjectFile *Obj, T &Header, +static void parseHeader(const MachOObjectFile &Obj, T &Header, Error &Err) { - if (sizeof(T) > Obj->getData().size()) { + if (sizeof(T) > Obj.getData().size()) { Err = malformedError("the mach header extends past the end of the " "file"); return; @@ -216,31 +216,160 @@ static void parseHeader(const MachOObjectFile *Obj, T &Header, Err = HeaderOrErr.takeError(); } +// This is used to check for overlapping of Mach-O elements. +struct MachOElement { + uint64_t Offset; + uint64_t Size; + const char *Name; +}; + +static Error checkOverlappingElement(std::list<MachOElement> &Elements, + uint64_t Offset, uint64_t Size, + const char *Name) { + if (Size == 0) + return Error::success(); + + for (auto it=Elements.begin() ; it != Elements.end(); ++it) { + auto E = *it; + if ((Offset >= E.Offset && Offset < E.Offset + E.Size) || + (Offset + Size > E.Offset && Offset + Size < E.Offset + E.Size) || + (Offset <= E.Offset && Offset + Size >= E.Offset + E.Size)) + return malformedError(Twine(Name) + " at offset " + Twine(Offset) + + " with a size of " + Twine(Size) + ", overlaps " + + E.Name + " at offset " + Twine(E.Offset) + " with " + "a size of " + Twine(E.Size)); + auto nt = it; + nt++; + if (nt != Elements.end()) { + auto N = *nt; + if (Offset + Size <= N.Offset) { + Elements.insert(nt, {Offset, Size, Name}); + return Error::success(); + } + } + } + Elements.push_back({Offset, Size, Name}); + return Error::success(); +} + // Parses LC_SEGMENT or LC_SEGMENT_64 load command, adds addresses of all // sections to \param Sections, and optionally sets // \param IsPageZeroSegment to true. -template <typename SegmentCmd> +template <typename Segment, typename Section> static Error parseSegmentLoadCommand( - const MachOObjectFile *Obj, const MachOObjectFile::LoadCommandInfo &Load, + const MachOObjectFile &Obj, const MachOObjectFile::LoadCommandInfo &Load, SmallVectorImpl<const char *> &Sections, bool &IsPageZeroSegment, - uint32_t LoadCommandIndex, const char *CmdName) { - const unsigned SegmentLoadSize = sizeof(SegmentCmd); + uint32_t LoadCommandIndex, const char *CmdName, uint64_t SizeOfHeaders, + std::list<MachOElement> &Elements) { + const unsigned SegmentLoadSize = sizeof(Segment); if (Load.C.cmdsize < SegmentLoadSize) return malformedError("load command " + Twine(LoadCommandIndex) + " " + CmdName + " cmdsize too small"); - if (auto SegOrErr = getStructOrErr<SegmentCmd>(Obj, Load.Ptr)) { - SegmentCmd S = SegOrErr.get(); - const unsigned SectionSize = - Obj->is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section); + if (auto SegOrErr = getStructOrErr<Segment>(Obj, Load.Ptr)) { + Segment S = SegOrErr.get(); + const unsigned SectionSize = sizeof(Section); + uint64_t FileSize = Obj.getData().size(); if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize || S.nsects * SectionSize > Load.C.cmdsize - SegmentLoadSize) return malformedError("load command " + Twine(LoadCommandIndex) + - " inconsistent cmdsize in " + CmdName + + " inconsistent cmdsize in " + CmdName + " for the number of sections"); for (unsigned J = 0; J < S.nsects; ++J) { const char *Sec = getSectionPtr(Obj, Load, J); Sections.push_back(Sec); + Section s = getStruct<Section>(Obj, Sec); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + s.offset > FileSize) + return malformedError("offset field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && S.fileoff == 0 && + s.offset < SizeOfHeaders && s.size != 0) + return malformedError("offset field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " not past the headers of the file"); + uint64_t BigSize = s.offset; + BigSize += s.size; + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + BigSize > FileSize) + return malformedError("offset field plus size field of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && + s.size > S.filesize) + return malformedError("size field of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " greater than the segment"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && s.size != 0 && + s.addr < S.vmaddr) + return malformedError("addr field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " less than the segment's vmaddr"); + BigSize = s.addr; + BigSize += s.size; + uint64_t BigEnd = S.vmaddr; + BigEnd += S.vmsize; + if (S.vmsize != 0 && s.size != 0 && BigSize > BigEnd) + return malformedError("addr field plus size of section " + Twine(J) + + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " greater than than " + "the segment's vmaddr plus vmsize"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB && + Obj.getHeader().filetype != MachO::MH_DSYM && + s.flags != MachO::S_ZEROFILL && + s.flags != MachO::S_THREAD_LOCAL_ZEROFILL) + if (Error Err = checkOverlappingElement(Elements, s.offset, s.size, + "section contents")) + return Err; + if (s.reloff > FileSize) + return malformedError("reloff field of section " + Twine(J) + " in " + + CmdName + " command " + Twine(LoadCommandIndex) + + " extends past the end of the file"); + BigSize = s.nreloc; + BigSize *= sizeof(struct MachO::relocation_info); + BigSize += s.reloff; + if (BigSize > FileSize) + return malformedError("reloff field plus nreloc field times sizeof(" + "struct relocation_info) of section " + + Twine(J) + " in " + CmdName + " command " + + Twine(LoadCommandIndex) + + " extends past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, s.reloff, s.nreloc * + sizeof(struct + MachO::relocation_info), + "section relocation entries")) + return Err; } + if (S.fileoff > FileSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field in " + CmdName + + " extends past the end of the file"); + uint64_t BigSize = S.fileoff; + BigSize += S.filesize; + if (BigSize > FileSize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field plus filesize field in " + + CmdName + " extends past the end of the file"); + if (S.vmsize != 0 && S.filesize > S.vmsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " fileoff field in " + CmdName + + " greater than vmsize field"); IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname); } else return SegOrErr.takeError(); @@ -248,54 +377,771 @@ static Error parseSegmentLoadCommand( return Error::success(); } +static Error checkSymtabCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **SymtabLoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::symtab_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_SYMTAB cmdsize too small"); + if (*SymtabLoadCmd != nullptr) + return malformedError("more than one LC_SYMTAB command"); + MachO::symtab_command Symtab = + getStruct<MachO::symtab_command>(Obj, Load.Ptr); + if (Symtab.cmdsize != sizeof(MachO::symtab_command)) + return malformedError("LC_SYMTAB command " + Twine(LoadCommandIndex) + + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (Symtab.symoff > FileSize) + return malformedError("symoff field of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + uint64_t SymtabSize = Symtab.nsyms; + const char *struct_nlist_name; + if (Obj.is64Bit()) { + SymtabSize *= sizeof(MachO::nlist_64); + struct_nlist_name = "struct nlist_64"; + } else { + SymtabSize *= sizeof(MachO::nlist); + struct_nlist_name = "struct nlist"; + } + uint64_t BigSize = SymtabSize; + BigSize += Symtab.symoff; + if (BigSize > FileSize) + return malformedError("symoff field plus nsyms field times sizeof(" + + Twine(struct_nlist_name) + ") of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + if (Error Err = checkOverlappingElement(Elements, Symtab.symoff, SymtabSize, + "symbol table")) + return Err; + if (Symtab.stroff > FileSize) + return malformedError("stroff field of LC_SYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end " + "of the file"); + BigSize = Symtab.stroff; + BigSize += Symtab.strsize; + if (BigSize > FileSize) + return malformedError("stroff field plus strsize field of LC_SYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Symtab.stroff, + Symtab.strsize, "string table")) + return Err; + *SymtabLoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDysymtabCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **DysymtabLoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::dysymtab_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_DYSYMTAB cmdsize too small"); + if (*DysymtabLoadCmd != nullptr) + return malformedError("more than one LC_DYSYMTAB command"); + MachO::dysymtab_command Dysymtab = + getStruct<MachO::dysymtab_command>(Obj, Load.Ptr); + if (Dysymtab.cmdsize != sizeof(MachO::dysymtab_command)) + return malformedError("LC_DYSYMTAB command " + Twine(LoadCommandIndex) + + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (Dysymtab.tocoff > FileSize) + return malformedError("tocoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = Dysymtab.ntoc; + BigSize *= sizeof(MachO::dylib_table_of_contents); + BigSize += Dysymtab.tocoff; + if (BigSize > FileSize) + return malformedError("tocoff field plus ntoc field times sizeof(struct " + "dylib_table_of_contents) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.tocoff, + Dysymtab.ntoc * sizeof(struct + MachO::dylib_table_of_contents), + "table of contents")) + return Err; + if (Dysymtab.modtaboff > FileSize) + return malformedError("modtaboff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nmodtab; + const char *struct_dylib_module_name; + uint64_t sizeof_modtab; + if (Obj.is64Bit()) { + sizeof_modtab = sizeof(MachO::dylib_module_64); + struct_dylib_module_name = "struct dylib_module_64"; + } else { + sizeof_modtab = sizeof(MachO::dylib_module); + struct_dylib_module_name = "struct dylib_module"; + } + BigSize *= sizeof_modtab; + BigSize += Dysymtab.modtaboff; + if (BigSize > FileSize) + return malformedError("modtaboff field plus nmodtab field times sizeof(" + + Twine(struct_dylib_module_name) + ") of LC_DYSYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.modtaboff, + Dysymtab.nmodtab * sizeof_modtab, + "module table")) + return Err; + if (Dysymtab.extrefsymoff > FileSize) + return malformedError("extrefsymoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nextrefsyms; + BigSize *= sizeof(MachO::dylib_reference); + BigSize += Dysymtab.extrefsymoff; + if (BigSize > FileSize) + return malformedError("extrefsymoff field plus nextrefsyms field times " + "sizeof(struct dylib_reference) of LC_DYSYMTAB " + "command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.extrefsymoff, + Dysymtab.nextrefsyms * + sizeof(MachO::dylib_reference), + "reference table")) + return Err; + if (Dysymtab.indirectsymoff > FileSize) + return malformedError("indirectsymoff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nindirectsyms; + BigSize *= sizeof(uint32_t); + BigSize += Dysymtab.indirectsymoff; + if (BigSize > FileSize) + return malformedError("indirectsymoff field plus nindirectsyms field times " + "sizeof(uint32_t) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.indirectsymoff, + Dysymtab.nindirectsyms * + sizeof(uint32_t), + "indirect table")) + return Err; + if (Dysymtab.extreloff > FileSize) + return malformedError("extreloff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nextrel; + BigSize *= sizeof(MachO::relocation_info); + BigSize += Dysymtab.extreloff; + if (BigSize > FileSize) + return malformedError("extreloff field plus nextrel field times sizeof" + "(struct relocation_info) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.extreloff, + Dysymtab.nextrel * + sizeof(MachO::relocation_info), + "external relocation table")) + return Err; + if (Dysymtab.locreloff > FileSize) + return malformedError("locreloff field of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + BigSize = Dysymtab.nlocrel; + BigSize *= sizeof(MachO::relocation_info); + BigSize += Dysymtab.locreloff; + if (BigSize > FileSize) + return malformedError("locreloff field plus nlocrel field times sizeof" + "(struct relocation_info) of LC_DYSYMTAB command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Dysymtab.locreloff, + Dysymtab.nlocrel * + sizeof(MachO::relocation_info), + "local relocation table")) + return Err; + *DysymtabLoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkLinkeditDataCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName, + std::list<MachOElement> &Elements, + const char *ElementName) { + if (Load.C.cmdsize < sizeof(MachO::linkedit_data_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + if (*LoadCmd != nullptr) + return malformedError("more than one " + Twine(CmdName) + " command"); + MachO::linkedit_data_command LinkData = + getStruct<MachO::linkedit_data_command>(Obj, Load.Ptr); + if (LinkData.cmdsize != sizeof(MachO::linkedit_data_command)) + return malformedError(Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (LinkData.dataoff > FileSize) + return malformedError("dataoff field of " + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = LinkData.dataoff; + BigSize += LinkData.datasize; + if (BigSize > FileSize) + return malformedError("dataoff field plus datasize field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, LinkData.dataoff, + LinkData.datasize, ElementName)) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDyldInfoCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize < sizeof(MachO::dyld_info_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_DYLD_INFO and or LC_DYLD_INFO_ONLY " + "command"); + MachO::dyld_info_command DyldInfo = + getStruct<MachO::dyld_info_command>(Obj, Load.Ptr); + if (DyldInfo.cmdsize != sizeof(MachO::dyld_info_command)) + return malformedError(Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " has incorrect cmdsize"); + uint64_t FileSize = Obj.getData().size(); + if (DyldInfo.rebase_off > FileSize) + return malformedError("rebase_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = DyldInfo.rebase_off; + BigSize += DyldInfo.rebase_size; + if (BigSize > FileSize) + return malformedError("rebase_off field plus rebase_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.rebase_off, + DyldInfo.rebase_size, + "dyld rebase info")) + return Err; + if (DyldInfo.bind_off > FileSize) + return malformedError("bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.bind_off; + BigSize += DyldInfo.bind_size; + if (BigSize > FileSize) + return malformedError("bind_off field plus bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.bind_off, + DyldInfo.bind_size, + "dyld bind info")) + return Err; + if (DyldInfo.weak_bind_off > FileSize) + return malformedError("weak_bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.weak_bind_off; + BigSize += DyldInfo.weak_bind_size; + if (BigSize > FileSize) + return malformedError("weak_bind_off field plus weak_bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.weak_bind_off, + DyldInfo.weak_bind_size, + "dyld weak bind info")) + return Err; + if (DyldInfo.lazy_bind_off > FileSize) + return malformedError("lazy_bind_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.lazy_bind_off; + BigSize += DyldInfo.lazy_bind_size; + if (BigSize > FileSize) + return malformedError("lazy_bind_off field plus lazy_bind_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.lazy_bind_off, + DyldInfo.lazy_bind_size, + "dyld lazy bind info")) + return Err; + if (DyldInfo.export_off > FileSize) + return malformedError("export_off field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + BigSize = DyldInfo.export_off; + BigSize += DyldInfo.export_size; + if (BigSize > FileSize) + return malformedError("export_off field plus export_size field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, DyldInfo.export_off, + DyldInfo.export_size, + "dyld export info")) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDylibCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::dylib_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + MachO::dylib_command D = getStruct<MachO::dylib_command>(Obj, Load.Ptr); + if (D.dylib.name < sizeof(MachO::dylib_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field too small, not past " + "the end of the dylib_command struct"); + if (D.dylib.name >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the name and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = D.dylib.name; i < D.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " library name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkDylibIdCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd) { + if (Error Err = checkDylibCommand(Obj, Load, LoadCommandIndex, + "LC_ID_DYLIB")) + return Err; + if (*LoadCmd != nullptr) + return malformedError("more than one LC_ID_DYLIB command"); + if (Obj.getHeader().filetype != MachO::MH_DYLIB && + Obj.getHeader().filetype != MachO::MH_DYLIB_STUB) + return malformedError("LC_ID_DYLIB load command in non-dynamic library " + "file type"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkDyldCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::dylinker_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " cmdsize too small"); + MachO::dylinker_command D = getStruct<MachO::dylinker_command>(Obj, Load.Ptr); + if (D.name < sizeof(MachO::dylinker_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field too small, not past " + "the end of the dylinker_command struct"); + if (D.name >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " name.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the name and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = D.name; i < D.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= D.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " dyld name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkVersCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, const char *CmdName) { + if (Load.C.cmdsize != sizeof(MachO::version_min_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " has incorrect cmdsize"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_VERSION_MIN_MACOSX, " + "LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_TVOS or " + "LC_VERSION_MIN_WATCHOS command"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkRpathCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex) { + if (Load.C.cmdsize < sizeof(MachO::rpath_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH cmdsize too small"); + MachO::rpath_command R = getStruct<MachO::rpath_command>(Obj, Load.Ptr); + if (R.path < sizeof(MachO::rpath_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH path.offset field too small, not past " + "the end of the rpath_command struct"); + if (R.path >= R.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH path.offset field extends past the end " + "of the load command"); + // Make sure there is a null between the starting offset of the path and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = R.path; i < R.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= R.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_RPATH library name extends past the end of the " + "load command"); + return Error::success(); +} + +static Error checkEncryptCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + uint64_t cryptoff, uint64_t cryptsize, + const char **LoadCmd, const char *CmdName) { + if (*LoadCmd != nullptr) + return malformedError("more than one LC_ENCRYPTION_INFO and or " + "LC_ENCRYPTION_INFO_64 command"); + uint64_t FileSize = Obj.getData().size(); + if (cryptoff > FileSize) + return malformedError("cryptoff field of " + Twine(CmdName) + + " command " + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = cryptoff; + BigSize += cryptsize; + if (BigSize > FileSize) + return malformedError("cryptoff field plus cryptsize field of " + + Twine(CmdName) + " command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + *LoadCmd = Load.Ptr; + return Error::success(); +} + +static Error checkLinkerOptCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex) { + if (Load.C.cmdsize < sizeof(MachO::linker_option_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_LINKER_OPTION cmdsize too small"); + MachO::linker_option_command L = + getStruct<MachO::linker_option_command>(Obj, Load.Ptr); + // Make sure the count of strings is correct. + const char *string = (const char *)Load.Ptr + + sizeof(struct MachO::linker_option_command); + uint32_t left = L.cmdsize - sizeof(struct MachO::linker_option_command); + uint32_t i = 0; + while (left > 0) { + while (*string == '\0' && left > 0) { + string++; + left--; + } + if (left > 0) { + i++; + uint32_t NullPos = StringRef(string, left).find('\0'); + uint32_t len = std::min(NullPos, left) + 1; + string += len; + left -= len; + } + } + if (L.count != i) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_LINKER_OPTION string count " + Twine(L.count) + + " does not match number of strings"); + return Error::success(); +} + +static Error checkSubCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, const char *CmdName, + size_t SizeOfCmd, const char *CmdStructName, + uint32_t PathOffset, const char *PathFieldName) { + if (PathOffset < SizeOfCmd) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + ".offset field too " + "small, not past the end of the " + CmdStructName); + if (PathOffset >= Load.C.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + ".offset field " + "extends past the end of the load command"); + // Make sure there is a null between the starting offset of the path and + // the end of the load command. + uint32_t i; + const char *P = (const char *)Load.Ptr; + for (i = PathOffset; i < Load.C.cmdsize; i++) + if (P[i] == '\0') + break; + if (i >= Load.C.cmdsize) + return malformedError("load command " + Twine(LoadCommandIndex) + " " + + CmdName + " " + PathFieldName + " name extends past " + "the end of the load command"); + return Error::success(); +} + +static Error checkThreadCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + const char *CmdName) { + if (Load.C.cmdsize < sizeof(MachO::thread_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + CmdName + " cmdsize too small"); + MachO::thread_command T = + getStruct<MachO::thread_command>(Obj, Load.Ptr); + const char *state = Load.Ptr + sizeof(MachO::thread_command); + const char *end = Load.Ptr + T.cmdsize; + uint32_t nflavor = 0; + uint32_t cputype = getCPUType(Obj); + while (state < end) { + if(state + sizeof(uint32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + "flavor in " + CmdName + " extends past end of " + "command"); + uint32_t flavor; + memcpy(&flavor, state, sizeof(uint32_t)); + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(flavor); + state += sizeof(uint32_t); + + if(state + sizeof(uint32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count in " + CmdName + " extends past end of " + "command"); + uint32_t count; + memcpy(&count, state, sizeof(uint32_t)); + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(count); + state += sizeof(uint32_t); + + if (cputype == MachO::CPU_TYPE_X86_64) { + if (flavor == MachO::x86_THREAD_STATE64) { + if (count != MachO::x86_THREAD_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state64_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_ARM) { + if (flavor == MachO::ARM_THREAD_STATE) { + if (count != MachO::ARM_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not ARM_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a ARM_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::arm_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " ARM_THREAD_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::arm_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_ARM64) { + if (flavor == MachO::ARM_THREAD_STATE64) { + if (count != MachO::ARM_THREAD_STATE64_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not ARM_THREAD_STATE64_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a ARM_THREAD_STATE64 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::arm_thread_state64_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " ARM_THREAD_STATE64 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::arm_thread_state64_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_POWERPC) { + if (flavor == MachO::PPC_THREAD_STATE) { + if (count != MachO::PPC_THREAD_STATE_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not PPC_THREAD_STATE_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a PPC_THREAD_STATE flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::ppc_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " PPC_THREAD_STATE extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::ppc_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else { + return malformedError("unknown cputype (" + Twine(cputype) + ") load " + "command " + Twine(LoadCommandIndex) + " for " + + CmdName + " command can't be checked"); + } + nflavor++; + } + return Error::success(); +} + +static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo + &Load, + uint32_t LoadCommandIndex, + const char **LoadCmd, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize != sizeof(MachO::twolevel_hints_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_TWOLEVEL_HINTS has incorrect cmdsize"); + if (*LoadCmd != nullptr) + return malformedError("more than one LC_TWOLEVEL_HINTS command"); + MachO::twolevel_hints_command Hints = + getStruct<MachO::twolevel_hints_command>(Obj, Load.Ptr); + uint64_t FileSize = Obj.getData().size(); + if (Hints.offset > FileSize) + return malformedError("offset field of LC_TWOLEVEL_HINTS command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + uint64_t BigSize = Hints.nhints; + BigSize *= Hints.nhints * sizeof(MachO::twolevel_hint); + BigSize += Hints.offset; + if (BigSize > FileSize) + return malformedError("offset field plus nhints times sizeof(struct " + "twolevel_hint) field of LC_TWOLEVEL_HINTS command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Hints.offset, Hints.nhints * + sizeof(MachO::twolevel_hint), + "two level hints")) + return Err; + *LoadCmd = Load.Ptr; + return Error::success(); +} + +// Returns true if the libObject code does not support the load command and its +// contents. The cmd value it is treated as an unknown load command but with +// an error message that says the cmd value is obsolete. +static bool isLoadCommandObsolete(uint32_t cmd) { + if (cmd == MachO::LC_SYMSEG || + cmd == MachO::LC_LOADFVMLIB || + cmd == MachO::LC_IDFVMLIB || + cmd == MachO::LC_IDENT || + cmd == MachO::LC_FVMFILE || + cmd == MachO::LC_PREPAGE || + cmd == MachO::LC_PREBOUND_DYLIB || + cmd == MachO::LC_TWOLEVEL_HINTS || + cmd == MachO::LC_PREBIND_CKSUM) + return true; + return false; +} + Expected<std::unique_ptr<MachOObjectFile>> MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian, - bool Is64Bits) { - Error Err; + bool Is64Bits, uint32_t UniversalCputype, + uint32_t UniversalIndex) { + Error Err = Error::success(); std::unique_ptr<MachOObjectFile> Obj( new MachOObjectFile(std::move(Object), IsLittleEndian, - Is64Bits, Err)); + Is64Bits, Err, UniversalCputype, + UniversalIndex)); if (Err) return std::move(Err); return std::move(Obj); } MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, - bool Is64bits, Error &Err) + bool Is64bits, Error &Err, + uint32_t UniversalCputype, + uint32_t UniversalIndex) : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr), DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr), DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr), HasPageZeroSegment(false) { - ErrorAsOutParameter ErrAsOutParam(Err); - uint64_t BigSize; + ErrorAsOutParameter ErrAsOutParam(&Err); + uint64_t SizeOfHeaders; + uint32_t cputype; if (is64Bit()) { - parseHeader(this, Header64, Err); - BigSize = sizeof(MachO::mach_header_64); + parseHeader(*this, Header64, Err); + SizeOfHeaders = sizeof(MachO::mach_header_64); + cputype = Header64.cputype; } else { - parseHeader(this, Header, Err); - BigSize = sizeof(MachO::mach_header); + parseHeader(*this, Header, Err); + SizeOfHeaders = sizeof(MachO::mach_header); + cputype = Header.cputype; } if (Err) return; - BigSize += getHeader().sizeofcmds; - if (getData().data() + BigSize > getData().end()) { + SizeOfHeaders += getHeader().sizeofcmds; + if (getData().data() + SizeOfHeaders > getData().end()) { Err = malformedError("load commands extend past the end of the file"); return; } - - uint32_t LoadCommandCount = getHeader().ncmds; - if (LoadCommandCount == 0) + if (UniversalCputype != 0 && cputype != UniversalCputype) { + Err = malformedError("universal header architecture: " + + Twine(UniversalIndex) + "'s cputype does not match " + "object file's mach header"); return; + } + std::list<MachOElement> Elements; + Elements.push_back({0, SizeOfHeaders, "Mach-O headers"}); + uint32_t LoadCommandCount = getHeader().ncmds; LoadCommandInfo Load; - if (auto LoadOrErr = getFirstLoadCommandInfo(this)) - Load = *LoadOrErr; - else { - Err = LoadOrErr.takeError(); - return; + if (LoadCommandCount != 0) { + if (auto LoadOrErr = getFirstLoadCommandInfo(*this)) + Load = *LoadOrErr; + else { + Err = LoadOrErr.takeError(); + return; + } } + const char *DyldIdLoadCmd = nullptr; + const char *FuncStartsLoadCmd = nullptr; + const char *SplitInfoLoadCmd = nullptr; + const char *CodeSignDrsLoadCmd = nullptr; + const char *CodeSignLoadCmd = nullptr; + const char *VersLoadCmd = nullptr; + const char *SourceLoadCmd = nullptr; + const char *EntryPointLoadCmd = nullptr; + const char *EncryptLoadCmd = nullptr; + const char *RoutinesLoadCmd = nullptr; + const char *UnixThreadLoadCmd = nullptr; + const char *TwoLevelHintsLoadCmd = nullptr; for (unsigned I = 0; I < LoadCommandCount; ++I) { if (is64Bit()) { if (Load.C.cmdsize % 8 != 0) { @@ -318,66 +1164,274 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, } LoadCommands.push_back(Load); if (Load.C.cmd == MachO::LC_SYMTAB) { - // Multiple symbol tables - if (SymtabLoadCmd) { - Err = malformedError("Multiple symbol tables"); + if ((Err = checkSymtabCommand(*this, Load, I, &SymtabLoadCmd, Elements))) return; - } - SymtabLoadCmd = Load.Ptr; } else if (Load.C.cmd == MachO::LC_DYSYMTAB) { - // Multiple dynamic symbol tables - if (DysymtabLoadCmd) { - Err = malformedError("Multiple dynamic symbol tables"); + if ((Err = checkDysymtabCommand(*this, Load, I, &DysymtabLoadCmd, + Elements))) return; - } - DysymtabLoadCmd = Load.Ptr; } else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) { - // Multiple data in code tables - if (DataInCodeLoadCmd) { - Err = malformedError("Multiple data-in-code tables"); + if ((Err = checkLinkeditDataCommand(*this, Load, I, &DataInCodeLoadCmd, + "LC_DATA_IN_CODE", Elements, + "data in code info"))) return; - } - DataInCodeLoadCmd = Load.Ptr; } else if (Load.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) { - // Multiple linker optimization hint tables - if (LinkOptHintsLoadCmd) { - Err = malformedError("Multiple linker optimization hint tables"); + if ((Err = checkLinkeditDataCommand(*this, Load, I, &LinkOptHintsLoadCmd, + "LC_LINKER_OPTIMIZATION_HINT", + Elements, "linker optimization " + "hints"))) return; - } - LinkOptHintsLoadCmd = Load.Ptr; - } else if (Load.C.cmd == MachO::LC_DYLD_INFO || - Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) { - // Multiple dyldinfo load commands - if (DyldInfoLoadCmd) { - Err = malformedError("Multiple dyldinfo load commands"); + } else if (Load.C.cmd == MachO::LC_FUNCTION_STARTS) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &FuncStartsLoadCmd, + "LC_FUNCTION_STARTS", Elements, + "function starts data"))) + return; + } else if (Load.C.cmd == MachO::LC_SEGMENT_SPLIT_INFO) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &SplitInfoLoadCmd, + "LC_SEGMENT_SPLIT_INFO", Elements, + "split info data"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignDrsLoadCmd, + "LC_DYLIB_CODE_SIGN_DRS", Elements, + "code signing RDs data"))) + return; + } else if (Load.C.cmd == MachO::LC_CODE_SIGNATURE) { + if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignLoadCmd, + "LC_CODE_SIGNATURE", Elements, + "code signature data"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_INFO) { + if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd, + "LC_DYLD_INFO", Elements))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) { + if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd, + "LC_DYLD_INFO_ONLY", Elements))) return; - } - DyldInfoLoadCmd = Load.Ptr; } else if (Load.C.cmd == MachO::LC_UUID) { - // Multiple UUID load commands + if (Load.C.cmdsize != sizeof(MachO::uuid_command)) { + Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect " + "cmdsize"); + return; + } if (UuidLoadCmd) { - Err = malformedError("Multiple UUID load commands"); + Err = malformedError("more than one LC_UUID command"); return; } UuidLoadCmd = Load.Ptr; } else if (Load.C.cmd == MachO::LC_SEGMENT_64) { - if ((Err = parseSegmentLoadCommand<MachO::segment_command_64>( - this, Load, Sections, HasPageZeroSegment, I, - "LC_SEGMENT_64"))) + if ((Err = parseSegmentLoadCommand<MachO::segment_command_64, + MachO::section_64>( + *this, Load, Sections, HasPageZeroSegment, I, + "LC_SEGMENT_64", SizeOfHeaders, Elements))) return; } else if (Load.C.cmd == MachO::LC_SEGMENT) { - if ((Err = parseSegmentLoadCommand<MachO::segment_command>( - this, Load, Sections, HasPageZeroSegment, I, "LC_SEGMENT"))) + if ((Err = parseSegmentLoadCommand<MachO::segment_command, + MachO::section>( + *this, Load, Sections, HasPageZeroSegment, I, + "LC_SEGMENT", SizeOfHeaders, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_ID_DYLIB) { + if ((Err = checkDylibIdCommand(*this, Load, I, &DyldIdLoadCmd))) + return; + } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_WEAK_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LAZY_LOAD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_REEXPORT_DYLIB"))) return; - } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB || - Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB || - Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB || - Load.C.cmd == MachO::LC_REEXPORT_DYLIB || - Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) { Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) { + if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_UPWARD_DYLIB"))) + return; + Libraries.push_back(Load.Ptr); + } else if (Load.C.cmd == MachO::LC_ID_DYLINKER) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_ID_DYLINKER"))) + return; + } else if (Load.C.cmd == MachO::LC_LOAD_DYLINKER) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_LOAD_DYLINKER"))) + return; + } else if (Load.C.cmd == MachO::LC_DYLD_ENVIRONMENT) { + if ((Err = checkDyldCommand(*this, Load, I, "LC_DYLD_ENVIRONMENT"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_MACOSX) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_MACOSX"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_IPHONEOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_IPHONEOS"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_TVOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_TVOS"))) + return; + } else if (Load.C.cmd == MachO::LC_VERSION_MIN_WATCHOS) { + if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, + "LC_VERSION_MIN_WATCHOS"))) + return; + } else if (Load.C.cmd == MachO::LC_RPATH) { + if ((Err = checkRpathCommand(*this, Load, I))) + return; + } else if (Load.C.cmd == MachO::LC_SOURCE_VERSION) { + if (Load.C.cmdsize != sizeof(MachO::source_version_command)) { + Err = malformedError("LC_SOURCE_VERSION command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (SourceLoadCmd) { + Err = malformedError("more than one LC_SOURCE_VERSION command"); + return; + } + SourceLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_MAIN) { + if (Load.C.cmdsize != sizeof(MachO::entry_point_command)) { + Err = malformedError("LC_MAIN command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (EntryPointLoadCmd) { + Err = malformedError("more than one LC_MAIN command"); + return; + } + EntryPointLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO) { + if (Load.C.cmdsize != sizeof(MachO::encryption_info_command)) { + Err = malformedError("LC_ENCRYPTION_INFO command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + MachO::encryption_info_command E = + getStruct<MachO::encryption_info_command>(*this, Load.Ptr); + if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize, + &EncryptLoadCmd, "LC_ENCRYPTION_INFO"))) + return; + } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO_64) { + if (Load.C.cmdsize != sizeof(MachO::encryption_info_command_64)) { + Err = malformedError("LC_ENCRYPTION_INFO_64 command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + MachO::encryption_info_command_64 E = + getStruct<MachO::encryption_info_command_64>(*this, Load.Ptr); + if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize, + &EncryptLoadCmd, "LC_ENCRYPTION_INFO_64"))) + return; + } else if (Load.C.cmd == MachO::LC_LINKER_OPTION) { + if ((Err = checkLinkerOptCommand(*this, Load, I))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_FRAMEWORK) { + if (Load.C.cmdsize < sizeof(MachO::sub_framework_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_FRAMEWORK cmdsize too small"); + return; + } + MachO::sub_framework_command S = + getStruct<MachO::sub_framework_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_FRAMEWORK", + sizeof(MachO::sub_framework_command), + "sub_framework_command", S.umbrella, + "umbrella"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_UMBRELLA) { + if (Load.C.cmdsize < sizeof(MachO::sub_umbrella_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_UMBRELLA cmdsize too small"); + return; + } + MachO::sub_umbrella_command S = + getStruct<MachO::sub_umbrella_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_UMBRELLA", + sizeof(MachO::sub_umbrella_command), + "sub_umbrella_command", S.sub_umbrella, + "sub_umbrella"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_LIBRARY) { + if (Load.C.cmdsize < sizeof(MachO::sub_library_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_LIBRARY cmdsize too small"); + return; + } + MachO::sub_library_command S = + getStruct<MachO::sub_library_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_LIBRARY", + sizeof(MachO::sub_library_command), + "sub_library_command", S.sub_library, + "sub_library"))) + return; + } else if (Load.C.cmd == MachO::LC_SUB_CLIENT) { + if (Load.C.cmdsize < sizeof(MachO::sub_client_command)) { + Err = malformedError("load command " + Twine(I) + + " LC_SUB_CLIENT cmdsize too small"); + return; + } + MachO::sub_client_command S = + getStruct<MachO::sub_client_command>(*this, Load.Ptr); + if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_CLIENT", + sizeof(MachO::sub_client_command), + "sub_client_command", S.client, "client"))) + return; + } else if (Load.C.cmd == MachO::LC_ROUTINES) { + if (Load.C.cmdsize != sizeof(MachO::routines_command)) { + Err = malformedError("LC_ROUTINES command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (RoutinesLoadCmd) { + Err = malformedError("more than one LC_ROUTINES and or LC_ROUTINES_64 " + "command"); + return; + } + RoutinesLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_ROUTINES_64) { + if (Load.C.cmdsize != sizeof(MachO::routines_command_64)) { + Err = malformedError("LC_ROUTINES_64 command " + Twine(I) + + " has incorrect cmdsize"); + return; + } + if (RoutinesLoadCmd) { + Err = malformedError("more than one LC_ROUTINES_64 and or LC_ROUTINES " + "command"); + return; + } + RoutinesLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_UNIXTHREAD) { + if ((Err = checkThreadCommand(*this, Load, I, "LC_UNIXTHREAD"))) + return; + if (UnixThreadLoadCmd) { + Err = malformedError("more than one LC_UNIXTHREAD command"); + return; + } + UnixThreadLoadCmd = Load.Ptr; + } else if (Load.C.cmd == MachO::LC_THREAD) { + if ((Err = checkThreadCommand(*this, Load, I, "LC_THREAD"))) + return; + // Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported. + } else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) { + if ((Err = checkTwoLevelHintsCommand(*this, Load, I, + &TwoLevelHintsLoadCmd, Elements))) + return; + } else if (isLoadCommandObsolete(Load.C.cmd)) { + Err = malformedError("load command " + Twine(I) + " for cmd value of: " + + Twine(Load.C.cmd) + " is obsolete and not " + "supported"); + return; } + // TODO: generate a error for unknown load commands by default. But still + // need work out an approach to allow or not allow unknown values like this + // as an option for some uses like lldb. if (I < LoadCommandCount - 1) { - if (auto LoadOrErr = getNextLoadCommandInfo(this, I, Load)) + if (auto LoadOrErr = getNextLoadCommandInfo(*this, I, Load)) Load = *LoadOrErr; else { Err = LoadOrErr.takeError(); @@ -393,9 +1447,9 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, } } else if (DysymtabLoadCmd) { MachO::symtab_command Symtab = - getStruct<MachO::symtab_command>(this, SymtabLoadCmd); + getStruct<MachO::symtab_command>(*this, SymtabLoadCmd); MachO::dysymtab_command Dysymtab = - getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd); + getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd); if (Dysymtab.nlocalsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) { Err = malformedError("ilocalsym in LC_DYSYMTAB load command " "extends past the end of the symbol table"); @@ -434,11 +1488,86 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, return; } } + if ((getHeader().filetype == MachO::MH_DYLIB || + getHeader().filetype == MachO::MH_DYLIB_STUB) && + DyldIdLoadCmd == nullptr) { + Err = malformedError("no LC_ID_DYLIB load command in dynamic library " + "filetype"); + return; + } assert(LoadCommands.size() == LoadCommandCount); Err = Error::success(); } +Error MachOObjectFile::checkSymbolTable() const { + uint32_t Flags = 0; + if (is64Bit()) { + MachO::mach_header_64 H_64 = MachOObjectFile::getHeader64(); + Flags = H_64.flags; + } else { + MachO::mach_header H = MachOObjectFile::getHeader(); + Flags = H.flags; + } + uint8_t NType = 0; + uint8_t NSect = 0; + uint16_t NDesc = 0; + uint32_t NStrx = 0; + uint64_t NValue = 0; + uint32_t SymbolIndex = 0; + MachO::symtab_command S = getSymtabLoadCommand(); + for (const SymbolRef &Symbol : symbols()) { + DataRefImpl SymDRI = Symbol.getRawDataRefImpl(); + if (is64Bit()) { + MachO::nlist_64 STE_64 = getSymbol64TableEntry(SymDRI); + NType = STE_64.n_type; + NSect = STE_64.n_sect; + NDesc = STE_64.n_desc; + NStrx = STE_64.n_strx; + NValue = STE_64.n_value; + } else { + MachO::nlist STE = getSymbolTableEntry(SymDRI); + NType = STE.n_type; + NType = STE.n_type; + NSect = STE.n_sect; + NDesc = STE.n_desc; + NStrx = STE.n_strx; + NValue = STE.n_value; + } + if ((NType & MachO::N_STAB) == 0 && + (NType & MachO::N_TYPE) == MachO::N_SECT) { + if (NSect == 0 || NSect > Sections.size()) + return malformedError("bad section index: " + Twine((int)NSect) + + " for symbol at index " + Twine(SymbolIndex)); + } + if ((NType & MachO::N_STAB) == 0 && + (NType & MachO::N_TYPE) == MachO::N_INDR) { + if (NValue >= S.strsize) + return malformedError("bad n_value: " + Twine((int)NValue) + " past " + "the end of string table, for N_INDR symbol at " + "index " + Twine(SymbolIndex)); + } + if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL && + (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) || + (NType & MachO::N_TYPE) == MachO::N_PBUD)) { + uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc); + if (LibraryOrdinal != 0 && + LibraryOrdinal != MachO::EXECUTABLE_ORDINAL && + LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL && + LibraryOrdinal - 1 >= Libraries.size() ) { + return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) + + " for symbol at index " + Twine(SymbolIndex)); + } + } + if (NStrx >= S.strsize) + return malformedError("bad string table index: " + Twine((int)NStrx) + + " past the end of string table, for symbol at " + "index " + Twine(SymbolIndex)); + SymbolIndex++; + } + return Error::success(); +} + void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const { unsigned SymbolTableEntrySize = is64Bit() ? sizeof(MachO::nlist_64) : @@ -448,7 +1577,7 @@ void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Expected<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const { StringRef StringTable = getStringTableData(); - MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); const char *Start = &StringTable.data()[Entry.n_strx]; if (Start < getData().begin() || Start >= getData().end()) { return malformedError("bad string index: " + Twine(Entry.n_strx) + @@ -459,7 +1588,7 @@ Expected<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const { unsigned MachOObjectFile::getSectionType(SectionRef Sec) const { DataRefImpl DRI = Sec.getRawDataRefImpl(); - uint32_t Flags = getSectionFlags(this, DRI); + uint32_t Flags = getSectionFlags(*this, DRI); return Flags & MachO::SECTION_TYPE; } @@ -477,7 +1606,7 @@ uint64_t MachOObjectFile::getNValue(DataRefImpl Sym) const { std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb, StringRef &Res) const { StringRef StringTable = getStringTableData(); - MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR) return object_error::parse_failed; uint64_t NValue = getNValue(Symb); @@ -499,7 +1628,7 @@ Expected<uint64_t> MachOObjectFile::getSymbolAddress(DataRefImpl Sym) const { uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const { uint32_t flags = getSymbolFlags(DRI); if (flags & SymbolRef::SF_Common) { - MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI); return 1 << MachO::GET_COMM_ALIGN(Entry.n_desc); } return 0; @@ -511,7 +1640,7 @@ uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const { Expected<SymbolRef::Type> MachOObjectFile::getSymbolType(DataRefImpl Symb) const { - MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); uint8_t n_type = Entry.n_type; // If this is a STAB debugging symbol, we can do nothing more. @@ -534,7 +1663,7 @@ MachOObjectFile::getSymbolType(DataRefImpl Symb) const { } uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { - MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI); uint8_t MachOType = Entry.n_type; uint16_t MachOFlags = Entry.n_desc; @@ -574,7 +1703,7 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { Expected<section_iterator> MachOObjectFile::getSymbolSection(DataRefImpl Symb) const { - MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb); + MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); uint8_t index = Entry.n_sect; if (index == 0) @@ -590,7 +1719,7 @@ MachOObjectFile::getSymbolSection(DataRefImpl Symb) const { unsigned MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const { MachO::nlist_base Entry = - getSymbolTableEntryBase(this, Sym.getRawDataRefImpl()); + getSymbolTableEntryBase(*this, Sym.getRawDataRefImpl()); return Entry.n_sect - 1; } @@ -677,12 +1806,12 @@ bool MachOObjectFile::isSectionCompressed(DataRefImpl Sec) const { } bool MachOObjectFile::isSectionText(DataRefImpl Sec) const { - uint32_t Flags = getSectionFlags(this, Sec); + uint32_t Flags = getSectionFlags(*this, Sec); return Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; } bool MachOObjectFile::isSectionData(DataRefImpl Sec) const { - uint32_t Flags = getSectionFlags(this, Sec); + uint32_t Flags = getSectionFlags(*this, Sec); unsigned SectionType = Flags & MachO::SECTION_TYPE; return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && !(SectionType == MachO::S_ZEROFILL || @@ -690,7 +1819,7 @@ bool MachOObjectFile::isSectionData(DataRefImpl Sec) const { } bool MachOObjectFile::isSectionBSS(DataRefImpl Sec) const { - uint32_t Flags = getSectionFlags(this, Sec); + uint32_t Flags = getSectionFlags(*this, Sec); unsigned SectionType = Flags & MachO::SECTION_TYPE; return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && (SectionType == MachO::S_ZEROFILL || @@ -766,7 +1895,7 @@ MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const { sizeof(MachO::nlist); uint64_t Offset = S.symoff + SymbolIdx * SymbolTableEntrySize; DataRefImpl Sym; - Sym.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset)); + Sym.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); return symbol_iterator(SymbolRef(Sym, this)); } @@ -1051,7 +2180,7 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, if (LibrariesShortNames.size() == 0) { for (unsigned i = 0; i < Libraries.size(); i++) { MachO::dylib_command D = - getStruct<MachO::dylib_command>(this, Libraries[i]); + getStruct<MachO::dylib_command>(*this, Libraries[i]); if (D.dylib.name >= D.cmdsize) return object_error::parse_failed; const char *P = (const char *)(Libraries[i]) + D.dylib.name; @@ -1079,7 +2208,7 @@ MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { return section_iterator(SectionRef(Sec, this)); } -basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const { +basic_symbol_iterator MachOObjectFile::symbol_begin() const { DataRefImpl DRI; MachO::symtab_command Symtab = getSymtabLoadCommand(); if (!SymtabLoadCmd || Symtab.nsyms == 0) @@ -1088,7 +2217,7 @@ basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const { return getSymbolByIndex(0); } -basic_symbol_iterator MachOObjectFile::symbol_end_impl() const { +basic_symbol_iterator MachOObjectFile::symbol_end() const { DataRefImpl DRI; MachO::symtab_command Symtab = getSymtabLoadCommand(); if (!SymtabLoadCmd || Symtab.nsyms == 0) @@ -1099,7 +2228,7 @@ basic_symbol_iterator MachOObjectFile::symbol_end_impl() const { sizeof(MachO::nlist); unsigned Offset = Symtab.symoff + Symtab.nsyms * SymbolTableEntrySize; - DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset)); + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); return basic_symbol_iterator(SymbolRef(DRI, this)); } @@ -1110,7 +2239,7 @@ basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const { unsigned SymbolTableEntrySize = is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); DataRefImpl DRI; - DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff)); + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff)); DRI.p += Index * SymbolTableEntrySize; return basic_symbol_iterator(SymbolRef(DRI, this)); } @@ -1122,7 +2251,7 @@ uint64_t MachOObjectFile::getSymbolIndex(DataRefImpl Symb) const { unsigned SymbolTableEntrySize = is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); DataRefImpl DRIstart; - DRIstart.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff)); + DRIstart.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff)); uint64_t Index = (Symb.p - DRIstart.p) / SymbolTableEntrySize; return Index; } @@ -1143,7 +2272,7 @@ uint8_t MachOObjectFile::getBytesInAddress() const { } StringRef MachOObjectFile::getFileFormatName() const { - unsigned CPUType = getCPUType(this); + unsigned CPUType = getCPUType(*this); if (!is64Bit()) { switch (CPUType) { case llvm::MachO::CPU_TYPE_I386: @@ -1189,14 +2318,19 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { } Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, - const char **McpuDefault) { + const char **McpuDefault, + const char **ArchFlag) { if (McpuDefault) *McpuDefault = nullptr; + if (ArchFlag) + *ArchFlag = nullptr; switch (CPUType) { case MachO::CPU_TYPE_I386: switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { case MachO::CPU_SUBTYPE_I386_ALL: + if (ArchFlag) + *ArchFlag = "i386"; return Triple("i386-apple-darwin"); default: return Triple(); @@ -1204,8 +2338,12 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, case MachO::CPU_TYPE_X86_64: switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { case MachO::CPU_SUBTYPE_X86_64_ALL: + if (ArchFlag) + *ArchFlag = "x86_64"; return Triple("x86_64-apple-darwin"); case MachO::CPU_SUBTYPE_X86_64_H: + if (ArchFlag) + *ArchFlag = "x86_64h"; return Triple("x86_64h-apple-darwin"); default: return Triple(); @@ -1213,30 +2351,50 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, case MachO::CPU_TYPE_ARM: switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { case MachO::CPU_SUBTYPE_ARM_V4T: + if (ArchFlag) + *ArchFlag = "armv4t"; return Triple("armv4t-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V5TEJ: + if (ArchFlag) + *ArchFlag = "armv5e"; return Triple("armv5e-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_XSCALE: + if (ArchFlag) + *ArchFlag = "xscale"; return Triple("xscale-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V6: + if (ArchFlag) + *ArchFlag = "armv6"; return Triple("armv6-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V6M: if (McpuDefault) *McpuDefault = "cortex-m0"; + if (ArchFlag) + *ArchFlag = "armv6m"; return Triple("armv6m-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7: + if (ArchFlag) + *ArchFlag = "armv7"; return Triple("armv7-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7EM: if (McpuDefault) *McpuDefault = "cortex-m4"; + if (ArchFlag) + *ArchFlag = "armv7em"; return Triple("thumbv7em-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7K: + if (ArchFlag) + *ArchFlag = "armv7k"; return Triple("armv7k-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7M: if (McpuDefault) *McpuDefault = "cortex-m3"; + if (ArchFlag) + *ArchFlag = "armv7m"; return Triple("thumbv7m-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7S: + if (ArchFlag) + *ArchFlag = "armv7s"; return Triple("armv7s-apple-darwin"); default: return Triple(); @@ -1244,6 +2402,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, case MachO::CPU_TYPE_ARM64: switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { case MachO::CPU_SUBTYPE_ARM64_ALL: + if (ArchFlag) + *ArchFlag = "arm64"; return Triple("arm64-apple-darwin"); default: return Triple(); @@ -1251,6 +2411,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, case MachO::CPU_TYPE_POWERPC: switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { case MachO::CPU_SUBTYPE_POWERPC_ALL: + if (ArchFlag) + *ArchFlag = "ppc"; return Triple("ppc-apple-darwin"); default: return Triple(); @@ -1258,6 +2420,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, case MachO::CPU_TYPE_POWERPC64: switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { case MachO::CPU_SUBTYPE_POWERPC_ALL: + if (ArchFlag) + *ArchFlag = "ppc64"; return Triple("ppc64-apple-darwin"); default: return Triple(); @@ -1293,7 +2457,7 @@ bool MachOObjectFile::isValidArch(StringRef ArchFlag) { } unsigned MachOObjectFile::getArch() const { - return getArch(getCPUType(this)); + return getArch(getCPUType(*this)); } Triple MachOObjectFile::getArchTriple(const char **McpuDefault) const { @@ -1318,7 +2482,7 @@ dice_iterator MachOObjectFile::begin_dices() const { return dice_iterator(DiceRef(DRI, this)); MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); - DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, DicLC.dataoff)); + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, DicLC.dataoff)); return dice_iterator(DiceRef(DRI, this)); } @@ -1329,7 +2493,7 @@ dice_iterator MachOObjectFile::end_dices() const { MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); unsigned Offset = DicLC.dataoff + DicLC.datasize; - DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset)); + DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset)); return dice_iterator(DiceRef(DRI, this)); } @@ -1659,7 +2823,11 @@ StringRef MachORebaseEntry::typeName() const { } bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { +#ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif return (Ptr == Other.Ptr) && (RemainingLoopCount == Other.RemainingLoopCount) && (Done == Other.Done); @@ -1909,7 +3077,11 @@ uint32_t MachOBindEntry::flags() const { return Flags; } int MachOBindEntry::ordinal() const { return Ordinal; } bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { +#ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); +#else + assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files"); +#endif return (Ptr == Other.Ptr) && (RemainingLoopCount == Other.RemainingLoopCount) && (Done == Other.Done); @@ -1982,7 +3154,7 @@ MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const { bool MachOObjectFile::isRelocationScattered(const MachO::any_relocation_info &RE) const { - if (getCPUType(this) == MachO::CPU_TYPE_X86_64) + if (getCPUType(*this) == MachO::CPU_TYPE_X86_64) return false; return getPlainRelocationAddress(RE) & MachO::R_SCATTERED; } @@ -2026,15 +3198,15 @@ unsigned MachOObjectFile::getAnyRelocationAddress( unsigned MachOObjectFile::getAnyRelocationPCRel( const MachO::any_relocation_info &RE) const { if (isRelocationScattered(RE)) - return getScatteredRelocationPCRel(this, RE); - return getPlainRelocationPCRel(this, RE); + return getScatteredRelocationPCRel(RE); + return getPlainRelocationPCRel(*this, RE); } unsigned MachOObjectFile::getAnyRelocationLength( const MachO::any_relocation_info &RE) const { if (isRelocationScattered(RE)) return getScatteredRelocationLength(RE); - return getPlainRelocationLength(this, RE); + return getPlainRelocationLength(*this, RE); } unsigned @@ -2042,7 +3214,7 @@ MachOObjectFile::getAnyRelocationType( const MachO::any_relocation_info &RE) const { if (isRelocationScattered(RE)) return getScatteredRelocationType(RE); - return getPlainRelocationType(this, RE); + return getPlainRelocationType(*this, RE); } SectionRef @@ -2060,141 +3232,141 @@ MachOObjectFile::getAnyRelocationSection( MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const { assert(DRI.d.a < Sections.size() && "Should have detected this earlier"); - return getStruct<MachO::section>(this, Sections[DRI.d.a]); + return getStruct<MachO::section>(*this, Sections[DRI.d.a]); } MachO::section_64 MachOObjectFile::getSection64(DataRefImpl DRI) const { assert(DRI.d.a < Sections.size() && "Should have detected this earlier"); - return getStruct<MachO::section_64>(this, Sections[DRI.d.a]); + return getStruct<MachO::section_64>(*this, Sections[DRI.d.a]); } MachO::section MachOObjectFile::getSection(const LoadCommandInfo &L, unsigned Index) const { - const char *Sec = getSectionPtr(this, L, Index); - return getStruct<MachO::section>(this, Sec); + const char *Sec = getSectionPtr(*this, L, Index); + return getStruct<MachO::section>(*this, Sec); } MachO::section_64 MachOObjectFile::getSection64(const LoadCommandInfo &L, unsigned Index) const { - const char *Sec = getSectionPtr(this, L, Index); - return getStruct<MachO::section_64>(this, Sec); + const char *Sec = getSectionPtr(*this, L, Index); + return getStruct<MachO::section_64>(*this, Sec); } MachO::nlist MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const { const char *P = reinterpret_cast<const char *>(DRI.p); - return getStruct<MachO::nlist>(this, P); + return getStruct<MachO::nlist>(*this, P); } MachO::nlist_64 MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const { const char *P = reinterpret_cast<const char *>(DRI.p); - return getStruct<MachO::nlist_64>(this, P); + return getStruct<MachO::nlist_64>(*this, P); } MachO::linkedit_data_command MachOObjectFile::getLinkeditDataLoadCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::linkedit_data_command>(this, L.Ptr); + return getStruct<MachO::linkedit_data_command>(*this, L.Ptr); } MachO::segment_command MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::segment_command>(this, L.Ptr); + return getStruct<MachO::segment_command>(*this, L.Ptr); } MachO::segment_command_64 MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::segment_command_64>(this, L.Ptr); + return getStruct<MachO::segment_command_64>(*this, L.Ptr); } MachO::linker_option_command MachOObjectFile::getLinkerOptionLoadCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::linker_option_command>(this, L.Ptr); + return getStruct<MachO::linker_option_command>(*this, L.Ptr); } MachO::version_min_command MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::version_min_command>(this, L.Ptr); + return getStruct<MachO::version_min_command>(*this, L.Ptr); } MachO::dylib_command MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::dylib_command>(this, L.Ptr); + return getStruct<MachO::dylib_command>(*this, L.Ptr); } MachO::dyld_info_command MachOObjectFile::getDyldInfoLoadCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::dyld_info_command>(this, L.Ptr); + return getStruct<MachO::dyld_info_command>(*this, L.Ptr); } MachO::dylinker_command MachOObjectFile::getDylinkerCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::dylinker_command>(this, L.Ptr); + return getStruct<MachO::dylinker_command>(*this, L.Ptr); } MachO::uuid_command MachOObjectFile::getUuidCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::uuid_command>(this, L.Ptr); + return getStruct<MachO::uuid_command>(*this, L.Ptr); } MachO::rpath_command MachOObjectFile::getRpathCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::rpath_command>(this, L.Ptr); + return getStruct<MachO::rpath_command>(*this, L.Ptr); } MachO::source_version_command MachOObjectFile::getSourceVersionCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::source_version_command>(this, L.Ptr); + return getStruct<MachO::source_version_command>(*this, L.Ptr); } MachO::entry_point_command MachOObjectFile::getEntryPointCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::entry_point_command>(this, L.Ptr); + return getStruct<MachO::entry_point_command>(*this, L.Ptr); } MachO::encryption_info_command MachOObjectFile::getEncryptionInfoCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::encryption_info_command>(this, L.Ptr); + return getStruct<MachO::encryption_info_command>(*this, L.Ptr); } MachO::encryption_info_command_64 MachOObjectFile::getEncryptionInfoCommand64(const LoadCommandInfo &L) const { - return getStruct<MachO::encryption_info_command_64>(this, L.Ptr); + return getStruct<MachO::encryption_info_command_64>(*this, L.Ptr); } MachO::sub_framework_command MachOObjectFile::getSubFrameworkCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::sub_framework_command>(this, L.Ptr); + return getStruct<MachO::sub_framework_command>(*this, L.Ptr); } MachO::sub_umbrella_command MachOObjectFile::getSubUmbrellaCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::sub_umbrella_command>(this, L.Ptr); + return getStruct<MachO::sub_umbrella_command>(*this, L.Ptr); } MachO::sub_library_command MachOObjectFile::getSubLibraryCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::sub_library_command>(this, L.Ptr); + return getStruct<MachO::sub_library_command>(*this, L.Ptr); } MachO::sub_client_command MachOObjectFile::getSubClientCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::sub_client_command>(this, L.Ptr); + return getStruct<MachO::sub_client_command>(*this, L.Ptr); } MachO::routines_command MachOObjectFile::getRoutinesCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::routines_command>(this, L.Ptr); + return getStruct<MachO::routines_command>(*this, L.Ptr); } MachO::routines_command_64 MachOObjectFile::getRoutinesCommand64(const LoadCommandInfo &L) const { - return getStruct<MachO::routines_command_64>(this, L.Ptr); + return getStruct<MachO::routines_command_64>(*this, L.Ptr); } MachO::thread_command MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const { - return getStruct<MachO::thread_command>(this, L.Ptr); + return getStruct<MachO::thread_command>(*this, L.Ptr); } MachO::any_relocation_info @@ -2211,15 +3383,15 @@ MachOObjectFile::getRelocation(DataRefImpl Rel) const { } auto P = reinterpret_cast<const MachO::any_relocation_info *>( - getPtr(this, Offset)) + Rel.d.b; + getPtr(*this, Offset)) + Rel.d.b; return getStruct<MachO::any_relocation_info>( - this, reinterpret_cast<const char *>(P)); + *this, reinterpret_cast<const char *>(P)); } MachO::data_in_code_entry MachOObjectFile::getDice(DataRefImpl Rel) const { const char *P = reinterpret_cast<const char *>(Rel.p); - return getStruct<MachO::data_in_code_entry>(this, P); + return getStruct<MachO::data_in_code_entry>(*this, P); } const MachO::mach_header &MachOObjectFile::getHeader() const { @@ -2235,19 +3407,19 @@ uint32_t MachOObjectFile::getIndirectSymbolTableEntry( const MachO::dysymtab_command &DLC, unsigned Index) const { uint64_t Offset = DLC.indirectsymoff + Index * sizeof(uint32_t); - return getStruct<uint32_t>(this, getPtr(this, Offset)); + return getStruct<uint32_t>(*this, getPtr(*this, Offset)); } MachO::data_in_code_entry MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset, unsigned Index) const { uint64_t Offset = DataOffset + Index * sizeof(MachO::data_in_code_entry); - return getStruct<MachO::data_in_code_entry>(this, getPtr(this, Offset)); + return getStruct<MachO::data_in_code_entry>(*this, getPtr(*this, Offset)); } MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const { if (SymtabLoadCmd) - return getStruct<MachO::symtab_command>(this, SymtabLoadCmd); + return getStruct<MachO::symtab_command>(*this, SymtabLoadCmd); // If there is no SymtabLoadCmd return a load command with zero'ed fields. MachO::symtab_command Cmd; @@ -2262,7 +3434,7 @@ MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const { MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const { if (DysymtabLoadCmd) - return getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd); + return getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd); // If there is no DysymtabLoadCmd return a load command with zero'ed fields. MachO::dysymtab_command Cmd; @@ -2292,7 +3464,7 @@ MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const { MachO::linkedit_data_command MachOObjectFile::getDataInCodeLoadCommand() const { if (DataInCodeLoadCmd) - return getStruct<MachO::linkedit_data_command>(this, DataInCodeLoadCmd); + return getStruct<MachO::linkedit_data_command>(*this, DataInCodeLoadCmd); // If there is no DataInCodeLoadCmd return a load command with zero'ed fields. MachO::linkedit_data_command Cmd; @@ -2306,7 +3478,7 @@ MachOObjectFile::getDataInCodeLoadCommand() const { MachO::linkedit_data_command MachOObjectFile::getLinkOptHintsLoadCommand() const { if (LinkOptHintsLoadCmd) - return getStruct<MachO::linkedit_data_command>(this, LinkOptHintsLoadCmd); + return getStruct<MachO::linkedit_data_command>(*this, LinkOptHintsLoadCmd); // If there is no LinkOptHintsLoadCmd return a load command with zero'ed // fields. @@ -2323,9 +3495,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const { return None; MachO::dyld_info_command DyldInfo = - getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd); + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); const uint8_t *Ptr = - reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.rebase_off)); + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.rebase_off)); return makeArrayRef(Ptr, DyldInfo.rebase_size); } @@ -2334,9 +3506,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const { return None; MachO::dyld_info_command DyldInfo = - getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd); + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); const uint8_t *Ptr = - reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.bind_off)); + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.bind_off)); return makeArrayRef(Ptr, DyldInfo.bind_size); } @@ -2345,9 +3517,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const { return None; MachO::dyld_info_command DyldInfo = - getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd); + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); const uint8_t *Ptr = - reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.weak_bind_off)); + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.weak_bind_off)); return makeArrayRef(Ptr, DyldInfo.weak_bind_size); } @@ -2356,9 +3528,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const { return None; MachO::dyld_info_command DyldInfo = - getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd); + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); const uint8_t *Ptr = - reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.lazy_bind_off)); + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.lazy_bind_off)); return makeArrayRef(Ptr, DyldInfo.lazy_bind_size); } @@ -2367,9 +3539,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const { return None; MachO::dyld_info_command DyldInfo = - getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd); + getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd); const uint8_t *Ptr = - reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.export_off)); + reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.export_off)); return makeArrayRef(Ptr, DyldInfo.export_size); } @@ -2408,16 +3580,22 @@ bool MachOObjectFile::isRelocatableObject() const { } Expected<std::unique_ptr<MachOObjectFile>> -ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer) { +ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer, + uint32_t UniversalCputype, + uint32_t UniversalIndex) { StringRef Magic = Buffer.getBuffer().slice(0, 4); if (Magic == "\xFE\xED\xFA\xCE") - return MachOObjectFile::create(Buffer, false, false); + return MachOObjectFile::create(Buffer, false, false, + UniversalCputype, UniversalIndex); if (Magic == "\xCE\xFA\xED\xFE") - return MachOObjectFile::create(Buffer, true, false); + return MachOObjectFile::create(Buffer, true, false, + UniversalCputype, UniversalIndex); if (Magic == "\xFE\xED\xFA\xCF") - return MachOObjectFile::create(Buffer, false, true); + return MachOObjectFile::create(Buffer, false, true, + UniversalCputype, UniversalIndex); if (Magic == "\xCF\xFA\xED\xFE") - return MachOObjectFile::create(Buffer, true, true); + return MachOObjectFile::create(Buffer, true, true, + UniversalCputype, UniversalIndex); return make_error<GenericBinaryError>("Unrecognized MachO magic number", object_error::invalid_file_type); } diff --git a/contrib/llvm/lib/Object/MachOUniversal.cpp b/contrib/llvm/lib/Object/MachOUniversal.cpp index 66c9151..309708e 100644 --- a/contrib/llvm/lib/Object/MachOUniversal.cpp +++ b/contrib/llvm/lib/Object/MachOUniversal.cpp @@ -42,6 +42,7 @@ static T getUniversalBinaryStruct(const char *Ptr) { MachOUniversalBinary::ObjectForArch::ObjectForArch( const MachOUniversalBinary *Parent, uint32_t Index) : Parent(Parent), Index(Index) { + // The iterators use Parent as a nullptr and an Index+1 == NumberOfObjects. if (!Parent || Index >= Parent->getNumberOfObjects()) { clear(); } else { @@ -51,16 +52,10 @@ MachOUniversalBinary::ObjectForArch::ObjectForArch( const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) + Index * sizeof(MachO::fat_arch); Header = getUniversalBinaryStruct<MachO::fat_arch>(HeaderPos); - if (ParentData.size() < Header.offset + Header.size) { - clear(); - } } else { // Parent->getMagic() == MachO::FAT_MAGIC_64 const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) + Index * sizeof(MachO::fat_arch_64); Header64 = getUniversalBinaryStruct<MachO::fat_arch_64>(HeaderPos); - if (ParentData.size() < Header64.offset + Header64.size) { - clear(); - } } } } @@ -73,13 +68,17 @@ MachOUniversalBinary::ObjectForArch::getAsObjectFile() const { StringRef ParentData = Parent->getData(); StringRef ObjectData; - if (Parent->getMagic() == MachO::FAT_MAGIC) + uint32_t cputype; + if (Parent->getMagic() == MachO::FAT_MAGIC) { ObjectData = ParentData.substr(Header.offset, Header.size); - else // Parent->getMagic() == MachO::FAT_MAGIC_64 + cputype = Header.cputype; + } else { // Parent->getMagic() == MachO::FAT_MAGIC_64 ObjectData = ParentData.substr(Header64.offset, Header64.size); + cputype = Header64.cputype; + } StringRef ObjectName = Parent->getFileName(); MemoryBufferRef ObjBuffer(ObjectData, ObjectName); - return ObjectFile::createMachOObjectFile(ObjBuffer); + return ObjectFile::createMachOObjectFile(ObjBuffer, cputype, Index); } Expected<std::unique_ptr<Archive>> @@ -103,7 +102,7 @@ void MachOUniversalBinary::anchor() { } Expected<std::unique_ptr<MachOUniversalBinary>> MachOUniversalBinary::create(MemoryBufferRef Source) { - Error Err; + Error Err = Error::success(); std::unique_ptr<MachOUniversalBinary> Ret( new MachOUniversalBinary(Source, Err)); if (Err) @@ -114,7 +113,7 @@ MachOUniversalBinary::create(MemoryBufferRef Source) { MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) : Binary(Binary::ID_MachOUniversalBinary, Source), Magic(0), NumberOfObjects(0) { - ErrorAsOutParameter ErrAsOutParam(Err); + ErrorAsOutParameter ErrAsOutParam(&Err); if (Data.getBufferSize() < sizeof(MachO::fat_header)) { Err = make_error<GenericBinaryError>("File too small to be a Mach-O " "universal file", @@ -127,6 +126,10 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) getUniversalBinaryStruct<MachO::fat_header>(Buf.begin()); Magic = H.magic; NumberOfObjects = H.nfat_arch; + if (NumberOfObjects == 0) { + Err = malformedError("contains zero architecture types"); + return; + } uint32_t MinSize = sizeof(MachO::fat_header); if (Magic == MachO::FAT_MAGIC) MinSize += sizeof(MachO::fat_arch) * NumberOfObjects; @@ -142,6 +145,68 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) " structs would extend past the end of the file"); return; } + for (uint32_t i = 0; i < NumberOfObjects; i++) { + ObjectForArch A(this, i); + uint64_t bigSize = A.getOffset(); + bigSize += A.getSize(); + if (bigSize > Buf.size()) { + Err = malformedError("offset plus size of cputype (" + + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") extends past the end of the file"); + return; + } +#define MAXSECTALIGN 15 /* 2**15 or 0x8000 */ + if (A.getAlign() > MAXSECTALIGN) { + Err = malformedError("align (2^" + Twine(A.getAlign()) + ") too large " + "for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") (maximum 2^" + Twine(MAXSECTALIGN) + ")"); + return; + } + if(A.getOffset() % (1 << A.getAlign()) != 0){ + Err = malformedError("offset: " + Twine(A.getOffset()) + + " for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") not aligned on it's alignment (2^" + Twine(A.getAlign()) + ")"); + return; + } + if (A.getOffset() < MinSize) { + Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") " + "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") offset " + Twine(A.getOffset()) + " overlaps universal headers"); + return; + } + } + for (uint32_t i = 0; i < NumberOfObjects; i++) { + ObjectForArch A(this, i); + for (uint32_t j = i + 1; j < NumberOfObjects; j++) { + ObjectForArch B(this, j); + if (A.getCPUType() == B.getCPUType() && + (A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) == + (B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK)) { + Err = malformedError("contains two of the same architecture (cputype " + "(" + Twine(A.getCPUType()) + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + "))"); + return; + } + if ((A.getOffset() >= B.getOffset() && + A.getOffset() < B.getOffset() + B.getSize()) || + (A.getOffset() + A.getSize() > B.getOffset() && + A.getOffset() + A.getSize() < B.getOffset() + B.getSize()) || + (A.getOffset() <= B.getOffset() && + A.getOffset() + A.getSize() >= B.getOffset() + B.getSize())) { + Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") " + "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") at offset " + Twine(A.getOffset()) + " with a size of " + + Twine(A.getSize()) + ", overlaps cputype (" + Twine(B.getCPUType()) + + ") cpusubtype (" + Twine(B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") at offset " + Twine(B.getOffset()) + " with a size of " + + Twine(B.getSize())); + return; + } + } + } Err = Error::success(); } @@ -153,10 +218,9 @@ MachOUniversalBinary::getObjectForArch(StringRef ArchName) const { ArchName, object_error::arch_not_found); - for (object_iterator I = begin_objects(), E = end_objects(); I != E; ++I) { - if (I->getArchTypeName() == ArchName) - return I->getAsObjectFile(); - } + for (auto &Obj : objects()) + if (Obj.getArchFlagName() == ArchName) + return Obj.getAsObjectFile(); return make_error<GenericBinaryError>("fat file does not " "contain " + ArchName, diff --git a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp index e6b1040..11ace84 100644 --- a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp +++ b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp @@ -13,7 +13,7 @@ #include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Object/ObjectFile.h" @@ -22,6 +22,12 @@ using namespace llvm; using namespace object; +static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile( + "ignore-empty-index-file", llvm::cl::ZeroOrMore, + llvm::cl::desc( + "Ignore an empty index file and perform non-ThinLTO compilation"), + llvm::cl::init(false)); + ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile( MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I) : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) { @@ -67,59 +73,42 @@ ModuleSummaryIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { } } -// Looks for module summary index in the given memory buffer. -// returns true if found, else false. -bool ModuleSummaryIndexObjectFile::hasGlobalValueSummaryInMemBuffer( - MemoryBufferRef Object, - const DiagnosticHandlerFunction &DiagnosticHandler) { - ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); - if (!BCOrErr) - return false; - - return hasGlobalValueSummary(BCOrErr.get(), DiagnosticHandler); -} - // Parse module summary index in the given memory buffer. // Return new ModuleSummaryIndexObjectFile instance containing parsed // module summary/index. -ErrorOr<std::unique_ptr<ModuleSummaryIndexObjectFile>> -ModuleSummaryIndexObjectFile::create( - MemoryBufferRef Object, - const DiagnosticHandlerFunction &DiagnosticHandler) { - std::unique_ptr<ModuleSummaryIndex> Index; - +Expected<std::unique_ptr<ModuleSummaryIndexObjectFile>> +ModuleSummaryIndexObjectFile::create(MemoryBufferRef Object) { ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); if (!BCOrErr) - return BCOrErr.getError(); + return errorCodeToError(BCOrErr.getError()); - ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IOrErr = - getModuleSummaryIndex(BCOrErr.get(), DiagnosticHandler); + Expected<std::unique_ptr<ModuleSummaryIndex>> IOrErr = + getModuleSummaryIndex(BCOrErr.get()); - if (std::error_code EC = IOrErr.getError()) - return EC; - - Index = std::move(IOrErr.get()); + if (!IOrErr) + return IOrErr.takeError(); + std::unique_ptr<ModuleSummaryIndex> Index = std::move(IOrErr.get()); return llvm::make_unique<ModuleSummaryIndexObjectFile>(Object, std::move(Index)); } // Parse the module summary index out of an IR file and return the summary // index object if found, or nullptr if not. -ErrorOr<std::unique_ptr<ModuleSummaryIndex>> llvm::getModuleSummaryIndexForFile( - StringRef Path, const DiagnosticHandlerFunction &DiagnosticHandler) { +Expected<std::unique_ptr<ModuleSummaryIndex>> +llvm::getModuleSummaryIndexForFile(StringRef Path) { ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = MemoryBuffer::getFileOrSTDIN(Path); std::error_code EC = FileOrErr.getError(); if (EC) - return EC; + return errorCodeToError(EC); MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef(); - ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create(BufferRef, - DiagnosticHandler); - EC = ObjOrErr.getError(); - if (EC) - return EC; + if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize()) + return nullptr; + Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = + object::ModuleSummaryIndexObjectFile::create(BufferRef); + if (!ObjOrErr) + return ObjOrErr.takeError(); object::ModuleSummaryIndexObjectFile &Obj = **ObjOrErr; return Obj.takeIndex(); diff --git a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp new file mode 100644 index 0000000..9048800 --- /dev/null +++ b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp @@ -0,0 +1,189 @@ +//===- ModuleSymbolTable.cpp - symbol table for in-memory IR ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents a symbol table built from in-memory IR. It provides +// access to GlobalValues and should only be used if such access is required +// (e.g. in the LTO implementation). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRObjectFile.h" +#include "RecordStreamer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/IR/GVMaterializer.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace object; + +void ModuleSymbolTable::addModule(Module *M) { + if (FirstMod) + assert(FirstMod->getTargetTriple() == M->getTargetTriple()); + else + FirstMod = M; + + for (Function &F : *M) + SymTab.push_back(&F); + for (GlobalVariable &GV : M->globals()) + SymTab.push_back(&GV); + for (GlobalAlias &GA : M->aliases()) + SymTab.push_back(&GA); + + CollectAsmSymbols(Triple(M->getTargetTriple()), M->getModuleInlineAsm(), + [this](StringRef Name, BasicSymbolRef::Flags Flags) { + SymTab.push_back(new (AsmSymbols.Allocate()) + AsmSymbol(Name, Flags)); + }); +} + +void ModuleSymbolTable::CollectAsmSymbols( + const Triple &TT, StringRef InlineAsm, + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + if (InlineAsm.empty()) + return; + + std::string Err; + const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); + assert(T && T->hasMCAsmParser()); + + std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str())); + if (!MRI) + return; + + std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str())); + if (!MAI) + return; + + std::unique_ptr<MCSubtargetInfo> STI( + T->createMCSubtargetInfo(TT.str(), "", "")); + if (!STI) + return; + + std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo()); + if (!MCII) + return; + + MCObjectFileInfo MOFI; + MCContext MCCtx(MAI.get(), MRI.get(), &MOFI); + MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, CodeModel::Default, MCCtx); + RecordStreamer Streamer(MCCtx); + T->createNullTargetStreamer(Streamer); + + std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm)); + SourceMgr SrcMgr; + SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI)); + + MCTargetOptions MCOptions; + std::unique_ptr<MCTargetAsmParser> TAP( + T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); + if (!TAP) + return; + + Parser->setTargetParser(*TAP); + if (Parser->Run(false)) + return; + + for (auto &KV : Streamer) { + StringRef Key = KV.first(); + RecordStreamer::State Value = KV.second; + // FIXME: For now we just assume that all asm symbols are executable. + uint32_t Res = BasicSymbolRef::SF_Executable; + switch (Value) { + case RecordStreamer::NeverSeen: + llvm_unreachable("NeverSeen should have been replaced earlier"); + case RecordStreamer::DefinedGlobal: + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::Defined: + break; + case RecordStreamer::Global: + case RecordStreamer::Used: + Res |= BasicSymbolRef::SF_Undefined; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::DefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::UndefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Undefined; + } + AsmSymbol(Key, BasicSymbolRef::Flags(Res)); + } +} + +void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const { + if (S.is<AsmSymbol *>()) { + OS << S.get<AsmSymbol *>()->first; + return; + } + + auto *GV = S.get<GlobalValue *>(); + if (GV->hasDLLImportStorageClass()) + OS << "__imp_"; + + Mang.getNameWithPrefix(OS, GV, false); +} + +uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const { + if (S.is<AsmSymbol *>()) + return S.get<AsmSymbol *>()->second; + + auto *GV = S.get<GlobalValue *>(); + + uint32_t Res = BasicSymbolRef::SF_None; + if (GV->isDeclarationForLinker()) + Res |= BasicSymbolRef::SF_Undefined; + else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Hidden; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + if (GVar->isConstant()) + Res |= BasicSymbolRef::SF_Const; + } + if (dyn_cast_or_null<Function>(GV->getBaseObject())) + Res |= BasicSymbolRef::SF_Executable; + if (isa<GlobalAlias>(GV)) + Res |= BasicSymbolRef::SF_Indirect; + if (GV->hasPrivateLinkage()) + Res |= BasicSymbolRef::SF_FormatSpecific; + if (!GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Global; + if (GV->hasCommonLinkage()) + Res |= BasicSymbolRef::SF_Common; + if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || + GV->hasExternalWeakLinkage()) + Res |= BasicSymbolRef::SF_Weak; + + if (GV->getName().startswith("llvm.")) + Res |= BasicSymbolRef::SF_FormatSpecific; + else if (auto *Var = dyn_cast<GlobalVariable>(GV)) { + if (Var->getSection() == "llvm.metadata") + Res |= BasicSymbolRef::SF_FormatSpecific; + } + + return Res; +} diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp index 92f9c1f..f36388b 100644 --- a/contrib/llvm/lib/Object/ObjectFile.cpp +++ b/contrib/llvm/lib/Object/ObjectFile.cpp @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/ObjectFile.h" #include "llvm/Object/COFF.h" #include "llvm/Object/MachO.h" -#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/Wasm.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -78,6 +79,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) { switch (Type) { case sys::fs::file_magic::unknown: case sys::fs::file_magic::bitcode: + case sys::fs::file_magic::coff_cl_gl_object: case sys::fs::file_magic::archive: case sys::fs::file_magic::macho_universal_binary: case sys::fs::file_magic::windows_resource: @@ -104,6 +106,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) { case sys::fs::file_magic::coff_import_library: case sys::fs::file_magic::pecoff_executable: return errorOrToExpected(createCOFFObjectFile(Object)); + case sys::fs::file_magic::wasm_object: + return createWasmObjectFile(Object); } llvm_unreachable("Unexpected Object File Type"); } @@ -118,8 +122,8 @@ ObjectFile::createObjectFile(StringRef ObjectPath) { Expected<std::unique_ptr<ObjectFile>> ObjOrErr = createObjectFile(Buffer->getMemBufferRef()); - if (!ObjOrErr) - ObjOrErr.takeError(); + if (Error Err = ObjOrErr.takeError()) + return std::move(Err); std::unique_ptr<ObjectFile> Obj = std::move(ObjOrErr.get()); return OwningBinary<ObjectFile>(std::move(Obj), std::move(Buffer)); diff --git a/contrib/llvm/lib/Object/RecordStreamer.cpp b/contrib/llvm/lib/Object/RecordStreamer.cpp index f03bd5e..572b960 100644 --- a/contrib/llvm/lib/Object/RecordStreamer.cpp +++ b/contrib/llvm/lib/Object/RecordStreamer.cpp @@ -23,8 +23,10 @@ void RecordStreamer::markDefined(const MCSymbol &Symbol) { case Used: S = Defined; break; - case GlobalWeak: + case DefinedWeak: break; + case UndefinedWeak: + S = DefinedWeak; } } @@ -34,15 +36,16 @@ void RecordStreamer::markGlobal(const MCSymbol &Symbol, switch (S) { case DefinedGlobal: case Defined: - S = (Attribute == MCSA_Weak) ? GlobalWeak : DefinedGlobal; + S = (Attribute == MCSA_Weak) ? DefinedWeak : DefinedGlobal; break; case NeverSeen: case Global: case Used: - S = (Attribute == MCSA_Weak) ? GlobalWeak : Global; + S = (Attribute == MCSA_Weak) ? UndefinedWeak : Global; break; - case GlobalWeak: + case UndefinedWeak: + case DefinedWeak: break; } } @@ -53,7 +56,8 @@ void RecordStreamer::markUsed(const MCSymbol &Symbol) { case DefinedGlobal: case Defined: case Global: - case GlobalWeak: + case DefinedWeak: + case UndefinedWeak: break; case NeverSeen: @@ -92,6 +96,8 @@ bool RecordStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { if (Attribute == MCSA_Global || Attribute == MCSA_Weak) markGlobal(*Symbol, Attribute); + if (Attribute == MCSA_LazyReference) + markUsed(*Symbol); return true; } diff --git a/contrib/llvm/lib/Object/RecordStreamer.h b/contrib/llvm/lib/Object/RecordStreamer.h index 71337a6..617d8a4 100644 --- a/contrib/llvm/lib/Object/RecordStreamer.h +++ b/contrib/llvm/lib/Object/RecordStreamer.h @@ -15,7 +15,8 @@ namespace llvm { class RecordStreamer : public MCStreamer { public: - enum State { NeverSeen, Global, GlobalWeak, Defined, DefinedGlobal, Used }; + enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used, + UndefinedWeak}; private: StringMap<State> Symbols; diff --git a/contrib/llvm/lib/Object/SymbolSize.cpp b/contrib/llvm/lib/Object/SymbolSize.cpp index 1d5cd78..dd49d5f 100644 --- a/contrib/llvm/lib/Object/SymbolSize.cpp +++ b/contrib/llvm/lib/Object/SymbolSize.cpp @@ -16,19 +16,13 @@ using namespace llvm; using namespace object; -namespace { -struct SymEntry { - symbol_iterator I; - uint64_t Address; - unsigned Number; - unsigned SectionID; -}; -} - -static int compareAddress(const SymEntry *A, const SymEntry *B) { +// Orders increasingly by (SectionID, Address). +int llvm::object::compareAddress(const SymEntry *A, const SymEntry *B) { if (A->SectionID != B->SectionID) - return A->SectionID - B->SectionID; - return A->Address - B->Address; + return A->SectionID < B->SectionID ? -1 : 1; + if (A->Address != B->Address) + return A->Address < B->Address ? -1 : 1; + return 0; } static unsigned getSectionID(const ObjectFile &O, SectionRef Sec) { diff --git a/contrib/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm/lib/Object/SymbolicFile.cpp index 1e8e31b..4b51a49 100644 --- a/contrib/llvm/lib/Object/SymbolicFile.cpp +++ b/contrib/llvm/lib/Object/SymbolicFile.cpp @@ -35,10 +35,11 @@ Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile( switch (Type) { case sys::fs::file_magic::bitcode: if (Context) - return errorOrToExpected(IRObjectFile::create(Object, *Context)); - // Fallthrough + return IRObjectFile::create(Object, *Context); + LLVM_FALLTHROUGH; case sys::fs::file_magic::unknown: case sys::fs::file_magic::archive: + case sys::fs::file_magic::coff_cl_gl_object: case sys::fs::file_magic::macho_universal_binary: case sys::fs::file_magic::windows_resource: return errorCodeToError(object_error::invalid_file_type); @@ -57,6 +58,7 @@ Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile( case sys::fs::file_magic::macho_dsym_companion: case sys::fs::file_magic::macho_kext_bundle: case sys::fs::file_magic::pecoff_executable: + case sys::fs::file_magic::wasm_object: return ObjectFile::createObjectFile(Object, Type); case sys::fs::file_magic::coff_import_library: return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object)); @@ -73,9 +75,9 @@ Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile( if (!BCData) return std::move(Obj); - return errorOrToExpected(IRObjectFile::create( - MemoryBufferRef(BCData->getBuffer(), - Object.getBufferIdentifier()), *Context)); + return IRObjectFile::create( + MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()), + *Context); } } llvm_unreachable("Unexpected Binary File Type"); diff --git a/contrib/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm/lib/Object/WasmObjectFile.cpp new file mode 100644 index 0000000..2b61a8a0 --- /dev/null +++ b/contrib/llvm/lib/Object/WasmObjectFile.cpp @@ -0,0 +1,313 @@ +//===- WasmObjectFile.cpp - Wasm object file implementation -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Wasm.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" + +namespace llvm { +namespace object { + +Expected<std::unique_ptr<WasmObjectFile>> +ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { + Error Err = Error::success(); + auto ObjectFile = llvm::make_unique<WasmObjectFile>(Buffer, Err); + if (Err) + return std::move(Err); + + return std::move(ObjectFile); +} + +namespace { + +uint32_t readUint32(const uint8_t *&Ptr) { + uint32_t Result = support::endian::read32le(Ptr); + Ptr += sizeof(Result); + return Result; +} + +uint64_t readULEB128(const uint8_t *&Ptr) { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count); + Ptr += Count; + return Result; +} + +StringRef readString(const uint8_t *&Ptr) { + uint32_t StringLen = readULEB128(Ptr); + StringRef Return = StringRef(reinterpret_cast<const char *>(Ptr), StringLen); + Ptr += StringLen; + return Return; +} + +Error readSection(wasm::WasmSection &Section, const uint8_t *&Ptr, + const uint8_t *Start) { + // TODO(sbc): Avoid reading past EOF in the case of malformed files. + Section.Offset = Ptr - Start; + Section.Type = readULEB128(Ptr); + uint32_t Size = readULEB128(Ptr); + if (Size == 0) + return make_error<StringError>("Zero length section", + object_error::parse_failed); + Section.Content = ArrayRef<uint8_t>(Ptr, Size); + Ptr += Size; + return Error::success(); +} +} + +WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) + : ObjectFile(Binary::ID_Wasm, Buffer) { + ErrorAsOutParameter ErrAsOutParam(&Err); + Header.Magic = getData().substr(0, 4); + if (Header.Magic != StringRef("\0asm", 4)) { + Err = make_error<StringError>("Bad magic number", + object_error::parse_failed); + return; + } + const uint8_t *Ptr = getPtr(4); + Header.Version = readUint32(Ptr); + if (Header.Version != wasm::WasmVersion) { + Err = make_error<StringError>("Bad version number", + object_error::parse_failed); + return; + } + + const uint8_t *Eof = getPtr(getData().size()); + wasm::WasmSection Sec; + while (Ptr < Eof) { + if ((Err = readSection(Sec, Ptr, getPtr(0)))) + return; + if (Sec.Type == wasm::WASM_SEC_USER) { + if ((Err = parseUserSection(Sec, Sec.Content.data(), Sec.Content.size()))) + return; + } + Sections.push_back(Sec); + } +} + +Error WasmObjectFile::parseUserSection(wasm::WasmSection &Sec, + const uint8_t *Ptr, size_t Length) { + Sec.Name = readString(Ptr); + return Error::success(); +} + +const uint8_t *WasmObjectFile::getPtr(size_t Offset) const { + return reinterpret_cast<const uint8_t *>(getData().substr(Offset, 1).data()); +} + +const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const { + return Header; +} + +void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { + llvm_unreachable("not yet implemented"); +} + +std::error_code WasmObjectFile::printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return object_error::invalid_symbol_index; +} + +uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +basic_symbol_iterator WasmObjectFile::symbol_begin() const { + return BasicSymbolRef(DataRefImpl(), this); +} + +basic_symbol_iterator WasmObjectFile::symbol_end() const { + return BasicSymbolRef(DataRefImpl(), this); +} + +Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return errorCodeToError(object_error::invalid_symbol_index); +} + +Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return errorCodeToError(object_error::invalid_symbol_index); +} + +uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +uint64_t WasmObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +Expected<SymbolRef::Type> +WasmObjectFile::getSymbolType(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return errorCodeToError(object_error::invalid_symbol_index); +} + +Expected<section_iterator> +WasmObjectFile::getSymbolSection(DataRefImpl Symb) const { + llvm_unreachable("not yet implemented"); + return errorCodeToError(object_error::invalid_symbol_index); +} + +void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; } + +std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, + StringRef &Res) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; +#define ECase(X) \ + case wasm::WASM_SEC_##X: \ + Res = #X; \ + break + switch (S.Type) { + ECase(TYPE); + ECase(IMPORT); + ECase(FUNCTION); + ECase(TABLE); + ECase(MEMORY); + ECase(GLOBAL); + ECase(EXPORT); + ECase(START); + ECase(ELEM); + ECase(CODE); + ECase(DATA); + case wasm::WASM_SEC_USER: + Res = S.Name; + break; + default: + return object_error::invalid_section_index; + } +#undef ECase + return std::error_code(); +} + +uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } + +uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; + return S.Content.size(); +} + +std::error_code WasmObjectFile::getSectionContents(DataRefImpl Sec, + StringRef &Res) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; + // This will never fail since wasm sections can never be empty (user-sections + // must have a name and non-user sections each have a defined structure). + Res = StringRef(reinterpret_cast<const char *>(S.Content.data()), + S.Content.size()); + return std::error_code(); +} + +uint64_t WasmObjectFile::getSectionAlignment(DataRefImpl Sec) const { + return 1; +} + +bool WasmObjectFile::isSectionCompressed(DataRefImpl Sec) const { + return false; +} + +bool WasmObjectFile::isSectionText(DataRefImpl Sec) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; + return S.Type == wasm::WASM_SEC_CODE; +} + +bool WasmObjectFile::isSectionData(DataRefImpl Sec) const { + const wasm::WasmSection &S = Sections[Sec.d.a]; + return S.Type == wasm::WASM_SEC_DATA; +} + +bool WasmObjectFile::isSectionBSS(DataRefImpl Sec) const { return false; } + +bool WasmObjectFile::isSectionVirtual(DataRefImpl Sec) const { return false; } + +bool WasmObjectFile::isSectionBitcode(DataRefImpl Sec) const { return false; } + +relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Sec) const { + llvm_unreachable("not yet implemented"); + RelocationRef Rel; + return relocation_iterator(Rel); +} + +relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Sec) const { + llvm_unreachable("not yet implemented"); + RelocationRef Rel; + return relocation_iterator(Rel); +} + +section_iterator WasmObjectFile::getRelocatedSection(DataRefImpl Sec) const { + llvm_unreachable("not yet implemented"); + SectionRef Ref; + return section_iterator(Ref); +} + +void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const { + llvm_unreachable("not yet implemented"); +} + +uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Rel) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + llvm_unreachable("not yet implemented"); + SymbolRef Ref; + return symbol_iterator(Ref); +} + +uint64_t WasmObjectFile::getRelocationType(DataRefImpl Rel) const { + llvm_unreachable("not yet implemented"); + return 0; +} + +void WasmObjectFile::getRelocationTypeName( + DataRefImpl Rel, SmallVectorImpl<char> &Result) const { + llvm_unreachable("not yet implemented"); +} + +section_iterator WasmObjectFile::section_begin() const { + DataRefImpl Ref; + Ref.d.a = 0; + return section_iterator(SectionRef(Ref, this)); +} + +section_iterator WasmObjectFile::section_end() const { + DataRefImpl Ref; + Ref.d.a = Sections.size(); + return section_iterator(SectionRef(Ref, this)); +} + +uint8_t WasmObjectFile::getBytesInAddress() const { return 4; } + +StringRef WasmObjectFile::getFileFormatName() const { return "WASM"; } + +unsigned WasmObjectFile::getArch() const { return Triple::wasm32; } + +SubtargetFeatures WasmObjectFile::getFeatures() const { + return SubtargetFeatures(); +} + +bool WasmObjectFile::isRelocatableObject() const { return false; } + +const wasm::WasmSection * +WasmObjectFile::getWasmSection(const SectionRef &Section) const { + return &Sections[Section.getRawDataRefImpl().d.a]; +} + +} // end namespace object +} // end namespace llvm |