diff options
Diffstat (limited to 'contrib/llvm/lib/Object')
21 files changed, 4553 insertions, 657 deletions
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp index f2021f7..977cccc 100644 --- a/contrib/llvm/lib/Object/Archive.cpp +++ b/contrib/llvm/lib/Object/Archive.cpp @@ -1,4 +1,4 @@ -//===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===// +//===- Archive.cpp - ar File Format implementation ------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,11 +12,28 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/Archive.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Chrono.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <memory> +#include <string> +#include <system_error> using namespace llvm; using namespace object; @@ -25,7 +42,7 @@ using namespace llvm::support::endian; static const char *const Magic = "!<arch>\n"; static const char *const ThinMagic = "!<thin>\n"; -void Archive::anchor() { } +void Archive::anchor() {} static Error malformedError(Twine Msg) { @@ -61,8 +78,8 @@ ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, if (Err) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->Terminator, - sizeof(ArMemHdr->Terminator))); + OS.write_escaped(StringRef(ArMemHdr->Terminator, + sizeof(ArMemHdr->Terminator))); OS.flush(); std::string Msg("terminator characters in archive member \"" + Buf + "\" not the correct \"`\\n\" values for the archive " @@ -97,13 +114,13 @@ Expected<StringRef> ArchiveMemberHeader::getRawName() const { EndCond = ' '; else EndCond = '/'; - llvm::StringRef::size_type end = - llvm::StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); - if (end == llvm::StringRef::npos) + StringRef::size_type end = + StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); + if (end == StringRef::npos) end = sizeof(ArMemHdr->Name); assert(end <= sizeof(ArMemHdr->Name) && end > 0); // Don't include the EndCond if there is one. - return llvm::StringRef(ArMemHdr->Name, end); + return StringRef(ArMemHdr->Name, end); } // This gets the name looking up long names. Size is the size of the archive @@ -205,12 +222,12 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { Expected<uint32_t> ArchiveMemberHeader::getSize() const { uint32_t Ret; - if (llvm::StringRef(ArMemHdr->Size, - sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { + if (StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->Size, - sizeof(ArMemHdr->Size)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); @@ -227,8 +244,8 @@ Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const { sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->AccessMode, - sizeof(ArMemHdr->AccessMode)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->AccessMode, + sizeof(ArMemHdr->AccessMode)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); @@ -247,8 +264,8 @@ ArchiveMemberHeader::getLastModified() const { .getAsInteger(10, Seconds)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->LastModified, - sizeof(ArMemHdr->LastModified)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->LastModified, + sizeof(ArMemHdr->LastModified)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp index f8e3c5a..b052c76 100644 --- a/contrib/llvm/lib/Object/ArchiveWriter.cpp +++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp @@ -14,6 +14,7 @@ #include "llvm/Object/ArchiveWriter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" @@ -35,7 +36,8 @@ using namespace llvm; NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) - : Buf(MemoryBuffer::getMemBuffer(BufRef, false)) {} + : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), + MemberName(BufRef.getBufferIdentifier()) {} Expected<NewArchiveMember> NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, @@ -47,6 +49,7 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, NewArchiveMember M; assert(M.IsNew == false); M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); + M.MemberName = M.Buf->getBufferIdentifier(); if (!Deterministic) { auto ModTimeOrErr = OldMember.getLastModified(); if (!ModTimeOrErr) @@ -96,6 +99,7 @@ Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, NewArchiveMember M; M.IsNew = true; M.Buf = std::move(*MemberBufferOrErr); + M.MemberName = M.Buf->getBufferIdentifier(); if (!Deterministic) { M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( Status.getLastModificationTime()); @@ -122,12 +126,27 @@ static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size, } } +static bool isBSDLike(object::Archive::Kind Kind) { + switch (Kind) { + case object::Archive::K_GNU: + return false; + case object::Archive::K_BSD: + case object::Archive::K_DARWIN: + return true; + case object::Archive::K_MIPS64: + case object::Archive::K_DARWIN64: + case object::Archive::K_COFF: + break; + } + llvm_unreachable("not supported for writting"); +} + static void print32(raw_ostream &Out, object::Archive::Kind Kind, uint32_t Val) { - if (Kind == object::Archive::K_GNU) - support::endian::Writer<support::big>(Out).write(Val); - else + if (isBSDLike(Kind)) support::endian::Writer<support::little>(Out).write(Val); + else + support::endian::Writer<support::big>(Out).write(Val); } static void printRestOfMemberHeader( @@ -169,7 +188,7 @@ printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, } static bool useStringTable(bool Thin, StringRef Name) { - return Thin || Name.size() >= 16; + return Thin || Name.size() >= 16 || Name.contains('/'); } static void @@ -178,7 +197,7 @@ printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin, std::vector<unsigned>::iterator &StringMapIndexIter, const sys::TimePoint<std::chrono::seconds> &ModTime, unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { - if (Kind == object::Archive::K_BSD) + if (isBSDLike(Kind)) return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); if (!useStringTable(Thin, Name)) return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); @@ -223,7 +242,7 @@ static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, unsigned StartOffset = 0; for (const NewArchiveMember &M : Members) { StringRef Path = M.Buf->getBufferIdentifier(); - StringRef Name = sys::path::filename(Path); + StringRef Name = M.MemberName; if (!useStringTable(Thin, Name)) continue; if (StartOffset == 0) { @@ -275,7 +294,7 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, MemoryBufferRef MemberBuffer = Members[MemberNum].Buf->getMemBufferRef(); Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr = object::SymbolicFile::createSymbolicFile( - MemberBuffer, sys::fs::file_magic::unknown, &Context); + MemberBuffer, llvm::file_magic::unknown, &Context); if (!ObjOrErr) { // FIXME: check only for "not an object file" errors. consumeError(ObjOrErr.takeError()); @@ -285,10 +304,10 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, if (!HeaderStartOffset) { HeaderStartOffset = Out.tell(); - if (Kind == object::Archive::K_GNU) - printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0); - else + if (isBSDLike(Kind)) printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0); + else + printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0); BodyStartOffset = Out.tell(); print32(Out, Kind, 0); // number of entries or bytes } @@ -299,7 +318,8 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, continue; if (!(Symflags & object::SymbolRef::SF_Global)) continue; - if (Symflags & object::SymbolRef::SF_Undefined) + if (Symflags & object::SymbolRef::SF_Undefined && + !(Symflags & object::SymbolRef::SF_Indirect)) continue; unsigned NameOffset = NameOS.tell(); @@ -307,7 +327,7 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, return EC; NameOS << '\0'; MemberOffsetRefs.push_back(MemberNum); - if (Kind == object::Archive::K_BSD) + if (isBSDLike(Kind)) print32(Out, Kind, NameOffset); print32(Out, Kind, 0); // member offset } @@ -316,10 +336,21 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, if (HeaderStartOffset == 0) return 0; + // ld64 prefers the cctools type archive which pads its string table to a + // boundary of sizeof(int32_t). + if (isBSDLike(Kind)) + for (unsigned P = OffsetToAlignment(NameOS.tell(), sizeof(int32_t)); P--;) + NameOS << '\0'; + StringRef StringTable = NameOS.str(); - if (Kind == object::Archive::K_BSD) + if (isBSDLike(Kind)) print32(Out, Kind, StringTable.size()); // byte count of the string table Out << StringTable; + // If there are no symbols, emit an empty symbol table, to satisfy Solaris + // tools, older versions of which expect a symbol table in a non-empty + // archive, regardless of whether there are any symbols in it. + if (StringTable.size() == 0) + print32(Out, Kind, 0); // ld64 requires the next member header to start at an offset that is // 4 bytes aligned. @@ -336,10 +367,10 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, // Patch up the number of symbols. Out.seek(BodyStartOffset); unsigned NumSyms = MemberOffsetRefs.size(); - if (Kind == object::Archive::K_GNU) - print32(Out, Kind, NumSyms); - else + if (isBSDLike(Kind)) print32(Out, Kind, NumSyms * 8); + else + print32(Out, Kind, NumSyms); Out.seek(Pos); return BodyStartOffset + 4; @@ -351,8 +382,7 @@ llvm::writeArchive(StringRef ArcName, bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic, bool Thin, std::unique_ptr<MemoryBuffer> OldArchiveBuf) { - assert((!Thin || Kind == object::Archive::K_GNU) && - "Only the gnu format has a thin mode"); + assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); SmallString<128> TmpArchive; int TmpArchiveFD; if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", @@ -368,10 +398,6 @@ llvm::writeArchive(StringRef ArcName, std::vector<unsigned> MemberOffsetRefs; - std::vector<std::unique_ptr<MemoryBuffer>> Buffers; - std::vector<MemoryBufferRef> Members; - std::vector<sys::fs::file_status> NewMemberStatus; - unsigned MemberReferenceOffset = 0; if (WriteSymtab) { ErrorOr<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable( @@ -382,25 +408,34 @@ llvm::writeArchive(StringRef ArcName, } std::vector<unsigned> StringMapIndexes; - if (Kind != object::Archive::K_BSD) + if (!isBSDLike(Kind)) writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin); std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin(); std::vector<unsigned> MemberOffset; for (const NewArchiveMember &M : NewMembers) { MemoryBufferRef File = M.Buf->getMemBufferRef(); + unsigned Padding = 0; unsigned Pos = Out.tell(); MemberOffset.push_back(Pos); - printMemberHeader(Out, Kind, Thin, - sys::path::filename(M.Buf->getBufferIdentifier()), - StringMapIndexIter, M.ModTime, M.UID, M.GID, M.Perms, - M.Buf->getBufferSize()); + // ld64 expects the members to be 8-byte aligned for 64-bit content and at + // least 4-byte aligned for 32-bit content. Opt for the larger encoding + // uniformly. This matches the behaviour with cctools and ensures that ld64 + // is happy with archives that we generate. + if (Kind == object::Archive::K_DARWIN) + Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8); + + printMemberHeader(Out, Kind, Thin, M.MemberName, StringMapIndexIter, + M.ModTime, M.UID, M.GID, M.Perms, + M.Buf->getBufferSize() + Padding); if (!Thin) Out << File.getBuffer(); + while (Padding--) + Out << '\n'; if (Out.tell() % 2) Out << '\n'; } @@ -408,7 +443,7 @@ llvm::writeArchive(StringRef ArcName, if (MemberReferenceOffset) { Out.seek(MemberReferenceOffset); for (unsigned MemberNum : MemberOffsetRefs) { - if (Kind == object::Archive::K_BSD) + if (isBSDLike(Kind)) Out.seek(Out.tell() + 4); // skip over the string offset print32(Out, Kind, MemberOffset[MemberNum]); } diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp index 8467d34..c4565db 100644 --- a/contrib/llvm/lib/Object/Binary.cpp +++ b/contrib/llvm/lib/Object/Binary.cpp @@ -1,4 +1,4 @@ -//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===// +//===- Binary.cpp - A generic binary file ---------------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,19 +13,25 @@ #include "llvm/Object/Binary.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" - -// Include headers for createBinary. +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/Error.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/WindowsResource.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <memory> +#include <system_error> using namespace llvm; using namespace object; -Binary::~Binary() {} +Binary::~Binary() = default; Binary::Binary(unsigned int Type, MemoryBufferRef Source) : TypeID(Type), Data(Source) {} @@ -38,40 +44,41 @@ MemoryBufferRef Binary::getMemoryBufferRef() const { return Data; } Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer, LLVMContext *Context) { - sys::fs::file_magic Type = sys::fs::identify_magic(Buffer.getBuffer()); + file_magic Type = identify_magic(Buffer.getBuffer()); switch (Type) { - case sys::fs::file_magic::archive: - return Archive::create(Buffer); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::coff_object: - case sys::fs::file_magic::coff_import_library: - case sys::fs::file_magic::pecoff_executable: - case sys::fs::file_magic::bitcode: - case sys::fs::file_magic::wasm_object: - return ObjectFile::createSymbolicFile(Buffer, Type, Context); - case sys::fs::file_magic::macho_universal_binary: - return MachOUniversalBinary::create(Buffer); - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::windows_resource: - // Unrecognized object file format. - return errorCodeToError(object_error::invalid_file_type); + case file_magic::archive: + return Archive::create(Buffer); + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: + case file_magic::bitcode: + case file_magic::wasm_object: + return ObjectFile::createSymbolicFile(Buffer, Type, Context); + case file_magic::macho_universal_binary: + return MachOUniversalBinary::create(Buffer); + case file_magic::windows_resource: + return WindowsResource::createWindowsResource(Buffer); + case file_magic::unknown: + case file_magic::coff_cl_gl_object: + // Unrecognized object file format. + return errorCodeToError(object_error::invalid_file_type); } llvm_unreachable("Unexpected Binary File Type"); } diff --git a/contrib/llvm/lib/Object/COFFImportFile.cpp b/contrib/llvm/lib/Object/COFFImportFile.cpp new file mode 100644 index 0000000..ff03946 --- /dev/null +++ b/contrib/llvm/lib/Object/COFFImportFile.cpp @@ -0,0 +1,612 @@ +//===- COFFImportFile.cpp - COFF short import file implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the writeImportLibrary function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFFImportFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +#include <cstdint> +#include <map> +#include <set> +#include <string> +#include <vector> + +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm; + +namespace llvm { +namespace object { + +static bool is32bit(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return false; + case IMAGE_FILE_MACHINE_ARMNT: + case IMAGE_FILE_MACHINE_I386: + return true; + } +} + +static uint16_t getImgRelRelocation(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return IMAGE_REL_AMD64_ADDR32NB; + case IMAGE_FILE_MACHINE_ARMNT: + return IMAGE_REL_ARM_ADDR32NB; + case IMAGE_FILE_MACHINE_I386: + return IMAGE_REL_I386_DIR32NB; + } +} + +template <class T> static void append(std::vector<uint8_t> &B, const T &Data) { + size_t S = B.size(); + B.resize(S + sizeof(T)); + memcpy(&B[S], &Data, sizeof(T)); +} + +static void writeStringTable(std::vector<uint8_t> &B, + ArrayRef<const std::string> Strings) { + // The COFF string table consists of a 4-byte value which is the size of the + // table, including the length field itself. This value is followed by the + // string content itself, which is an array of null-terminated C-style + // strings. The termination is important as they are referenced to by offset + // by the symbol entity in the file format. + + size_t Pos = B.size(); + size_t Offset = B.size(); + + // Skip over the length field, we will fill it in later as we will have + // computed the length while emitting the string content itself. + Pos += sizeof(uint32_t); + + for (const auto &S : Strings) { + B.resize(Pos + S.length() + 1); + strcpy(reinterpret_cast<char *>(&B[Pos]), S.c_str()); + Pos += S.length() + 1; + } + + // Backfill the length of the table now that it has been computed. + support::ulittle32_t Length(B.size() - Offset); + support::endian::write32le(&B[Offset], Length); +} + +static ImportNameType getNameType(StringRef Sym, StringRef ExtName, + MachineTypes Machine) { + if (Sym != ExtName) + return IMPORT_NAME_UNDECORATE; + if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_")) + return IMPORT_NAME_NOPREFIX; + return IMPORT_NAME; +} + +static Expected<std::string> replace(StringRef S, StringRef From, + StringRef To) { + size_t Pos = S.find(From); + + // From and To may be mangled, but substrings in S may not. + if (Pos == StringRef::npos && From.startswith("_") && To.startswith("_")) { + From = From.substr(1); + To = To.substr(1); + Pos = S.find(From); + } + + if (Pos == StringRef::npos) { + return make_error<StringError>( + StringRef(Twine(S + ": replacing '" + From + + "' with '" + To + "' failed").str()), object_error::parse_failed); + } + + return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); +} + +static const std::string NullImportDescriptorSymbolName = + "__NULL_IMPORT_DESCRIPTOR"; + +namespace { +// This class constructs various small object files necessary to support linking +// symbols imported from a DLL. The contents are pretty strictly defined and +// nearly entirely static. The details of the structures files are defined in +// WINNT.h and the PE/COFF specification. +class ObjectFactory { + using u16 = support::ulittle16_t; + using u32 = support::ulittle32_t; + MachineTypes Machine; + BumpPtrAllocator Alloc; + StringRef ImportName; + StringRef Library; + std::string ImportDescriptorSymbolName; + std::string NullThunkSymbolName; + +public: + ObjectFactory(StringRef S, MachineTypes M) + : Machine(M), ImportName(S), Library(S.drop_back(4)), + ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), + NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} + + // Creates an Import Descriptor. This is a small object file which contains a + // reference to the terminators and contains the library name (entry) for the + // import name table. It will force the linker to construct the necessary + // structure to import symbols from the DLL. + NewArchiveMember createImportDescriptor(std::vector<uint8_t> &Buffer); + + // Creates a NULL import descriptor. This is a small object file whcih + // contains a NULL import descriptor. It is used to terminate the imports + // from a specific DLL. + NewArchiveMember createNullImportDescriptor(std::vector<uint8_t> &Buffer); + + // Create a NULL Thunk Entry. This is a small object file which contains a + // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It + // is used to terminate the IAT and ILT. + NewArchiveMember createNullThunk(std::vector<uint8_t> &Buffer); + + // Create a short import file which is described in PE/COFF spec 7. Import + // Library Format. + NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, + ImportType Type, ImportNameType NameType); + + // Create a weak external file which is described in PE/COFF Aux Format 3. + NewArchiveMember createWeakExternal(StringRef Sym, StringRef Weak, bool Imp); +}; +} // namespace + +NewArchiveMember +ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) { + const uint32_t NumberOfSections = 2; + const uint32_t NumberOfSymbols = 7; + const uint32_t NumberOfRelocations = 3; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$2 + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation) + + // .idata$4 + (ImportName.size() + 1)), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry)), + u32(0), + u16(NumberOfRelocations), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, + u32(0), + u32(0), + u32(ImportName.size() + 1), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$2 + const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + const coff_relocation RelocationTable[NumberOfRelocations] = { + {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), + u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), + u32(3), u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)), + u32(4), u16(getImgRelRelocation(Machine))}, + }; + append(Buffer, RelocationTable); + + // .idata$6 + auto S = Buffer.size(); + Buffer.resize(S + ImportName.size() + 1); + memcpy(&Buffer[S], ImportName.data(), ImportName.size()); + Buffer[S + ImportName.size()] = '\0'; + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}}, + u32(0), + u16(2), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + // TODO: Name.Offset.Offset here and in the all similar places below + // suggests a names refactoring. Maybe StringTableOffset.Value? + SymbolTable[0].Name.Offset.Offset = + sizeof(uint32_t); + SymbolTable[5].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; + SymbolTable[6].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + + NullImportDescriptorSymbolName.length() + 1; + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, + {ImportDescriptorSymbolName, NullImportDescriptorSymbolName, + NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, ImportName)}; +} + +NewArchiveMember +ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) { + const uint32_t NumberOfSections = 1; + const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$3 + sizeof(coff_import_directory_table_entry)), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + + (NumberOfSections * sizeof(coff_section))), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$3 + const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullImportDescriptorSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, ImportName)}; +} + +NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) { + const uint32_t NumberOfSections = 2; + const uint32_t NumberOfSymbols = 1; + uint32_t VASize = is32bit(Machine) ? 4 : 8; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$5 + VASize + + // .idata$4 + VASize), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + VASize), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$5, ILT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // .idata$4, IAT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullThunkSymbolName}); + + StringRef F{reinterpret_cast<const char *>(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef{F, ImportName}}; +} + +NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, + uint16_t Ordinal, + ImportType ImportType, + ImportNameType NameType) { + size_t ImpSize = ImportName.size() + Sym.size() + 2; // +2 for NULs + size_t Size = sizeof(coff_import_header) + ImpSize; + char *Buf = Alloc.Allocate<char>(Size); + memset(Buf, 0, Size); + char *P = Buf; + + // Write short import library. + auto *Imp = reinterpret_cast<coff_import_header *>(P); + P += sizeof(*Imp); + Imp->Sig2 = 0xFFFF; + Imp->Machine = Machine; + Imp->SizeOfData = ImpSize; + if (Ordinal > 0) + Imp->OrdinalHint = Ordinal; + Imp->TypeInfo = (NameType << 2) | ImportType; + + // Write symbol name and DLL name. + memcpy(P, Sym.data(), Sym.size()); + P += Sym.size() + 1; + memcpy(P, ImportName.data(), ImportName.size()); + + return {MemoryBufferRef(StringRef(Buf, Size), ImportName)}; +} + +NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, + StringRef Weak, bool Imp) { + std::vector<uint8_t> Buffer; + const uint32_t NumberOfSections = 1; + const uint32_t NumberOfSymbols = 5; + + // COFF Header + coff_file_header Header{ + u16(0), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))), + u32(NumberOfSymbols), + u16(0), + u16(0), + }; + append(Buffer, Header); + + // Section Header Table + const coff_section SectionTable[NumberOfSections] = { + {{'.', 'd', 'r', 'e', 'c', 't', 'v', 'e'}, + u32(0), + u32(0), + u32(0), + u32(0), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_LNK_INFO | IMAGE_SCN_LNK_REMOVE)}}; + append(Buffer, SectionTable); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{'@', 'c', 'o', 'm', 'p', '.', 'i', 'd'}}, + u32(0), + u16(0xFFFF), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'@', 'f', 'e', 'a', 't', '.', '0', '0'}}, + u32(0), + u16(0xFFFF), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_WEAK_EXTERNAL, + 1}, + {{{2, 0, 0, 0, 3, 0, 0, 0}}, u32(0), u16(0), u16(0), uint8_t(0), 0}, + }; + SymbolTable[2].Name.Offset.Offset = sizeof(uint32_t); + + //__imp_ String Table + StringRef Prefix = Imp ? "__imp_" : ""; + SymbolTable[3].Name.Offset.Offset = + sizeof(uint32_t) + Sym.size() + Prefix.size() + 1; + append(Buffer, SymbolTable); + writeStringTable(Buffer, {(Prefix + Sym).str(), + (Prefix + Weak).str()}); + + // Copied here so we can still use writeStringTable + char *Buf = Alloc.Allocate<char>(Buffer.size()); + memcpy(Buf, Buffer.data(), Buffer.size()); + return {MemoryBufferRef(StringRef(Buf, Buffer.size()), ImportName)}; +} + +std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, + ArrayRef<COFFShortExport> Exports, + MachineTypes Machine, bool MakeWeakAliases) { + + std::vector<NewArchiveMember> Members; + ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine); + + std::vector<uint8_t> ImportDescriptor; + Members.push_back(OF.createImportDescriptor(ImportDescriptor)); + + std::vector<uint8_t> NullImportDescriptor; + Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor)); + + std::vector<uint8_t> NullThunk; + Members.push_back(OF.createNullThunk(NullThunk)); + + for (COFFShortExport E : Exports) { + if (E.Private) + continue; + + if (E.isWeak() && MakeWeakAliases) { + Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, false)); + Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, true)); + continue; + } + + ImportType ImportType = IMPORT_CODE; + if (E.Data) + ImportType = IMPORT_DATA; + if (E.Constant) + ImportType = IMPORT_CONST; + + StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName; + ImportNameType NameType = getNameType(SymbolName, E.Name, Machine); + Expected<std::string> Name = E.ExtName.empty() + ? SymbolName + : replace(SymbolName, E.Name, E.ExtName); + + if (!Name) { + return errorToErrorCode(Name.takeError()); + } + + Members.push_back( + OF.createShortImport(*Name, E.Ordinal, ImportType, NameType)); + } + + std::pair<StringRef, std::error_code> Result = + writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU, + /*Deterministic*/ true, /*Thin*/ false); + + return Result.second; +} + +} // namespace object +} // namespace llvm diff --git a/contrib/llvm/lib/Object/COFFModuleDefinition.cpp b/contrib/llvm/lib/Object/COFFModuleDefinition.cpp new file mode 100644 index 0000000..510eac8 --- /dev/null +++ b/contrib/llvm/lib/Object/COFFModuleDefinition.cpp @@ -0,0 +1,337 @@ +//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Windows-specific. +// A parser for the module-definition file (.def file). +// +// The format of module-definition files are described in this document: +// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFFModuleDefinition.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::COFF; +using namespace llvm; + +namespace llvm { +namespace object { + +enum Kind { + Unknown, + Eof, + Identifier, + Comma, + Equal, + KwBase, + KwConstant, + KwData, + KwExports, + KwHeapsize, + KwLibrary, + KwName, + KwNoname, + KwPrivate, + KwStacksize, + KwVersion, +}; + +struct Token { + explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} + Kind K; + StringRef Value; +}; + +static bool isDecorated(StringRef Sym, bool MingwDef) { + // mingw does not prepend "_". + return (!MingwDef && Sym.startswith("_")) || Sym.startswith("@") || + Sym.startswith("?"); +} + +static Error createError(const Twine &Err) { + return make_error<StringError>(StringRef(Err.str()), + object_error::parse_failed); +} + +class Lexer { +public: + Lexer(StringRef S) : Buf(S) {} + + Token lex() { + Buf = Buf.trim(); + if (Buf.empty()) + return Token(Eof); + + switch (Buf[0]) { + case '\0': + return Token(Eof); + case ';': { + size_t End = Buf.find('\n'); + Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); + return lex(); + } + case '=': + Buf = Buf.drop_front(); + // GNU dlltool accepts both = and ==. + if (Buf.startswith("=")) + Buf = Buf.drop_front(); + return Token(Equal, "="); + case ',': + Buf = Buf.drop_front(); + return Token(Comma, ","); + case '"': { + StringRef S; + std::tie(S, Buf) = Buf.substr(1).split('"'); + return Token(Identifier, S); + } + default: { + size_t End = Buf.find_first_of("=,\r\n \t\v"); + StringRef Word = Buf.substr(0, End); + Kind K = llvm::StringSwitch<Kind>(Word) + .Case("BASE", KwBase) + .Case("CONSTANT", KwConstant) + .Case("DATA", KwData) + .Case("EXPORTS", KwExports) + .Case("HEAPSIZE", KwHeapsize) + .Case("LIBRARY", KwLibrary) + .Case("NAME", KwName) + .Case("NONAME", KwNoname) + .Case("PRIVATE", KwPrivate) + .Case("STACKSIZE", KwStacksize) + .Case("VERSION", KwVersion) + .Default(Identifier); + Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); + return Token(K, Word); + } + } + } + +private: + StringRef Buf; +}; + +class Parser { +public: + explicit Parser(StringRef S, MachineTypes M, bool B) + : Lex(S), Machine(M), MingwDef(B) {} + + Expected<COFFModuleDefinition> parse() { + do { + if (Error Err = parseOne()) + return std::move(Err); + } while (Tok.K != Eof); + return Info; + } + +private: + void read() { + if (Stack.empty()) { + Tok = Lex.lex(); + return; + } + Tok = Stack.back(); + Stack.pop_back(); + } + + Error readAsInt(uint64_t *I) { + read(); + if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) + return createError("integer expected"); + return Error::success(); + } + + Error expect(Kind Expected, StringRef Msg) { + read(); + if (Tok.K != Expected) + return createError(Msg); + return Error::success(); + } + + void unget() { Stack.push_back(Tok); } + + Error parseOne() { + read(); + switch (Tok.K) { + case Eof: + return Error::success(); + case KwExports: + for (;;) { + read(); + if (Tok.K != Identifier) { + unget(); + return Error::success(); + } + if (Error Err = parseExport()) + return Err; + } + case KwHeapsize: + return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); + case KwStacksize: + return parseNumbers(&Info.StackReserve, &Info.StackCommit); + case KwLibrary: + case KwName: { + bool IsDll = Tok.K == KwLibrary; // Check before parseName. + std::string Name; + if (Error Err = parseName(&Name, &Info.ImageBase)) + return Err; + + Info.ImportName = Name; + + // Set the output file, but don't override /out if it was already passed. + if (Info.OutputFile.empty()) { + Info.OutputFile = Name; + // Append the appropriate file extension if not already present. + if (!sys::path::has_extension(Name)) + Info.OutputFile += IsDll ? ".dll" : ".exe"; + } + + return Error::success(); + } + case KwVersion: + return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); + default: + return createError("unknown directive: " + Tok.Value); + } + } + + Error parseExport() { + COFFShortExport E; + E.Name = Tok.Value; + read(); + if (Tok.K == Equal) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + E.ExtName = E.Name; + E.Name = Tok.Value; + } else { + unget(); + } + + if (Machine == IMAGE_FILE_MACHINE_I386) { + if (!isDecorated(E.Name, MingwDef)) + E.Name = (std::string("_").append(E.Name)); + if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) + E.ExtName = (std::string("_").append(E.ExtName)); + } + + for (;;) { + read(); + if (Tok.K == Identifier && Tok.Value[0] == '@') { + if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { + // Not an ordinal modifier at all, but the next export (fastcall + // decorated) - complete the current one. + unget(); + Info.Exports.push_back(E); + return Error::success(); + } + read(); + if (Tok.K == KwNoname) { + E.Noname = true; + } else { + unget(); + } + continue; + } + if (Tok.K == KwData) { + E.Data = true; + continue; + } + if (Tok.K == KwConstant) { + E.Constant = true; + continue; + } + if (Tok.K == KwPrivate) { + E.Private = true; + continue; + } + unget(); + Info.Exports.push_back(E); + return Error::success(); + } + } + + // HEAPSIZE/STACKSIZE reserve[,commit] + Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { + if (Error Err = readAsInt(Reserve)) + return Err; + read(); + if (Tok.K != Comma) { + unget(); + Commit = nullptr; + return Error::success(); + } + if (Error Err = readAsInt(Commit)) + return Err; + return Error::success(); + } + + // NAME outputPath [BASE=address] + Error parseName(std::string *Out, uint64_t *Baseaddr) { + read(); + if (Tok.K == Identifier) { + *Out = Tok.Value; + } else { + *Out = ""; + unget(); + return Error::success(); + } + read(); + if (Tok.K == KwBase) { + if (Error Err = expect(Equal, "'=' expected")) + return Err; + if (Error Err = readAsInt(Baseaddr)) + return Err; + } else { + unget(); + *Baseaddr = 0; + } + return Error::success(); + } + + // VERSION major[.minor] + Error parseVersion(uint32_t *Major, uint32_t *Minor) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + StringRef V1, V2; + std::tie(V1, V2) = Tok.Value.split('.'); + if (V1.getAsInteger(10, *Major)) + return createError("integer expected, but got " + Tok.Value); + if (V2.empty()) + *Minor = 0; + else if (V2.getAsInteger(10, *Minor)) + return createError("integer expected, but got " + Tok.Value); + return Error::success(); + } + + Lexer Lex; + Token Tok; + std::vector<Token> Stack; + MachineTypes Machine; + COFFModuleDefinition Info; + bool MingwDef; +}; + +Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, + MachineTypes Machine, + bool MingwDef) { + return Parser(MB.getBuffer(), Machine, MingwDef).parse(); +} + +} // namespace object +} // namespace llvm diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index a2d8f12..0a20534 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -1,4 +1,4 @@ -//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===// +//===- COFFObjectFile.cpp - COFF object file implementation ---------------===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,29 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/COFF.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <cctype> +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> #include <limits> +#include <memory> +#include <system_error> using namespace llvm; using namespace object; @@ -116,7 +129,7 @@ const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const { const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const { const coff_section *Addr = reinterpret_cast<const coff_section*>(Ref.p); -# ifndef NDEBUG +#ifndef NDEBUG // Verify that the section points to a valid entry in the section table. if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections())) report_fatal_error("Section was outside of section table."); @@ -124,7 +137,7 @@ const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const { uintptr_t Offset = uintptr_t(Addr) - uintptr_t(SectionTable); assert(Offset % sizeof(coff_section) == 0 && "Section did not point to the beginning of a section"); -# endif +#endif return Addr; } @@ -147,8 +160,7 @@ void COFFObjectFile::moveSymbolNext(DataRefImpl &Ref) const { Expected<StringRef> COFFObjectFile::getSymbolName(DataRefImpl Ref) const { COFFSymbolRef Symb = getCOFFSymbol(Ref); StringRef Result; - std::error_code EC = getSymbolName(Symb, Result); - if (EC) + if (std::error_code EC = getSymbolName(Symb, Result)) return errorCodeToError(EC); return Result; } @@ -215,8 +227,11 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { if (Symb.isExternal() || Symb.isWeakExternal()) Result |= SymbolRef::SF_Global; - if (Symb.isWeakExternal()) + if (Symb.isWeakExternal()) { Result |= SymbolRef::SF_Weak; + // We use indirect to allow the archiver to write weak externs + Result |= SymbolRef::SF_Indirect; + } if (Symb.getSectionNumber() == COFF::IMAGE_SYM_ABSOLUTE) Result |= SymbolRef::SF_Absolute; @@ -281,6 +296,10 @@ uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const { return Result; } +uint64_t COFFObjectFile::getSectionIndex(DataRefImpl Sec) const { + return toSec(Sec) - SectionTable; +} + uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const { return getSectionSize(toSec(Ref)); } @@ -634,6 +653,23 @@ std::error_code COFFObjectFile::initDebugDirectoryPtr() { return std::error_code(); } +std::error_code COFFObjectFile::initLoadConfigPtr() { + // Get the RVA of the debug directory. Do nothing if it does not exist. + const data_directory *DataEntry; + if (getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataEntry)) + return std::error_code(); + + // Do nothing if the RVA is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return std::error_code(); + uintptr_t IntPtr = 0; + if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr)) + return EC; + + LoadConfig = (const void *)IntPtr; + return std::error_code(); +} + COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) : ObjectFile(Binary::ID_COFF, Object), COFFHeader(nullptr), COFFBigObjHeader(nullptr), PE32Header(nullptr), PE32PlusHeader(nullptr), @@ -768,6 +804,9 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC) if ((EC = initDebugDirectoryPtr())) return; + if ((EC = initLoadConfigPtr())) + return; + EC = std::error_code(); } @@ -847,7 +886,7 @@ base_reloc_iterator COFFObjectFile::base_reloc_end() const { } uint8_t COFFObjectFile::getBytesInAddress() const { - return getArch() == Triple::x86_64 ? 8 : 4; + return getArch() == Triple::x86_64 || getArch() == Triple::aarch64 ? 8 : 4; } StringRef COFFObjectFile::getFileFormatName() const { @@ -985,7 +1024,7 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const { if (Symbol.getNumberOfAuxSymbols() > 0) { // AUX data comes immediately after the symbol in COFF Aux = reinterpret_cast<const uint8_t *>(Symbol.getRawPtr()) + SymbolSize; -# ifndef NDEBUG +#ifndef NDEBUG // Verify that the Aux symbol points to a valid entry in the symbol table. uintptr_t Offset = uintptr_t(Aux) - uintptr_t(base()); if (Offset < getPointerToSymbolTable() || @@ -995,7 +1034,7 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const { assert((Offset - getPointerToSymbolTable()) % SymbolSize == 0 && "Aux Symbol data did not point to the beginning of a symbol"); -# endif +#endif } return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize); } @@ -1050,7 +1089,7 @@ COFFObjectFile::getSectionContents(const coff_section *Sec, // In COFF, a virtual section won't have any in-file // content, so the file pointer to the content will be zero. if (Sec->PointerToRawData == 0) - return object_error::parse_failed; + return std::error_code(); // The only thing that we need to verify is that the contents is contained // within the file bounds. We don't need to make sure it doesn't cover other // data, as there's nothing that says that is not allowed. @@ -1180,6 +1219,29 @@ void COFFObjectFile::getRelocationTypeName( Res = "Unknown"; } break; + case COFF::IMAGE_FILE_MACHINE_ARM64: + switch (Reloc->Type) { + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32NB); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH26); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEBASE_REL21); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_REL21); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_PAGEOFFSET_12L); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_HIGH12A); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECREL_LOW12L); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_TOKEN); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_SECTION); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR64); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19); + LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14); + default: + Res = "Unknown"; + } + break; case COFF::IMAGE_FILE_MACHINE_I386: switch (Reloc->Type) { LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE); @@ -1579,3 +1641,42 @@ std::error_code BaseRelocRef::getRVA(uint32_t &Result) const { Result = Header->PageRVA + Entry[Index].getOffset(); return std::error_code(); } + +#define RETURN_IF_ERROR(X) \ + if (auto EC = errorToErrorCode(X)) \ + return EC; + +ErrorOr<ArrayRef<UTF16>> ResourceSectionRef::getDirStringAtOffset(uint32_t Offset) { + BinaryStreamReader Reader = BinaryStreamReader(BBS); + Reader.setOffset(Offset); + uint16_t Length; + RETURN_IF_ERROR(Reader.readInteger(Length)); + ArrayRef<UTF16> RawDirString; + RETURN_IF_ERROR(Reader.readArray(RawDirString, Length)); + return RawDirString; +} + +ErrorOr<ArrayRef<UTF16>> +ResourceSectionRef::getEntryNameString(const coff_resource_dir_entry &Entry) { + return getDirStringAtOffset(Entry.Identifier.getNameOffset()); +} + +ErrorOr<const coff_resource_dir_table &> +ResourceSectionRef::getTableAtOffset(uint32_t Offset) { + const coff_resource_dir_table *Table = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Table)); + assert(Table != nullptr); + return *Table; +} + +ErrorOr<const coff_resource_dir_table &> +ResourceSectionRef::getEntrySubDir(const coff_resource_dir_entry &Entry) { + return getTableAtOffset(Entry.Offset.value()); +} + +ErrorOr<const coff_resource_dir_table &> ResourceSectionRef::getBaseTable() { + return getTableAtOffset(0); +} diff --git a/contrib/llvm/lib/Object/Decompressor.cpp b/contrib/llvm/lib/Object/Decompressor.cpp index bca41fd..53f084d 100644 --- a/contrib/llvm/lib/Object/Decompressor.cpp +++ b/contrib/llvm/lib/Object/Decompressor.cpp @@ -8,11 +8,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/Decompressor.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/ELF.h" using namespace llvm; using namespace llvm::support::endian; @@ -88,15 +88,7 @@ bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name); } -Error Decompressor::decompress(SmallString<32> &Out) { - Out.resize(DecompressedSize); - return decompress({Out.data(), (size_t)DecompressedSize}); -} - Error Decompressor::decompress(MutableArrayRef<char> Buffer) { size_t Size = Buffer.size(); - zlib::Status Status = zlib::uncompress(SectionData, Buffer.data(), Size); - if (Status != zlib::StatusOK) - return createError("decompression failed"); - return Error::success(); + return zlib::uncompress(SectionData, Buffer.data(), Size); } diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp index 23682e1..448fb1b 100644 --- a/contrib/llvm/lib/Object/ELF.cpp +++ b/contrib/llvm/lib/Object/ELF.cpp @@ -1,4 +1,4 @@ -//===- ELF.cpp - ELF object file implementation -----------------*- C++ -*-===// +//===- ELF.cpp - ELF object file implementation ---------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,19 +8,23 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ELF.h" +#include "llvm/BinaryFormat/ELF.h" -namespace llvm { -namespace object { +using namespace llvm; +using namespace object; -#define ELF_RELOC(name, value) \ - case ELF::name: \ - return #name; \ +#define STRINGIFY_ENUM_CASE(ns, name) \ + case ns::name: \ + return #name; -StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { +#define ELF_RELOC(name, value) STRINGIFY_ENUM_CASE(ELF, name) + +StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + uint32_t Type) { switch (Machine) { case ELF::EM_X86_64: switch (Type) { -#include "llvm/Support/ELFRelocs/x86_64.def" +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" default: break; } @@ -28,77 +32,77 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { case ELF::EM_386: case ELF::EM_IAMCU: switch (Type) { -#include "llvm/Support/ELFRelocs/i386.def" +#include "llvm/BinaryFormat/ELFRelocs/i386.def" default: break; } break; case ELF::EM_MIPS: switch (Type) { -#include "llvm/Support/ELFRelocs/Mips.def" +#include "llvm/BinaryFormat/ELFRelocs/Mips.def" default: break; } break; case ELF::EM_AARCH64: switch (Type) { -#include "llvm/Support/ELFRelocs/AArch64.def" +#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" default: break; } break; case ELF::EM_ARM: switch (Type) { -#include "llvm/Support/ELFRelocs/ARM.def" +#include "llvm/BinaryFormat/ELFRelocs/ARM.def" default: break; } break; case ELF::EM_AVR: switch (Type) { -#include "llvm/Support/ELFRelocs/AVR.def" +#include "llvm/BinaryFormat/ELFRelocs/AVR.def" default: break; } break; case ELF::EM_HEXAGON: switch (Type) { -#include "llvm/Support/ELFRelocs/Hexagon.def" +#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def" default: break; } break; case ELF::EM_LANAI: switch (Type) { -#include "llvm/Support/ELFRelocs/Lanai.def" +#include "llvm/BinaryFormat/ELFRelocs/Lanai.def" default: break; } break; case ELF::EM_PPC: switch (Type) { -#include "llvm/Support/ELFRelocs/PowerPC.def" +#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def" default: break; } break; case ELF::EM_PPC64: switch (Type) { -#include "llvm/Support/ELFRelocs/PowerPC64.def" +#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" default: break; } break; case ELF::EM_RISCV: switch (Type) { -#include "llvm/Support/ELFRelocs/RISCV.def" +#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" default: break; } break; case ELF::EM_S390: switch (Type) { -#include "llvm/Support/ELFRelocs/SystemZ.def" +#include "llvm/BinaryFormat/ELFRelocs/SystemZ.def" default: break; } @@ -107,27 +111,27 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { case ELF::EM_SPARC32PLUS: case ELF::EM_SPARCV9: switch (Type) { -#include "llvm/Support/ELFRelocs/Sparc.def" +#include "llvm/BinaryFormat/ELFRelocs/Sparc.def" default: break; } break; case ELF::EM_WEBASSEMBLY: switch (Type) { -#include "llvm/Support/ELFRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/ELFRelocs/WebAssembly.def" default: break; } break; case ELF::EM_AMDGPU: switch (Type) { -#include "llvm/Support/ELFRelocs/AMDGPU.def" +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" default: break; } case ELF::EM_BPF: switch (Type) { -#include "llvm/Support/ELFRelocs/BPF.def" +#include "llvm/BinaryFormat/ELFRelocs/BPF.def" default: break; } @@ -140,5 +144,61 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { #undef ELF_RELOC -} // end namespace object -} // end namespace llvm +StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { + switch (Machine) { + case ELF::EM_ARM: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_EXIDX); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_PREEMPTMAP); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_DEBUGOVERLAY); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_OVERLAYSECTION); + } + break; + case ELF::EM_HEXAGON: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_HEX_ORDERED); } + break; + case ELF::EM_X86_64: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_X86_64_UNWIND); } + break; + case ELF::EM_MIPS: + case ELF::EM_MIPS_RS3_LE: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_REGINFO); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_OPTIONS); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_DWARF); + } + break; + default: + break; + } + + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_NULL); + STRINGIFY_ENUM_CASE(ELF, SHT_PROGBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_STRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_RELA); + STRINGIFY_ENUM_CASE(ELF, SHT_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNAMIC); + STRINGIFY_ENUM_CASE(ELF, SHT_NOTE); + STRINGIFY_ENUM_CASE(ELF, SHT_NOBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_REL); + STRINGIFY_ENUM_CASE(ELF, SHT_SHLIB); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNSYM); + STRINGIFY_ENUM_CASE(ELF, SHT_INIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_FINI_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_GROUP); + STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ODRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verneed); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_versym); + default: + return "Unknown"; + } +} diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp index 4bd69e3..fa136d7 100644 --- a/contrib/llvm/lib/Object/ELFObjectFile.cpp +++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp @@ -1,4 +1,4 @@ -//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===// +//===- ELFObjectFile.cpp - ELF object file implementation -----------------===// // // The LLVM Compiler Infrastructure // @@ -12,9 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ELFObjectFile.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/ARMAttributeParser.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <system_error> +#include <utility> -namespace llvm { +using namespace llvm; using namespace object; ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) @@ -55,71 +72,245 @@ ObjectFile::createELFObjectFile(MemoryBufferRef Obj) { return std::move(R); } -SubtargetFeatures ELFObjectFileBase::getFeatures() const { - switch (getEMachine()) { - case ELF::EM_MIPS: { - SubtargetFeatures Features; - unsigned PlatformFlags; - getPlatformFlags(PlatformFlags); +SubtargetFeatures ELFObjectFileBase::getMIPSFeatures() const { + SubtargetFeatures Features; + unsigned PlatformFlags; + getPlatformFlags(PlatformFlags); + + switch (PlatformFlags & ELF::EF_MIPS_ARCH) { + case ELF::EF_MIPS_ARCH_1: + break; + case ELF::EF_MIPS_ARCH_2: + Features.AddFeature("mips2"); + break; + case ELF::EF_MIPS_ARCH_3: + Features.AddFeature("mips3"); + break; + case ELF::EF_MIPS_ARCH_4: + Features.AddFeature("mips4"); + break; + case ELF::EF_MIPS_ARCH_5: + Features.AddFeature("mips5"); + break; + case ELF::EF_MIPS_ARCH_32: + Features.AddFeature("mips32"); + break; + case ELF::EF_MIPS_ARCH_64: + Features.AddFeature("mips64"); + break; + case ELF::EF_MIPS_ARCH_32R2: + Features.AddFeature("mips32r2"); + break; + case ELF::EF_MIPS_ARCH_64R2: + Features.AddFeature("mips64r2"); + break; + case ELF::EF_MIPS_ARCH_32R6: + Features.AddFeature("mips32r6"); + break; + case ELF::EF_MIPS_ARCH_64R6: + Features.AddFeature("mips64r6"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } + + switch (PlatformFlags & ELF::EF_MIPS_MACH) { + case ELF::EF_MIPS_MACH_NONE: + // No feature associated with this value. + break; + case ELF::EF_MIPS_MACH_OCTEON: + Features.AddFeature("cnmips"); + break; + default: + llvm_unreachable("Unknown EF_MIPS_ARCH value"); + } + + if (PlatformFlags & ELF::EF_MIPS_ARCH_ASE_M16) + Features.AddFeature("mips16"); + if (PlatformFlags & ELF::EF_MIPS_MICROMIPS) + Features.AddFeature("micromips"); + + return Features; +} + +SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { + SubtargetFeatures Features; + ARMAttributeParser Attributes; + std::error_code EC = getBuildAttributes(Attributes); + if (EC) + return SubtargetFeatures(); + + // both ARMv7-M and R have to support thumb hardware div + bool isV7 = false; + if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) + isV7 = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch) + == ARMBuildAttrs::v7; - switch (PlatformFlags & ELF::EF_MIPS_ARCH) { - case ELF::EF_MIPS_ARCH_1: + if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch_profile)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile)) { + case ARMBuildAttrs::ApplicationProfile: + Features.AddFeature("aclass"); break; - case ELF::EF_MIPS_ARCH_2: - Features.AddFeature("mips2"); + case ARMBuildAttrs::RealTimeProfile: + Features.AddFeature("rclass"); + if (isV7) + Features.AddFeature("hwdiv"); break; - case ELF::EF_MIPS_ARCH_3: - Features.AddFeature("mips3"); + case ARMBuildAttrs::MicroControllerProfile: + Features.AddFeature("mclass"); + if (isV7) + Features.AddFeature("hwdiv"); break; - case ELF::EF_MIPS_ARCH_4: - Features.AddFeature("mips4"); + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::THUMB_ISA_use)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use)) { + default: break; - case ELF::EF_MIPS_ARCH_5: - Features.AddFeature("mips5"); + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("thumb", false); + Features.AddFeature("thumb2", false); break; - case ELF::EF_MIPS_ARCH_32: - Features.AddFeature("mips32"); + case ARMBuildAttrs::AllowThumb32: + Features.AddFeature("thumb2"); break; - case ELF::EF_MIPS_ARCH_64: - Features.AddFeature("mips64"); + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::FP_arch)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::FP_arch)) { + default: break; - case ELF::EF_MIPS_ARCH_32R2: - Features.AddFeature("mips32r2"); + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("vfp2", false); + Features.AddFeature("vfp3", false); + Features.AddFeature("vfp4", false); break; - case ELF::EF_MIPS_ARCH_64R2: - Features.AddFeature("mips64r2"); + case ARMBuildAttrs::AllowFPv2: + Features.AddFeature("vfp2"); break; - case ELF::EF_MIPS_ARCH_32R6: - Features.AddFeature("mips32r6"); + case ARMBuildAttrs::AllowFPv3A: + case ARMBuildAttrs::AllowFPv3B: + Features.AddFeature("vfp3"); break; - case ELF::EF_MIPS_ARCH_64R6: - Features.AddFeature("mips64r6"); + case ARMBuildAttrs::AllowFPv4A: + case ARMBuildAttrs::AllowFPv4B: + Features.AddFeature("vfp4"); break; - default: - llvm_unreachable("Unknown EF_MIPS_ARCH value"); } + } - switch (PlatformFlags & ELF::EF_MIPS_MACH) { - case ELF::EF_MIPS_MACH_NONE: - // No feature associated with this value. + if (Attributes.hasAttribute(ARMBuildAttrs::Advanced_SIMD_arch)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::Advanced_SIMD_arch)) { + default: + break; + case ARMBuildAttrs::Not_Allowed: + Features.AddFeature("neon", false); + Features.AddFeature("fp16", false); break; - case ELF::EF_MIPS_MACH_OCTEON: - Features.AddFeature("cnmips"); + case ARMBuildAttrs::AllowNeon: + Features.AddFeature("neon"); break; + case ARMBuildAttrs::AllowNeon2: + Features.AddFeature("neon"); + Features.AddFeature("fp16"); + break; + } + } + + if (Attributes.hasAttribute(ARMBuildAttrs::DIV_use)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::DIV_use)) { default: - llvm_unreachable("Unknown EF_MIPS_ARCH value"); + break; + case ARMBuildAttrs::DisallowDIV: + Features.AddFeature("hwdiv", false); + Features.AddFeature("hwdiv-arm", false); + break; + case ARMBuildAttrs::AllowDIVExt: + Features.AddFeature("hwdiv"); + Features.AddFeature("hwdiv-arm"); + break; } + } - if (PlatformFlags & ELF::EF_MIPS_ARCH_ASE_M16) - Features.AddFeature("mips16"); - if (PlatformFlags & ELF::EF_MIPS_MICROMIPS) - Features.AddFeature("micromips"); + return Features; +} - return Features; - } +SubtargetFeatures ELFObjectFileBase::getFeatures() const { + switch (getEMachine()) { + case ELF::EM_MIPS: + return getMIPSFeatures(); + case ELF::EM_ARM: + return getARMFeatures(); default: return SubtargetFeatures(); } } -} // end namespace llvm +// FIXME Encode from a tablegen description or target parser. +void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { + if (TheTriple.getSubArch() != Triple::NoSubArch) + return; + + ARMAttributeParser Attributes; + std::error_code EC = getBuildAttributes(Attributes); + if (EC) + return; + + std::string Triple; + // Default to ARM, but use the triple if it's been set. + if (TheTriple.getArch() == Triple::thumb || + TheTriple.getArch() == Triple::thumbeb) + Triple = "thumb"; + else + Triple = "arm"; + + if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) { + switch(Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch)) { + case ARMBuildAttrs::v4: + Triple += "v4"; + break; + case ARMBuildAttrs::v4T: + Triple += "v4t"; + break; + case ARMBuildAttrs::v5T: + Triple += "v5t"; + break; + case ARMBuildAttrs::v5TE: + Triple += "v5te"; + break; + case ARMBuildAttrs::v5TEJ: + Triple += "v5tej"; + break; + case ARMBuildAttrs::v6: + Triple += "v6"; + break; + case ARMBuildAttrs::v6KZ: + Triple += "v6kz"; + break; + case ARMBuildAttrs::v6T2: + Triple += "v6t2"; + break; + case ARMBuildAttrs::v6K: + Triple += "v6k"; + break; + case ARMBuildAttrs::v7: + Triple += "v7"; + break; + case ARMBuildAttrs::v6_M: + Triple += "v6m"; + break; + case ARMBuildAttrs::v6S_M: + Triple += "v6sm"; + break; + case ARMBuildAttrs::v7E_M: + Triple += "v7em"; + break; + } + } + if (!isLittleEndian()) + Triple += "eb"; + + TheTriple.setArchName(Triple); +} diff --git a/contrib/llvm/lib/Object/IRObjectFile.cpp b/contrib/llvm/lib/Object/IRObjectFile.cpp index adbf0de..e7807b0 100644 --- a/contrib/llvm/lib/Object/IRObjectFile.cpp +++ b/contrib/llvm/lib/Object/IRObjectFile.cpp @@ -14,6 +14,7 @@ #include "llvm/Object/IRObjectFile.h" #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/LLVMContext.h" @@ -95,13 +96,13 @@ ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInObject(const ObjectFile &Obj } ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { - sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + file_magic Type = identify_magic(Object.getBuffer()); switch (Type) { - case sys::fs::file_magic::bitcode: + case file_magic::bitcode: return Object; - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { Expected<std::unique_ptr<ObjectFile>> ObjFile = ObjectFile::createObjectFile(Object, Type); if (!ObjFile) @@ -138,3 +139,25 @@ IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { return std::unique_ptr<IRObjectFile>( new IRObjectFile(*BCOrErr, std::move(Mods))); } + +Expected<IRSymtabFile> object::readIRSymtab(MemoryBufferRef MBRef) { + IRSymtabFile F; + ErrorOr<MemoryBufferRef> BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(MBRef); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected<BitcodeFileContents> BFCOrErr = getBitcodeFileContents(*BCOrErr); + if (!BFCOrErr) + return BFCOrErr.takeError(); + + Expected<irsymtab::FileContents> FCOrErr = irsymtab::readBitcode(*BFCOrErr); + if (!FCOrErr) + return FCOrErr.takeError(); + + F.Mods = std::move(BFCOrErr->Mods); + F.Symtab = std::move(FCOrErr->Symtab); + F.Strtab = std::move(FCOrErr->Strtab); + F.TheReader = std::move(FCOrErr->TheReader); + return std::move(F); +} diff --git a/contrib/llvm/lib/Object/IRSymtab.cpp b/contrib/llvm/lib/Object/IRSymtab.cpp new file mode 100644 index 0000000..7a6424a --- /dev/null +++ b/contrib/llvm/lib/Object/IRSymtab.cpp @@ -0,0 +1,348 @@ +//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/IRSymtab.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/ObjectUtils.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/VCSRevision.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <string> +#include <utility> +#include <vector> + +using namespace llvm; +using namespace irsymtab; + +namespace { + +const char *getExpectedProducerName() { + static char DefaultName[] = LLVM_VERSION_STRING +#ifdef LLVM_REVISION + " " LLVM_REVISION +#endif + ; + // Allows for testing of the irsymtab writer and upgrade mechanism. This + // environment variable should not be set by users. + if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER")) + return OverrideName; + return DefaultName; +} + +const char *kExpectedProducerName = getExpectedProducerName(); + +/// Stores the temporary state that is required to build an IR symbol table. +struct Builder { + SmallVector<char, 0> &Symtab; + StringTableBuilder &StrtabBuilder; + StringSaver Saver; + + // This ctor initializes a StringSaver using the passed in BumpPtrAllocator. + // The StringTableBuilder does not create a copy of any strings added to it, + // so this provides somewhere to store any strings that we create. + Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder, + BumpPtrAllocator &Alloc) + : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} + + DenseMap<const Comdat *, unsigned> ComdatMap; + Mangler Mang; + Triple TT; + + std::vector<storage::Comdat> Comdats; + std::vector<storage::Module> Mods; + std::vector<storage::Symbol> Syms; + std::vector<storage::Uncommon> Uncommons; + + std::string COFFLinkerOpts; + raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts}; + + void setStr(storage::Str &S, StringRef Value) { + S.Offset = StrtabBuilder.add(Value); + S.Size = Value.size(); + } + + template <typename T> + void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) { + R.Offset = Symtab.size(); + R.Size = Objs.size(); + Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()), + reinterpret_cast<const char *>(Objs.data() + Objs.size())); + } + + Error addModule(Module *M); + Error addSymbol(const ModuleSymbolTable &Msymtab, + const SmallPtrSet<GlobalValue *, 8> &Used, + ModuleSymbolTable::Symbol Sym); + + Error build(ArrayRef<Module *> Mods); +}; + +Error Builder::addModule(Module *M) { + if (M->getDataLayoutStr().empty()) + return make_error<StringError>("input module has no datalayout", + inconvertibleErrorCode()); + + SmallPtrSet<GlobalValue *, 8> Used; + collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); + + ModuleSymbolTable Msymtab; + Msymtab.addModule(M); + + storage::Module Mod; + Mod.Begin = Syms.size(); + Mod.End = Syms.size() + Msymtab.symbols().size(); + Mod.UncBegin = Uncommons.size(); + Mods.push_back(Mod); + + if (TT.isOSBinFormatCOFF()) { + if (auto E = M->materializeMetadata()) + return E; + if (NamedMDNode *LinkerOptions = + M->getNamedMetadata("llvm.linker.options")) { + for (MDNode *MDOptions : LinkerOptions->operands()) + for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands()) + COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString(); + } + } + + for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) + if (Error Err = addSymbol(Msymtab, Used, Msym)) + return Err; + + return Error::success(); +} + +Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, + const SmallPtrSet<GlobalValue *, 8> &Used, + ModuleSymbolTable::Symbol Msym) { + Syms.emplace_back(); + storage::Symbol &Sym = Syms.back(); + Sym = {}; + + storage::Uncommon *Unc = nullptr; + auto Uncommon = [&]() -> storage::Uncommon & { + if (Unc) + return *Unc; + Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon; + Uncommons.emplace_back(); + Unc = &Uncommons.back(); + *Unc = {}; + setStr(Unc->COFFWeakExternFallbackName, ""); + return *Unc; + }; + + SmallString<64> Name; + { + raw_svector_ostream OS(Name); + Msymtab.printSymbolName(OS, Msym); + } + setStr(Sym.Name, Saver.save(StringRef(Name))); + + auto Flags = Msymtab.getSymbolFlags(Msym); + if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_undefined; + if (Flags & object::BasicSymbolRef::SF_Weak) + Sym.Flags |= 1 << storage::Symbol::FB_weak; + if (Flags & object::BasicSymbolRef::SF_Common) + Sym.Flags |= 1 << storage::Symbol::FB_common; + if (Flags & object::BasicSymbolRef::SF_Indirect) + Sym.Flags |= 1 << storage::Symbol::FB_indirect; + if (Flags & object::BasicSymbolRef::SF_Global) + Sym.Flags |= 1 << storage::Symbol::FB_global; + if (Flags & object::BasicSymbolRef::SF_FormatSpecific) + Sym.Flags |= 1 << storage::Symbol::FB_format_specific; + if (Flags & object::BasicSymbolRef::SF_Executable) + Sym.Flags |= 1 << storage::Symbol::FB_executable; + + Sym.ComdatIndex = -1; + auto *GV = Msym.dyn_cast<GlobalValue *>(); + if (!GV) { + // Undefined module asm symbols act as GC roots and are implicitly used. + if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_used; + setStr(Sym.IRName, ""); + return Error::success(); + } + + setStr(Sym.IRName, GV->getName()); + + if (Used.count(GV)) + Sym.Flags |= 1 << storage::Symbol::FB_used; + if (GV->isThreadLocal()) + Sym.Flags |= 1 << storage::Symbol::FB_tls; + if (GV->hasGlobalUnnamedAddr()) + Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr; + if (canBeOmittedFromSymbolTable(GV)) + Sym.Flags |= 1 << storage::Symbol::FB_may_omit; + Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility; + + if (Flags & object::BasicSymbolRef::SF_Common) { + Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize( + GV->getType()->getElementType()); + Uncommon().CommonAlign = GV->getAlignment(); + } + + const GlobalObject *Base = GV->getBaseObject(); + if (!Base) + return make_error<StringError>("Unable to determine comdat of alias!", + inconvertibleErrorCode()); + if (const Comdat *C = Base->getComdat()) { + auto P = ComdatMap.insert(std::make_pair(C, Comdats.size())); + Sym.ComdatIndex = P.first->second; + + if (P.second) { + storage::Comdat Comdat; + setStr(Comdat.Name, C->getName()); + Comdats.push_back(Comdat); + } + } + + if (TT.isOSBinFormatCOFF()) { + emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang); + + if ((Flags & object::BasicSymbolRef::SF_Weak) && + (Flags & object::BasicSymbolRef::SF_Indirect)) { + std::string FallbackName; + raw_string_ostream OS(FallbackName); + Msymtab.printSymbolName( + OS, cast<GlobalValue>( + cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts())); + OS.flush(); + setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName)); + } + } + + return Error::success(); +} + +Error Builder::build(ArrayRef<Module *> IRMods) { + storage::Header Hdr; + + assert(!IRMods.empty()); + Hdr.Version = storage::Header::kCurrentVersion; + setStr(Hdr.Producer, kExpectedProducerName); + setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple()); + setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); + TT = Triple(IRMods[0]->getTargetTriple()); + + for (auto *M : IRMods) + if (Error Err = addModule(M)) + return Err; + + COFFLinkerOptsOS.flush(); + setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts)); + + // We are about to fill in the header's range fields, so reserve space for it + // and copy it in afterwards. + Symtab.resize(sizeof(storage::Header)); + writeRange(Hdr.Modules, Mods); + writeRange(Hdr.Comdats, Comdats); + writeRange(Hdr.Symbols, Syms); + writeRange(Hdr.Uncommons, Uncommons); + + *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr; + return Error::success(); +} + +} // end anonymous namespace + +Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, + StringTableBuilder &StrtabBuilder, + BumpPtrAllocator &Alloc) { + return Builder(Symtab, StrtabBuilder, Alloc).build(Mods); +} + +// Upgrade a vector of bitcode modules created by an old version of LLVM by +// creating an irsymtab for them in the current format. +static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) { + FileContents FC; + + LLVMContext Ctx; + std::vector<Module *> Mods; + std::vector<std::unique_ptr<Module>> OwnedMods; + for (auto BM : BMs) { + Expected<std::unique_ptr<Module>> MOrErr = + BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Mods.push_back(MOrErr->get()); + OwnedMods.push_back(std::move(*MOrErr)); + } + + StringTableBuilder StrtabBuilder(StringTableBuilder::RAW); + BumpPtrAllocator Alloc; + if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc)) + return std::move(E); + + StrtabBuilder.finalizeInOrder(); + FC.Strtab.resize(StrtabBuilder.getSize()); + StrtabBuilder.write((uint8_t *)FC.Strtab.data()); + + FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()}, + {FC.Strtab.data(), FC.Strtab.size()}}; + return std::move(FC); +} + +Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) { + if (BFC.Mods.empty()) + return make_error<StringError>("Bitcode file does not contain any modules", + inconvertibleErrorCode()); + + if (BFC.StrtabForSymtab.empty() || + BFC.Symtab.size() < sizeof(storage::Header)) + return upgrade(BFC.Mods); + + // We cannot use the regular reader to read the version and producer, because + // it will expect the header to be in the current format. The only thing we + // can rely on is that the version and producer will be present as the first + // struct elements. + auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data()); + unsigned Version = Hdr->Version; + StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab); + if (Version != storage::Header::kCurrentVersion || + Producer != kExpectedProducerName) + return upgrade(BFC.Mods); + + FileContents FC; + FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()}, + {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}}; + + // Finally, make sure that the number of modules in the symbol table matches + // the number of modules in the bitcode file. If they differ, it may mean that + // the bitcode file was created by binary concatenation, so we need to create + // a new symbol table from scratch. + if (FC.TheReader.getNumModules() != BFC.Mods.size()) + return upgrade(std::move(BFC.Mods)); + + return std::move(FC); +} diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp index 5b01867..2e4da9f 100644 --- a/contrib/llvm/lib/Object/MachOObjectFile.cpp +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -1,4 +1,4 @@ -//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// +//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,32 +12,52 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/MachO.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" -#include <cctype> +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> #include <cstring> #include <limits> #include <list> +#include <memory> +#include <string> +#include <system_error> using namespace llvm; using namespace object; namespace { + struct section_base { char sectname[16]; char segname[16]; }; -} + +} // end anonymous namespace static Error malformedError(Twine Msg) { @@ -106,13 +126,6 @@ static StringRef parseSegmentOrSectionName(const char *P) { return StringRef(P, 16); } -// Helper to advance a section or symbol iterator multiple increments at a time. -template<class T> -static void advance(T &it, size_t Val) { - while (Val--) - ++it; -} - static unsigned getCPUType(const MachOObjectFile &O) { return O.getHeader().cputype; } @@ -368,7 +381,7 @@ static Error parseSegmentLoadCommand( CmdName + " extends past the end of the file"); if (S.vmsize != 0 && S.filesize > S.vmsize) return malformedError("load command " + Twine(LoadCommandIndex) + - " fileoff field in " + CmdName + + " filesize field in " + CmdName + " greater than vmsize field"); IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname); } else @@ -784,6 +797,52 @@ static Error checkVersCommand(const MachOObjectFile &Obj, return Error::success(); } +static Error checkNoteCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + uint32_t LoadCommandIndex, + std::list<MachOElement> &Elements) { + if (Load.C.cmdsize != sizeof(MachO::note_command)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_NOTE has incorrect cmdsize"); + MachO::note_command Nt = getStruct<MachO::note_command>(Obj, Load.Ptr); + uint64_t FileSize = Obj.getData().size(); + if (Nt.offset > FileSize) + return malformedError("offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends " + "past the end of the file"); + uint64_t BigSize = Nt.offset; + BigSize += Nt.size; + if (BigSize > FileSize) + return malformedError("size field plus offset field of LC_NOTE command " + + Twine(LoadCommandIndex) + " extends past the end of " + "the file"); + if (Error Err = checkOverlappingElement(Elements, Nt.offset, Nt.size, + "LC_NOTE data")) + return Err; + return Error::success(); +} + +static Error +parseBuildVersionCommand(const MachOObjectFile &Obj, + const MachOObjectFile::LoadCommandInfo &Load, + SmallVectorImpl<const char*> &BuildTools, + uint32_t LoadCommandIndex) { + MachO::build_version_command BVC = + getStruct<MachO::build_version_command>(Obj, Load.Ptr); + if (Load.C.cmdsize != + sizeof(MachO::build_version_command) + + BVC.ntools * sizeof(MachO::build_tool_version)) + return malformedError("load command " + Twine(LoadCommandIndex) + + " LC_BUILD_VERSION_COMMAND has incorrect cmdsize"); + + auto Start = Load.Ptr + sizeof(MachO::build_version_command); + BuildTools.resize(BVC.ntools); + for (unsigned i = 0; i < BVC.ntools; ++i) + BuildTools[i] = Start + i * sizeof(MachO::build_tool_version); + + return Error::success(); +} + static Error checkRpathCommand(const MachOObjectFile &Obj, const MachOObjectFile::LoadCommandInfo &Load, uint32_t LoadCommandIndex) { @@ -931,7 +990,26 @@ static Error checkThreadCommand(const MachOObjectFile &Obj, sys::swapByteOrder(count); state += sizeof(uint32_t); - if (cputype == MachO::CPU_TYPE_X86_64) { + if (cputype == MachO::CPU_TYPE_I386) { + if (flavor == MachO::x86_THREAD_STATE32) { + if (count != MachO::x86_THREAD_STATE32_COUNT) + return malformedError("load command " + Twine(LoadCommandIndex) + + " count not x86_THREAD_STATE32_COUNT for " + "flavor number " + Twine(nflavor) + " which is " + "a x86_THREAD_STATE32 flavor in " + CmdName + + " command"); + if (state + sizeof(MachO::x86_thread_state32_t) > end) + return malformedError("load command " + Twine(LoadCommandIndex) + + " x86_THREAD_STATE32 extends past end of " + "command in " + CmdName + " command"); + state += sizeof(MachO::x86_thread_state32_t); + } else { + return malformedError("load command " + Twine(LoadCommandIndex) + + " unknown flavor (" + Twine(flavor) + ") for " + "flavor number " + Twine(nflavor) + " in " + + CmdName + " command"); + } + } else if (cputype == MachO::CPU_TYPE_X86_64) { if (flavor == MachO::x86_THREAD_STATE64) { if (count != MachO::x86_THREAD_STATE64_COUNT) return malformedError("load command " + Twine(LoadCommandIndex) + @@ -1086,11 +1164,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64bits, Error &Err, uint32_t UniversalCputype, uint32_t UniversalIndex) - : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), - SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr), - DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr), - DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr), - HasPageZeroSegment(false) { + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) { ErrorAsOutParameter ErrAsOutParam(&Err); uint64_t SizeOfHeaders; uint32_t cputype; @@ -1280,6 +1354,12 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd, "LC_VERSION_MIN_WATCHOS"))) return; + } else if (Load.C.cmd == MachO::LC_NOTE) { + if ((Err = checkNoteCommand(*this, Load, I, Elements))) + return; + } else if (Load.C.cmd == MachO::LC_BUILD_VERSION) { + if ((Err = parseBuildVersionCommand(*this, Load, BuildTools, I))) + return; } else if (Load.C.cmd == MachO::LC_RPATH) { if ((Err = checkRpathCommand(*this, Load, I))) return; @@ -1740,6 +1820,10 @@ uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { return getSection(Sec).addr; } +uint64_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { // In the case if a malformed Mach-O file where the section offset is past // the end of the file or some part of the section size is past the end of @@ -1867,13 +1951,29 @@ MachOObjectFile::section_rel_end(DataRefImpl Sec) const { return relocation_iterator(RelocationRef(Ret, this)); } +relocation_iterator MachOObjectFile::extrel_begin() const { + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. + Ret.d.b = 0; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::extrel_end() const { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + DataRefImpl Ret; + Ret.d.a = 0; // Would normally be a section index. + Ret.d.b = DysymtabLoadCmd.nextrel; // Index into the external relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { ++Rel.d.b; } uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const { - assert(getHeader().filetype == MachO::MH_OBJECT && - "Only implemented for MH_OBJECT"); + assert((getHeader().filetype == MachO::MH_OBJECT || + getHeader().filetype == MachO::MH_KEXT_BUNDLE) && + "Only implemented for MH_OBJECT && MH_KEXT_BUNDLE"); MachO::any_relocation_info RE = getRelocation(Rel); return getAnyRelocationAddress(RE); } @@ -2201,6 +2301,10 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, return std::error_code(); } +uint32_t MachOObjectFile::getLibraryCount() const { + return Libraries.size(); +} + section_iterator MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { DataRefImpl Sec; @@ -2275,11 +2379,11 @@ StringRef MachOObjectFile::getFileFormatName() const { unsigned CPUType = getCPUType(*this); if (!is64Bit()) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return "Mach-O 32-bit i386"; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return "Mach-O arm"; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return "Mach-O 32-bit ppc"; default: return "Mach-O 32-bit unknown"; @@ -2287,11 +2391,11 @@ StringRef MachOObjectFile::getFileFormatName() const { } switch (CPUType) { - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return "Mach-O 64-bit x86-64"; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return "Mach-O arm64"; - case llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return "Mach-O 64-bit ppc64"; default: return "Mach-O 64-bit unknown"; @@ -2300,17 +2404,17 @@ StringRef MachOObjectFile::getFileFormatName() const { Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return Triple::x86; - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return Triple::x86_64; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return Triple::arm; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return Triple::aarch64; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return Triple::ppc; - case llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return Triple::ppc64; default: return Triple::UnknownArch; @@ -2383,6 +2487,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, *ArchFlag = "armv7em"; return Triple("thumbv7em-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7K: + if (McpuDefault) + *McpuDefault = "cortex-a7"; if (ArchFlag) *ArchFlag = "armv7k"; return Triple("armv7k-apple-darwin"); @@ -2393,6 +2499,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, *ArchFlag = "armv7m"; return Triple("thumbv7m-apple-darwin"); case MachO::CPU_SUBTYPE_ARM_V7S: + if (McpuDefault) + *McpuDefault = "cortex-a7"; if (ArchFlag) *ArchFlag = "armv7s"; return Triple("armv7s-apple-darwin"); @@ -2402,6 +2510,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType, case MachO::CPU_TYPE_ARM64: switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { case MachO::CPU_SUBTYPE_ARM64_ALL: + if (McpuDefault) + *McpuDefault = "cyclone"; if (ArchFlag) *ArchFlag = "arm64"; return Triple("arm64-apple-darwin"); @@ -2497,8 +2607,7 @@ dice_iterator MachOObjectFile::end_dices() const { return dice_iterator(DiceRef(DRI, this)); } -ExportEntry::ExportEntry(ArrayRef<uint8_t> T) - : Trie(T), Malformed(false), Done(false) {} +ExportEntry::ExportEntry(ArrayRef<uint8_t> T) : Trie(T) {} void ExportEntry::moveToFirst() { pushNode(0); @@ -2567,9 +2676,7 @@ uint32_t ExportEntry::nodeOffset() const { } ExportEntry::NodeState::NodeState(const uint8_t *Ptr) - : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), - ImportName(nullptr), ChildCount(0), NextChildIndex(0), - ParentStringLength(0), IsExportNode(false) {} + : Start(Ptr), Current(Ptr) {} void ExportEntry::pushNode(uint64_t offset) { const uint8_t *Ptr = Trie.begin() + offset; @@ -2659,7 +2766,7 @@ void ExportEntry::moveNext() { iterator_range<export_iterator> MachOObjectFile::exports(ArrayRef<uint8_t> Trie) { ExportEntry Start(Trie); - if (Trie.size() == 0) + if (Trie.empty()) Start.moveToEnd(); else Start.moveToFirst(); @@ -2674,10 +2781,10 @@ iterator_range<export_iterator> MachOObjectFile::exports() const { return exports(getDyldInfoExportsTrie()); } -MachORebaseEntry::MachORebaseEntry(ArrayRef<uint8_t> Bytes, bool is64Bit) - : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), - RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), - PointerSize(is64Bit ? 8 : 4), Malformed(false), Done(false) {} +MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4) {} void MachORebaseEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -2691,122 +2798,307 @@ void MachORebaseEntry::moveToEnd() { } void MachORebaseEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); // If in the middle of some loop, move to next rebasing in loop. SegmentOffset += AdvanceAmount; if (RemainingLoopCount) { --RemainingLoopCount; return; } + // REBASE_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. Therefore it is possible to reach the end without ever having + // seen REBASE_OPCODE_DONE. if (Ptr == Opcodes.end()) { Done = true; return; } bool More = true; - while (More && !Malformed) { + while (More) { // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; uint8_t Byte = *Ptr++; uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK; uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK; + uint32_t Count, Skip; + const char *error = nullptr; switch (Opcode) { case MachO::REBASE_OPCODE_DONE: More = false; Done = true; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n"); break; case MachO::REBASE_OPCODE_SET_TYPE_IMM: RebaseType = ImmValue; + if (RebaseType > MachO::REBASE_TYPE_TEXT_PCREL32) { + *E = malformedError("for REBASE_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)RebaseType) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " - << "RebaseType=" << (int) RebaseType << "\n"); + dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " + << "RebaseType=" << (int) RebaseType << "\n"); break; case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; - SegmentOffset = readULEB128(); + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: - SegmentOffset += readULEB128(); + SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } SegmentOffset += ImmValue * PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + false); + if (error) { + *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " + " (after adding immediate times the pointer size) " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } AdvanceAmount = PointerSize; - RemainingLoopCount = ImmValue - 1; + Skip = 0; + Count = ImmValue; + if (ImmValue != 0) + RemainingLoopCount = ImmValue - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } AdvanceAmount = PointerSize; - RemainingLoopCount = readULEB128() - 1; + Skip = 0; + Count = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: - AdvanceAmount = readULEB128() + PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Skip = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + Count = 1; RemainingLoopCount = 0; + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: - RemainingLoopCount = readULEB128() - 1; - AdvanceAmount = readULEB128() + PointerSize; + error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, + true); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Count = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + Skip = readULEB128(&error); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = Skip + PointerSize; + + error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_" + "ULEB " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; default: - Malformed = true; + *E = malformedError("bad rebase info (bad opcode value 0x" + + utohexstr(Opcode) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; } } } -uint64_t MachORebaseEntry::readULEB128() { +uint64_t MachORebaseEntry::readULEB128(const char **error) { unsigned Count; - uint64_t Result = decodeULEB128(Ptr, &Count); + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); Ptr += Count; - if (Ptr > Opcodes.end()) { + if (Ptr > Opcodes.end()) Ptr = Opcodes.end(); - Malformed = true; - } return Result; } -uint32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } +int32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; } @@ -2822,6 +3114,24 @@ StringRef MachORebaseEntry::typeName() const { return "unknown"; } +// For use with the SegIndex of a checked Mach-O Rebase entry +// to get the segment name. +StringRef MachORebaseEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the section name. +StringRef MachORebaseEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry +// to get the address. +uint64_t MachORebaseEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { #ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); @@ -2834,25 +3144,27 @@ bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { } iterator_range<rebase_iterator> -MachOObjectFile::rebaseTable(ArrayRef<uint8_t> Opcodes, bool is64) { - MachORebaseEntry Start(Opcodes, is64); +MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64) { + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O); + MachORebaseEntry Start(&Err, O, Opcodes, is64); Start.moveToFirst(); - MachORebaseEntry Finish(Opcodes, is64); + MachORebaseEntry Finish(&Err, O, Opcodes, is64); Finish.moveToEnd(); return make_range(rebase_iterator(Start), rebase_iterator(Finish)); } -iterator_range<rebase_iterator> MachOObjectFile::rebaseTable() const { - return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit()); +iterator_range<rebase_iterator> MachOObjectFile::rebaseTable(Error &Err) { + return rebaseTable(Err, this, getDyldInfoRebaseOpcodes(), is64Bit()); } -MachOBindEntry::MachOBindEntry(ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) - : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), - Ordinal(0), Flags(0), Addend(0), RemainingLoopCount(0), AdvanceAmount(0), - BindType(0), PointerSize(is64Bit ? 8 : 4), - TableKind(BK), Malformed(false), Done(false) {} +MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O, + ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK) + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4), TableKind(BK) {} void MachOBindEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -2866,24 +3178,31 @@ void MachOBindEntry::moveToEnd() { } void MachOBindEntry::moveNext() { + ErrorAsOutParameter ErrAsOutParam(E); // If in the middle of some loop, move to next binding in loop. SegmentOffset += AdvanceAmount; if (RemainingLoopCount) { --RemainingLoopCount; return; } + // BIND_OPCODE_DONE is only used for padding if we are not aligned to + // pointer size. Therefore it is possible to reach the end without ever having + // seen BIND_OPCODE_DONE. if (Ptr == Opcodes.end()) { Done = true; return; } bool More = true; - while (More && !Malformed) { + while (More) { // Parse next opcode and set up next loop. + const uint8_t *OpcodeStart = Ptr; uint8_t Byte = *Ptr++; uint8_t ImmValue = Byte & MachO::BIND_IMMEDIATE_MASK; uint8_t Opcode = Byte & MachO::BIND_OPCODE_MASK; int8_t SignExtended; const uint8_t *SymStart; + uint32_t Count, Skip; + const char *error = nullptr; switch (Opcode) { case MachO::BIND_OPCODE_DONE: if (TableKind == Kind::Lazy) { @@ -2899,48 +3218,108 @@ void MachOBindEntry::moveNext() { break; } More = false; - Done = true; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-bind", dbgs() << "BIND_OPCODE_DONE\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } Ordinal = ImmValue; + LibraryOrdinalSet = true; + if (ImmValue > O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + Twine((int)ImmValue) + " (max " + + Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: - Ordinal = readULEB128(); + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Ordinal = readULEB128(&error); + LibraryOrdinalSet = true; + if (error) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (Ordinal > (int)O->getLibraryCount()) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad " + "library ordinal: " + Twine((int)Ordinal) + " (max " + + Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: + if (TableKind == Kind::Weak) { + *E = malformedError("BIND_OPCODE_SET_DYLIB_SPECIAL_IMM not allowed in " + "weak bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } if (ImmValue) { SignExtended = MachO::BIND_OPCODE_MASK | ImmValue; Ordinal = SignExtended; + if (Ordinal < MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) { + *E = malformedError("for BIND_OPCODE_SET_DYLIB_SPECIAL_IMM unknown " + "special ordinal: " + Twine((int)Ordinal) + " for opcode at: " + "0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } } else Ordinal = 0; + LibraryOrdinalSet = true; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: Flags = ImmValue; SymStart = Ptr; - while (*Ptr) { + while (*Ptr && (Ptr < Opcodes.end())) { ++Ptr; } + if (Ptr == Opcodes.end()) { + *E = malformedError("for BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM " + "symbol name extends past opcodes for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } SymbolName = StringRef(reinterpret_cast<const char*>(SymStart), Ptr-SymStart); ++Ptr; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " - << "SymbolName=" << SymbolName << "\n"); + dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " + << "SymbolName=" << SymbolName << "\n"); if (TableKind == Kind::Weak) { if (ImmValue & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) return; @@ -2948,111 +3327,311 @@ void MachOBindEntry::moveNext() { break; case MachO::BIND_OPCODE_SET_TYPE_IMM: BindType = ImmValue; + if (ImmValue > MachO::BIND_TYPE_TEXT_PCREL32) { + *E = malformedError("for BIND_OPCODE_SET_TYPE_IMM bad bind type: " + + Twine((int)ImmValue) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " - << "BindType=" << (int)BindType << "\n"); + dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " + << "BindType=" << (int)BindType << "\n"); break; case MachO::BIND_OPCODE_SET_ADDEND_SLEB: - Addend = readSLEB128(); - if (TableKind == Kind::Lazy) - Malformed = true; + Addend = readSLEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_ADDEND_SLEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " - << "Addend=" << Addend << "\n"); + dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " + << "Addend=" << Addend << "\n"); break; case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; - SegmentOffset = readULEB128(); + SegmentOffset = readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::BIND_OPCODE_ADD_ADDR_ULEB: - SegmentOffset += readULEB128(); + SegmentOffset += readULEB128(&error); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::BIND_OPCODE_DO_BIND: AdvanceAmount = PointerSize; RemainingLoopCount = 0; + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND " + Twine(error) + + " for opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding " + "BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: - AdvanceAmount = readULEB128() + PointerSize; + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB not allowed in " + "lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + "preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing " + "preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + AdvanceAmount = readULEB128(&error) + PointerSize; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + // Note, this is not really an error until the next bind but make no sense + // for a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB to not be followed by another + // bind operation. + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset + + AdvanceAmount, false); + if (error) { + *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB (after adding " + "ULEB) " + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } RemainingLoopCount = 0; - if (TableKind == Kind::Lazy) - Malformed = true; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED not " + "allowed in lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } AdvanceAmount = ImmValue * PointerSize + PointerSize; RemainingLoopCount = 0; - if (TableKind == Kind::Lazy) - Malformed = true; + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset + + AdvanceAmount, false); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " + " (after adding immediate times the pointer size) " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: - RemainingLoopCount = readULEB128() - 1; - AdvanceAmount = readULEB128() + PointerSize; - if (TableKind == Kind::Lazy) - Malformed = true; + if (TableKind == Kind::Lazy) { + *E = malformedError("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB not " + "allowed in lazy bind table for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Count = readULEB128(&error); + if (Count != 0) + RemainingLoopCount = Count - 1; + else + RemainingLoopCount = 0; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (count value) " + Twine(error) + " for opcode at" + ": 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + Skip = readULEB128(&error); + AdvanceAmount = Skip + PointerSize; + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + " (skip value) " + Twine(error) + " for opcode at" + ": 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (SymbolName == StringRef()) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for " + "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + if (!LibraryOrdinalSet && TableKind != Kind::Weak) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode " + "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } + error = O->BindEntryCheckCountAndSkip(Count, Skip, PointerSize, + SegmentIndex, SegmentOffset); + if (error) { + *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " + + Twine(error) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; + } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; default: - Malformed = true; + *E = malformedError("bad bind info (bad opcode value 0x" + + utohexstr(Opcode) + " for opcode at: 0x" + + utohexstr(OpcodeStart - Opcodes.begin())); + moveToEnd(); + return; } } } -uint64_t MachOBindEntry::readULEB128() { +uint64_t MachOBindEntry::readULEB128(const char **error) { unsigned Count; - uint64_t Result = decodeULEB128(Ptr, &Count); + uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error); Ptr += Count; - if (Ptr > Opcodes.end()) { + if (Ptr > Opcodes.end()) Ptr = Opcodes.end(); - Malformed = true; - } return Result; } -int64_t MachOBindEntry::readSLEB128() { +int64_t MachOBindEntry::readSLEB128(const char **error) { unsigned Count; - int64_t Result = decodeSLEB128(Ptr, &Count); + int64_t Result = decodeSLEB128(Ptr, &Count, Opcodes.end(), error); Ptr += Count; - if (Ptr > Opcodes.end()) { + if (Ptr > Opcodes.end()) Ptr = Opcodes.end(); - Malformed = true; - } return Result; } -uint32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } +int32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; } @@ -3076,6 +3655,24 @@ uint32_t MachOBindEntry::flags() const { return Flags; } int MachOBindEntry::ordinal() const { return Ordinal; } +// For use with the SegIndex of a checked Mach-O Bind entry +// to get the segment name. +StringRef MachOBindEntry::segmentName() const { + return O->BindRebaseSegmentName(SegmentIndex); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the section name. +StringRef MachOBindEntry::sectionName() const { + return O->BindRebaseSectionName(SegmentIndex, SegmentOffset); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry +// to get the address. +uint64_t MachOBindEntry::address() const { + return O->BindRebaseAddress(SegmentIndex, SegmentOffset); +} + bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { #ifdef EXPENSIVE_CHECKS assert(Opcodes == Other.Opcodes && "compare iterators of different files"); @@ -3087,30 +3684,149 @@ bool MachOBindEntry::operator==(const MachOBindEntry &Other) const { (Done == Other.Done); } +// Build table of sections so SegIndex/SegOffset pairs can be translated. +BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) { + uint32_t CurSegIndex = Obj->hasPageZeroSegment() ? 1 : 0; + StringRef CurSegName; + uint64_t CurSegAddress; + for (const SectionRef &Section : Obj->sections()) { + SectionInfo Info; + Section.getName(Info.SectionName); + Info.Address = Section.getAddress(); + Info.Size = Section.getSize(); + Info.SegmentName = + Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl()); + if (!Info.SegmentName.equals(CurSegName)) { + ++CurSegIndex; + CurSegName = Info.SegmentName; + CurSegAddress = Info.Address; + } + Info.SegmentIndex = CurSegIndex - 1; + Info.OffsetInSegment = Info.Address - CurSegAddress; + Info.SegmentStartAddress = CurSegAddress; + Sections.push_back(Info); + } + MaxSegIndex = CurSegIndex; +} + +// For use with a SegIndex,SegOffset pair in MachOBindEntry::moveNext() to +// validate a MachOBindEntry or MachORebaseEntry. +const char * BindRebaseSegInfo::checkSegAndOffset(int32_t SegIndex, + uint64_t SegOffset, + bool endInvalid) { + if (SegIndex == -1) + return "missing preceding *_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB"; + if (SegIndex >= MaxSegIndex) + return "bad segIndex (too large)"; + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > SegOffset) + continue; + if (SegOffset > (SI.OffsetInSegment + SI.Size)) + continue; + if (endInvalid && SegOffset >= (SI.OffsetInSegment + SI.Size)) + continue; + return nullptr; + } + return "bad segOffset, too large"; +} + +// For use in MachOBindEntry::moveNext() to validate a MachOBindEntry for +// the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode and for use in +// MachORebaseEntry::moveNext() to validate a MachORebaseEntry for +// REBASE_OPCODE_DO_*_TIMES* opcodes. The SegIndex and SegOffset must have +// been already checked. +const char * BindRebaseSegInfo::checkCountAndSkip(uint32_t Count, uint32_t Skip, + uint8_t PointerSize, + int32_t SegIndex, + uint64_t SegOffset) { + const SectionInfo &SI = findSection(SegIndex, SegOffset); + uint64_t addr = SI.SegmentStartAddress + SegOffset; + if (addr >= SI.Address + SI.Size) + return "bad segOffset, too large"; + uint64_t i = 0; + if (Count > 1) + i = (Skip + PointerSize) * (Count - 1); + else if (Count == 1) + i = Skip + PointerSize; + if (addr + i >= SI.Address + SI.Size) { + // For rebase opcodes they can step from one section to another. + uint64_t TrailingSegOffset = (addr + i) - SI.SegmentStartAddress; + const char *error = checkSegAndOffset(SegIndex, TrailingSegOffset, false); + if (error) + return "bad count and skip, too large"; + } + return nullptr; +} + +// For use with the SegIndex of a checked Mach-O Bind or Rebase entry +// to get the segment name. +StringRef BindRebaseSegInfo::segmentName(int32_t SegIndex) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex == SegIndex) + return SI.SegmentName; + } + llvm_unreachable("invalid SegIndex"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// to get the SectionInfo. +const BindRebaseSegInfo::SectionInfo &BindRebaseSegInfo::findSection( + int32_t SegIndex, uint64_t SegOffset) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > SegOffset) + continue; + if (SegOffset >= (SI.OffsetInSegment + SI.Size)) + continue; + return SI; + } + llvm_unreachable("SegIndex and SegOffset not in any section"); +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the section name. +StringRef BindRebaseSegInfo::sectionName(int32_t SegIndex, + uint64_t SegOffset) { + return findSection(SegIndex, SegOffset).SectionName; +} + +// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase +// entry to get the address. +uint64_t BindRebaseSegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) { + const SectionInfo &SI = findSection(SegIndex, OffsetInSeg); + return SI.SegmentStartAddress + OffsetInSeg; +} + iterator_range<bind_iterator> -MachOObjectFile::bindTable(ArrayRef<uint8_t> Opcodes, bool is64, +MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O, + ArrayRef<uint8_t> Opcodes, bool is64, MachOBindEntry::Kind BKind) { - MachOBindEntry Start(Opcodes, is64, BKind); + if (O->BindRebaseSectionTable == nullptr) + O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O); + MachOBindEntry Start(&Err, O, Opcodes, is64, BKind); Start.moveToFirst(); - MachOBindEntry Finish(Opcodes, is64, BKind); + MachOBindEntry Finish(&Err, O, Opcodes, is64, BKind); Finish.moveToEnd(); return make_range(bind_iterator(Start), bind_iterator(Finish)); } -iterator_range<bind_iterator> MachOObjectFile::bindTable() const { - return bindTable(getDyldInfoBindOpcodes(), is64Bit(), +iterator_range<bind_iterator> MachOObjectFile::bindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoBindOpcodes(), is64Bit(), MachOBindEntry::Kind::Regular); } -iterator_range<bind_iterator> MachOObjectFile::lazyBindTable() const { - return bindTable(getDyldInfoLazyBindOpcodes(), is64Bit(), +iterator_range<bind_iterator> MachOObjectFile::lazyBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoLazyBindOpcodes(), is64Bit(), MachOBindEntry::Kind::Lazy); } -iterator_range<bind_iterator> MachOObjectFile::weakBindTable() const { - return bindTable(getDyldInfoWeakBindOpcodes(), is64Bit(), +iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) { + return bindTable(Err, this, getDyldInfoWeakBindOpcodes(), is64Bit(), MachOBindEntry::Kind::Weak); } @@ -3289,6 +4005,21 @@ MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const { return getStruct<MachO::version_min_command>(*this, L.Ptr); } +MachO::note_command +MachOObjectFile::getNoteLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::note_command>(*this, L.Ptr); +} + +MachO::build_version_command +MachOObjectFile::getBuildVersionLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::build_version_command>(*this, L.Ptr); +} + +MachO::build_tool_version +MachOObjectFile::getBuildToolVersion(unsigned index) const { + return getStruct<MachO::build_tool_version>(*this, BuildTools[index]); +} + MachO::dylib_command MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const { return getStruct<MachO::dylib_command>(*this, L.Ptr); @@ -3371,15 +4102,20 @@ MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const { MachO::any_relocation_info MachOObjectFile::getRelocation(DataRefImpl Rel) const { - DataRefImpl Sec; - Sec.d.a = Rel.d.a; uint32_t Offset; - if (is64Bit()) { - MachO::section_64 Sect = getSection64(Sec); - Offset = Sect.reloff; + if (getHeader().filetype == MachO::MH_OBJECT) { + DataRefImpl Sec; + Sec.d.a = Rel.d.a; + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.reloff; + } else { + MachO::section Sect = getSection(Sec); + Offset = Sect.reloff; + } } else { - MachO::section Sect = getSection(Sec); - Offset = Sect.reloff; + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations } auto P = reinterpret_cast<const MachO::any_relocation_info *>( @@ -3599,3 +4335,9 @@ ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer, return make_error<GenericBinaryError>("Unrecognized MachO magic number", object_error::invalid_file_type); } + +StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const { + return StringSwitch<StringRef>(Name) + .Case("debug_str_offs", "debug_str_offsets") + .Default(Name); +} diff --git a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp deleted file mode 100644 index 11ace84..0000000 --- a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp +++ /dev/null @@ -1,115 +0,0 @@ -//===- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation ==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Part of the ModuleSummaryIndexObjectFile class implementation. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/ModuleSummaryIndex.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; -using namespace object; - -static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile( - "ignore-empty-index-file", llvm::cl::ZeroOrMore, - llvm::cl::desc( - "Ignore an empty index file and perform non-ThinLTO compilation"), - llvm::cl::init(false)); - -ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile( - MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I) - : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) { -} - -ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() {} - -std::unique_ptr<ModuleSummaryIndex> ModuleSummaryIndexObjectFile::takeIndex() { - return std::move(Index); -} - -ErrorOr<MemoryBufferRef> -ModuleSummaryIndexObjectFile::findBitcodeInObject(const ObjectFile &Obj) { - for (const SectionRef &Sec : Obj.sections()) { - if (Sec.isBitcode()) { - StringRef SecContents; - if (std::error_code EC = Sec.getContents(SecContents)) - return EC; - return MemoryBufferRef(SecContents, Obj.getFileName()); - } - } - - return object_error::bitcode_section_not_found; -} - -ErrorOr<MemoryBufferRef> -ModuleSummaryIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { - sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); - switch (Type) { - case sys::fs::file_magic::bitcode: - return Object; - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { - Expected<std::unique_ptr<ObjectFile>> ObjFile = - ObjectFile::createObjectFile(Object, Type); - if (!ObjFile) - return errorToErrorCode(ObjFile.takeError()); - return findBitcodeInObject(*ObjFile->get()); - } - default: - return object_error::invalid_file_type; - } -} - -// Parse module summary index in the given memory buffer. -// Return new ModuleSummaryIndexObjectFile instance containing parsed -// module summary/index. -Expected<std::unique_ptr<ModuleSummaryIndexObjectFile>> -ModuleSummaryIndexObjectFile::create(MemoryBufferRef Object) { - ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); - if (!BCOrErr) - return errorCodeToError(BCOrErr.getError()); - - Expected<std::unique_ptr<ModuleSummaryIndex>> IOrErr = - getModuleSummaryIndex(BCOrErr.get()); - - if (!IOrErr) - return IOrErr.takeError(); - - std::unique_ptr<ModuleSummaryIndex> Index = std::move(IOrErr.get()); - return llvm::make_unique<ModuleSummaryIndexObjectFile>(Object, - std::move(Index)); -} - -// Parse the module summary index out of an IR file and return the summary -// index object if found, or nullptr if not. -Expected<std::unique_ptr<ModuleSummaryIndex>> -llvm::getModuleSummaryIndexForFile(StringRef Path) { - ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = - MemoryBuffer::getFileOrSTDIN(Path); - std::error_code EC = FileOrErr.getError(); - if (EC) - return errorCodeToError(EC); - MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef(); - if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize()) - return nullptr; - Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create(BufferRef); - if (!ObjOrErr) - return ObjOrErr.takeError(); - - object::ModuleSummaryIndexObjectFile &Obj = **ObjOrErr; - return Obj.takeIndex(); -} diff --git a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp index 9048800..f2e7a21 100644 --- a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp @@ -1,4 +1,4 @@ -//===- ModuleSymbolTable.cpp - symbol table for in-memory IR ----*- C++ -*-===// +//===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===// // // The LLVM Compiler Infrastructure // @@ -13,27 +13,45 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ModuleSymbolTable.h" #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/GVMaterializer.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Object/ObjectFile.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> +#include <string> + using namespace llvm; using namespace object; @@ -43,27 +61,98 @@ void ModuleSymbolTable::addModule(Module *M) { else FirstMod = M; - for (Function &F : *M) - SymTab.push_back(&F); - for (GlobalVariable &GV : M->globals()) + for (GlobalValue &GV : M->global_values()) SymTab.push_back(&GV); - for (GlobalAlias &GA : M->aliases()) - SymTab.push_back(&GA); - - CollectAsmSymbols(Triple(M->getTargetTriple()), M->getModuleInlineAsm(), - [this](StringRef Name, BasicSymbolRef::Flags Flags) { - SymTab.push_back(new (AsmSymbols.Allocate()) - AsmSymbol(Name, Flags)); - }); + + CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) { + SymTab.push_back(new (AsmSymbols.Allocate()) AsmSymbol(Name, Flags)); + }); +} + +// Ensure ELF .symver aliases get the same binding as the defined symbol +// they alias with. +static void handleSymverAliases(const Module &M, RecordStreamer &Streamer) { + if (Streamer.symverAliases().empty()) + return; + + // The name in the assembler will be mangled, but the name in the IR + // might not, so we first compute a mapping from mangled name to GV. + Mangler Mang; + SmallString<64> MangledName; + StringMap<const GlobalValue *> MangledNameMap; + auto GetMangledName = [&](const GlobalValue &GV) { + if (!GV.hasName()) + return; + + MangledName.clear(); + MangledName.reserve(GV.getName().size() + 1); + Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false); + MangledNameMap[MangledName] = &GV; + }; + for (const Function &F : M) + GetMangledName(F); + for (const GlobalVariable &GV : M.globals()) + GetMangledName(GV); + for (const GlobalAlias &GA : M.aliases()) + GetMangledName(GA); + + // Walk all the recorded .symver aliases, and set up the binding + // for each alias. + for (auto &Symver : Streamer.symverAliases()) { + const MCSymbol *Aliasee = Symver.first; + MCSymbolAttr Attr = MCSA_Invalid; + + // First check if the aliasee binding was recorded in the asm. + RecordStreamer::State state = Streamer.getSymbolState(Aliasee); + switch (state) { + case RecordStreamer::Global: + case RecordStreamer::DefinedGlobal: + Attr = MCSA_Global; + break; + case RecordStreamer::UndefinedWeak: + case RecordStreamer::DefinedWeak: + Attr = MCSA_Weak; + break; + default: + break; + } + + // If we don't have a symbol attribute from assembly, then check if + // the aliasee was defined in the IR. + if (Attr == MCSA_Invalid) { + const auto *GV = M.getNamedValue(Aliasee->getName()); + if (!GV) { + auto MI = MangledNameMap.find(Aliasee->getName()); + if (MI != MangledNameMap.end()) + GV = MI->second; + else + continue; + } + if (GV->hasExternalLinkage()) + Attr = MCSA_Global; + else if (GV->hasLocalLinkage()) + Attr = MCSA_Local; + else if (GV->isWeakForLinker()) + Attr = MCSA_Weak; + } + if (Attr == MCSA_Invalid) + continue; + + // Set the detected binding on each alias with this aliasee. + for (auto &Alias : Symver.second) + Streamer.EmitSymbolAttribute(Alias, Attr); + } } void ModuleSymbolTable::CollectAsmSymbols( - const Triple &TT, StringRef InlineAsm, + const Module &M, function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + StringRef InlineAsm = M.getModuleInlineAsm(); if (InlineAsm.empty()) return; std::string Err; + const Triple TT(M.getTargetTriple()); const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); assert(T && T->hasMCAsmParser()); @@ -106,6 +195,8 @@ void ModuleSymbolTable::CollectAsmSymbols( if (Parser->Run(false)) return; + handleSymverAliases(M, Streamer); + for (auto &KV : Streamer) { StringRef Key = KV.first(); RecordStreamer::State Value = KV.second; diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp index 6df481b..1d2859c 100644 --- a/contrib/llvm/lib/Object/Object.cpp +++ b/contrib/llvm/lib/Object/Object.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm-c/Object.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Object/ObjectFile.h" using namespace llvm; diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp index f36388b..8377dd0 100644 --- a/contrib/llvm/lib/Object/ObjectFile.cpp +++ b/contrib/llvm/lib/Object/ObjectFile.cpp @@ -1,4 +1,4 @@ -//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===// +//===- ObjectFile.cpp - File format independent object file ---------------===// // // The LLVM Compiler Infrastructure // @@ -12,19 +12,28 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ObjectFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" #include "llvm/Object/Wasm.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <memory> #include <system_error> using namespace llvm; using namespace object; -void ObjectFile::anchor() { } +void ObjectFile::anchor() {} ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source) : SymbolicFile(Type, Source) {} @@ -71,42 +80,42 @@ section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { } Expected<std::unique_ptr<ObjectFile>> -ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) { +ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { StringRef Data = Object.getBuffer(); - if (Type == sys::fs::file_magic::unknown) - Type = sys::fs::identify_magic(Data); + if (Type == file_magic::unknown) + Type = identify_magic(Data); switch (Type) { - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::bitcode: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::archive: - case sys::fs::file_magic::macho_universal_binary: - case sys::fs::file_magic::windows_resource: + case file_magic::unknown: + case file_magic::bitcode: + case file_magic::coff_cl_gl_object: + case file_magic::archive: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: return errorCodeToError(object_error::invalid_file_type); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: return errorOrToExpected(createELFObjectFile(Object)); - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: return createMachOObjectFile(Object); - case sys::fs::file_magic::coff_object: - case sys::fs::file_magic::coff_import_library: - case sys::fs::file_magic::pecoff_executable: + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: return errorOrToExpected(createCOFFObjectFile(Object)); - case sys::fs::file_magic::wasm_object: + case file_magic::wasm_object: return createWasmObjectFile(Object); } llvm_unreachable("Unexpected Object File Type"); diff --git a/contrib/llvm/lib/Object/RecordStreamer.cpp b/contrib/llvm/lib/Object/RecordStreamer.cpp index 572b960..e94e9cf 100644 --- a/contrib/llvm/lib/Object/RecordStreamer.cpp +++ b/contrib/llvm/lib/Object/RecordStreamer.cpp @@ -1,4 +1,4 @@ -//===-- RecordStreamer.cpp - Record asm definde and used symbols ----------===// +//===-- RecordStreamer.cpp - Record asm defined and used symbols ----------===// // // The LLVM Compiler Infrastructure // @@ -9,6 +9,7 @@ #include "RecordStreamer.h" #include "llvm/MC/MCSymbol.h" + using namespace llvm; void RecordStreamer::markDefined(const MCSymbol &Symbol) { @@ -69,20 +70,20 @@ void RecordStreamer::markUsed(const MCSymbol &Symbol) { void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); } +RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} + RecordStreamer::const_iterator RecordStreamer::begin() { return Symbols.begin(); } RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } -RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} - void RecordStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, bool) { MCStreamer::EmitInstruction(Inst, STI); } -void RecordStreamer::EmitLabel(MCSymbol *Symbol) { +void RecordStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) { MCStreamer::EmitLabel(Symbol); markDefined(*Symbol); } @@ -110,3 +111,8 @@ void RecordStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { markDefined(*Symbol); } + +void RecordStreamer::emitELFSymverDirective(MCSymbol *Alias, + const MCSymbol *Aliasee) { + SymverAliasMap[Aliasee].push_back(Alias); +} diff --git a/contrib/llvm/lib/Object/RecordStreamer.h b/contrib/llvm/lib/Object/RecordStreamer.h index 617d8a4..4d11909 100644 --- a/contrib/llvm/lib/Object/RecordStreamer.h +++ b/contrib/llvm/lib/Object/RecordStreamer.h @@ -1,4 +1,4 @@ -//===-- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*===// +//===- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,9 +10,16 @@ #ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H #define LLVM_LIB_OBJECT_RECORDSTREAMER_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/SMLoc.h" +#include <vector> namespace llvm { + class RecordStreamer : public MCStreamer { public: enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used, @@ -20,24 +27,49 @@ public: private: StringMap<State> Symbols; + // Map of aliases created by .symver directives, saved so we can update + // their symbol binding after parsing complete. This maps from each + // aliasee to its list of aliases. + DenseMap<const MCSymbol *, std::vector<MCSymbol *>> SymverAliasMap; + void markDefined(const MCSymbol &Symbol); void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute); void markUsed(const MCSymbol &Symbol); void visitUsedSymbol(const MCSymbol &Sym) override; public: - typedef StringMap<State>::const_iterator const_iterator; + RecordStreamer(MCContext &Context); + + using const_iterator = StringMap<State>::const_iterator; + const_iterator begin(); const_iterator end(); - RecordStreamer(MCContext &Context); - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; - void EmitLabel(MCSymbol *Symbol) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override; + void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; void EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; + /// Record .symver aliases for later processing. + void emitELFSymverDirective(MCSymbol *Alias, + const MCSymbol *Aliasee) override; + /// Return the map of .symver aliasee to associated aliases. + DenseMap<const MCSymbol *, std::vector<MCSymbol *>> &symverAliases() { + return SymverAliasMap; + } + + /// Get the state recorded for the given symbol. + State getSymbolState(const MCSymbol *Sym) { + auto SI = Symbols.find(Sym->getName()); + if (SI == Symbols.end()) + return NeverSeen; + return SI->second; + } }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_OBJECT_RECORDSTREAMER_H diff --git a/contrib/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm/lib/Object/SymbolicFile.cpp index 4b51a49..1042d29 100644 --- a/contrib/llvm/lib/Object/SymbolicFile.cpp +++ b/contrib/llvm/lib/Object/SymbolicFile.cpp @@ -1,4 +1,4 @@ -//===- SymbolicFile.cpp - Interface that only provides symbols --*- C++ -*-===// +//===- SymbolicFile.cpp - Interface that only provides symbols ------------===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,21 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/COFF.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <memory> using namespace llvm; using namespace object; @@ -24,47 +33,48 @@ using namespace object; SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source) : Binary(Type, Source) {} -SymbolicFile::~SymbolicFile() {} +SymbolicFile::~SymbolicFile() = default; -Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile( - MemoryBufferRef Object, sys::fs::file_magic Type, LLVMContext *Context) { +Expected<std::unique_ptr<SymbolicFile>> +SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, + LLVMContext *Context) { StringRef Data = Object.getBuffer(); - if (Type == sys::fs::file_magic::unknown) - Type = sys::fs::identify_magic(Data); + if (Type == file_magic::unknown) + Type = identify_magic(Data); switch (Type) { - case sys::fs::file_magic::bitcode: + case file_magic::bitcode: if (Context) return IRObjectFile::create(Object, *Context); LLVM_FALLTHROUGH; - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::archive: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::macho_universal_binary: - case sys::fs::file_magic::windows_resource: + case file_magic::unknown: + case file_magic::archive: + case file_magic::coff_cl_gl_object: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: return errorCodeToError(object_error::invalid_file_type); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::pecoff_executable: - case sys::fs::file_magic::wasm_object: + case file_magic::elf: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::pecoff_executable: + case file_magic::wasm_object: return ObjectFile::createObjectFile(Object, Type); - case sys::fs::file_magic::coff_import_library: + case file_magic::coff_import_library: return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object)); - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { Expected<std::unique_ptr<ObjectFile>> Obj = ObjectFile::createObjectFile(Object, Type); if (!Obj || !Context) diff --git a/contrib/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm/lib/Object/WasmObjectFile.cpp index 2b61a8a0..7f80bf0 100644 --- a/contrib/llvm/lib/Object/WasmObjectFile.cpp +++ b/contrib/llvm/lib/Object/WasmObjectFile.cpp @@ -1,4 +1,4 @@ -//===- WasmObjectFile.cpp - Wasm object file implementation -----*- C++ -*-===// +//===- WasmObjectFile.cpp - Wasm object file implementation ---------------===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,31 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Object/Wasm.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <system_error> -namespace llvm { -namespace object { +#define DEBUG_TYPE "wasm-object" + +using namespace llvm; +using namespace object; Expected<std::unique_ptr<WasmObjectFile>> ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { @@ -24,34 +43,146 @@ ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { return std::move(ObjectFile); } -namespace { +#define VARINT7_MAX ((1<<7)-1) +#define VARINT7_MIN (-(1<<7)) +#define VARUINT7_MAX (1<<7) +#define VARUINT1_MAX (1) + +static uint8_t readUint8(const uint8_t *&Ptr) { return *Ptr++; } -uint32_t readUint32(const uint8_t *&Ptr) { +static uint32_t readUint32(const uint8_t *&Ptr) { uint32_t Result = support::endian::read32le(Ptr); Ptr += sizeof(Result); return Result; } -uint64_t readULEB128(const uint8_t *&Ptr) { +static int32_t readFloat32(const uint8_t *&Ptr) { + int32_t Result = 0; + memcpy(&Result, Ptr, sizeof(Result)); + Ptr += sizeof(Result); + return Result; +} + +static int64_t readFloat64(const uint8_t *&Ptr) { + int64_t Result = 0; + memcpy(&Result, Ptr, sizeof(Result)); + Ptr += sizeof(Result); + return Result; +} + +static uint64_t readULEB128(const uint8_t *&Ptr) { unsigned Count; uint64_t Result = decodeULEB128(Ptr, &Count); Ptr += Count; return Result; } -StringRef readString(const uint8_t *&Ptr) { +static StringRef readString(const uint8_t *&Ptr) { uint32_t StringLen = readULEB128(Ptr); StringRef Return = StringRef(reinterpret_cast<const char *>(Ptr), StringLen); Ptr += StringLen; return Return; } -Error readSection(wasm::WasmSection &Section, const uint8_t *&Ptr, - const uint8_t *Start) { +static int64_t readLEB128(const uint8_t *&Ptr) { + unsigned Count; + uint64_t Result = decodeSLEB128(Ptr, &Count); + Ptr += Count; + return Result; +} + +static uint8_t readVaruint1(const uint8_t *&Ptr) { + int64_t result = readLEB128(Ptr); + assert(result <= VARUINT1_MAX && result >= 0); + return result; +} + +static int8_t readVarint7(const uint8_t *&Ptr) { + int64_t result = readLEB128(Ptr); + assert(result <= VARINT7_MAX && result >= VARINT7_MIN); + return result; +} + +static uint8_t readVaruint7(const uint8_t *&Ptr) { + uint64_t result = readULEB128(Ptr); + assert(result <= VARUINT7_MAX); + return result; +} + +static int32_t readVarint32(const uint8_t *&Ptr) { + int64_t result = readLEB128(Ptr); + assert(result <= INT32_MAX && result >= INT32_MIN); + return result; +} + +static uint32_t readVaruint32(const uint8_t *&Ptr) { + uint64_t result = readULEB128(Ptr); + assert(result <= UINT32_MAX); + return result; +} + +static int64_t readVarint64(const uint8_t *&Ptr) { + return readLEB128(Ptr); +} + +static uint8_t readOpcode(const uint8_t *&Ptr) { + return readUint8(Ptr); +} + +static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) { + Expr.Opcode = readOpcode(Ptr); + + switch (Expr.Opcode) { + case wasm::WASM_OPCODE_I32_CONST: + Expr.Value.Int32 = readVarint32(Ptr); + break; + case wasm::WASM_OPCODE_I64_CONST: + Expr.Value.Int64 = readVarint64(Ptr); + break; + case wasm::WASM_OPCODE_F32_CONST: + Expr.Value.Float32 = readFloat32(Ptr); + break; + case wasm::WASM_OPCODE_F64_CONST: + Expr.Value.Float64 = readFloat64(Ptr); + break; + case wasm::WASM_OPCODE_GET_GLOBAL: + Expr.Value.Global = readULEB128(Ptr); + break; + default: + return make_error<GenericBinaryError>("Invalid opcode in init_expr", + object_error::parse_failed); + } + + uint8_t EndOpcode = readOpcode(Ptr); + if (EndOpcode != wasm::WASM_OPCODE_END) { + return make_error<GenericBinaryError>("Invalid init_expr", + object_error::parse_failed); + } + return Error::success(); +} + +static wasm::WasmLimits readLimits(const uint8_t *&Ptr) { + wasm::WasmLimits Result; + Result.Flags = readVaruint1(Ptr); + Result.Initial = readVaruint32(Ptr); + if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) + Result.Maximum = readVaruint32(Ptr); + return Result; +} + +static wasm::WasmTable readTable(const uint8_t *&Ptr) { + wasm::WasmTable Table; + Table.ElemType = readVarint7(Ptr); + Table.Limits = readLimits(Ptr); + return Table; +} + +static Error readSection(WasmSection &Section, const uint8_t *&Ptr, + const uint8_t *Start) { // TODO(sbc): Avoid reading past EOF in the case of malformed files. Section.Offset = Ptr - Start; - Section.Type = readULEB128(Ptr); - uint32_t Size = readULEB128(Ptr); + Section.Type = readVaruint7(Ptr); + uint32_t Size = readVaruint32(Ptr); if (Size == 0) return make_error<StringError>("Zero length section", object_error::parse_failed); @@ -59,10 +190,12 @@ Error readSection(wasm::WasmSection &Section, const uint8_t *&Ptr, Ptr += Size; return Error::success(); } -} WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) : ObjectFile(Binary::ID_Wasm, Buffer) { + LinkingData.DataAlignment = 0; + LinkingData.DataSize = 0; + ErrorAsOutParameter ErrAsOutParam(&Err); Header.Magic = getData().substr(0, 4); if (Header.Magic != StringRef("\0asm", 4)) { @@ -79,21 +212,486 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) } const uint8_t *Eof = getPtr(getData().size()); - wasm::WasmSection Sec; + WasmSection Sec; while (Ptr < Eof) { if ((Err = readSection(Sec, Ptr, getPtr(0)))) return; - if (Sec.Type == wasm::WASM_SEC_USER) { - if ((Err = parseUserSection(Sec, Sec.Content.data(), Sec.Content.size()))) - return; - } + if ((Err = parseSection(Sec))) + return; + Sections.push_back(Sec); } } -Error WasmObjectFile::parseUserSection(wasm::WasmSection &Sec, - const uint8_t *Ptr, size_t Length) { +Error WasmObjectFile::parseSection(WasmSection &Sec) { + const uint8_t* Start = Sec.Content.data(); + const uint8_t* End = Start + Sec.Content.size(); + switch (Sec.Type) { + case wasm::WASM_SEC_CUSTOM: + return parseCustomSection(Sec, Start, End); + case wasm::WASM_SEC_TYPE: + return parseTypeSection(Start, End); + case wasm::WASM_SEC_IMPORT: + return parseImportSection(Start, End); + case wasm::WASM_SEC_FUNCTION: + return parseFunctionSection(Start, End); + case wasm::WASM_SEC_TABLE: + return parseTableSection(Start, End); + case wasm::WASM_SEC_MEMORY: + return parseMemorySection(Start, End); + case wasm::WASM_SEC_GLOBAL: + return parseGlobalSection(Start, End); + case wasm::WASM_SEC_EXPORT: + return parseExportSection(Start, End); + case wasm::WASM_SEC_START: + return parseStartSection(Start, End); + case wasm::WASM_SEC_ELEM: + return parseElemSection(Start, End); + case wasm::WASM_SEC_CODE: + return parseCodeSection(Start, End); + case wasm::WASM_SEC_DATA: + return parseDataSection(Start, End); + default: + return make_error<GenericBinaryError>("Bad section type", + object_error::parse_failed); + } +} + +Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { + while (Ptr < End) { + uint8_t Type = readVarint7(Ptr); + uint32_t Size = readVaruint32(Ptr); + const uint8_t *SubSectionEnd = Ptr + Size; + switch (Type) { + case wasm::WASM_NAMES_FUNCTION: { + uint32_t Count = readVaruint32(Ptr); + while (Count--) { + uint32_t Index = readVaruint32(Ptr); + StringRef Name = readString(Ptr); + if (!Name.empty()) + Symbols.emplace_back(Name, + WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME, + Sections.size(), Index); + } + break; + } + // Ignore local names for now + case wasm::WASM_NAMES_LOCAL: + default: + Ptr += Size; + break; + } + if (Ptr != SubSectionEnd) + return make_error<GenericBinaryError>("Name sub-section ended prematurely", + object_error::parse_failed); + } + + if (Ptr != End) + return make_error<GenericBinaryError>("Name section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, + const uint8_t *End) { + HasLinkingSection = true; + while (Ptr < End) { + uint8_t Type = readVarint7(Ptr); + uint32_t Size = readVaruint32(Ptr); + const uint8_t *SubSectionEnd = Ptr + Size; + switch (Type) { + case wasm::WASM_SYMBOL_INFO: { + uint32_t Count = readVaruint32(Ptr); + while (Count--) { + StringRef Symbol = readString(Ptr); + DEBUG(dbgs() << "reading syminfo: " << Symbol << "\n"); + uint32_t Flags = readVaruint32(Ptr); + auto iter = SymbolMap.find(Symbol); + if (iter == SymbolMap.end()) { + return make_error<GenericBinaryError>( + "Invalid symbol name in linking section: " + Symbol, + object_error::parse_failed); + } + uint32_t SymIndex = iter->second; + assert(SymIndex < Symbols.size()); + Symbols[SymIndex].Flags = Flags; + DEBUG(dbgs() << "Set symbol flags index:" + << SymIndex << " name:" + << Symbols[SymIndex].Name << " exptected:" + << Symbol << " flags: " << Flags << "\n"); + } + break; + } + case wasm::WASM_DATA_SIZE: + LinkingData.DataSize = readVaruint32(Ptr); + break; + case wasm::WASM_DATA_ALIGNMENT: + LinkingData.DataAlignment = readVaruint32(Ptr); + break; + case wasm::WASM_STACK_POINTER: + default: + Ptr += Size; + break; + } + if (Ptr != SubSectionEnd) + return make_error<GenericBinaryError>( + "Linking sub-section ended prematurely", object_error::parse_failed); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Linking section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +WasmSection* WasmObjectFile::findCustomSectionByName(StringRef Name) { + for (WasmSection& Section : Sections) { + if (Section.Type == wasm::WASM_SEC_CUSTOM && Section.Name == Name) + return &Section; + } + return nullptr; +} + +WasmSection* WasmObjectFile::findSectionByType(uint32_t Type) { + assert(Type != wasm::WASM_SEC_CUSTOM); + for (WasmSection& Section : Sections) { + if (Section.Type == Type) + return &Section; + } + return nullptr; +} + +Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, + const uint8_t *End) { + uint8_t SectionCode = readVarint7(Ptr); + WasmSection* Section = nullptr; + if (SectionCode == wasm::WASM_SEC_CUSTOM) { + StringRef Name = readString(Ptr); + Section = findCustomSectionByName(Name); + } else { + Section = findSectionByType(SectionCode); + } + if (!Section) + return make_error<GenericBinaryError>("Invalid section code", + object_error::parse_failed); + uint32_t RelocCount = readVaruint32(Ptr); + while (RelocCount--) { + wasm::WasmRelocation Reloc; + memset(&Reloc, 0, sizeof(Reloc)); + Reloc.Type = readVaruint32(Ptr); + Reloc.Offset = readVaruint32(Ptr); + Reloc.Index = readVaruint32(Ptr); + switch (Reloc.Type) { + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + break; + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: + Reloc.Addend = readVarint32(Ptr); + break; + default: + return make_error<GenericBinaryError>("Bad relocation type: " + + Twine(Reloc.Type), + object_error::parse_failed); + } + Section->Relocations.push_back(Reloc); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Reloc section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseCustomSection(WasmSection &Sec, + const uint8_t *Ptr, const uint8_t *End) { Sec.Name = readString(Ptr); + if (Sec.Name == "name") { + if (Error Err = parseNameSection(Ptr, End)) + return Err; + } else if (Sec.Name == "linking") { + if (Error Err = parseLinkingSection(Ptr, End)) + return Err; + } else if (Sec.Name.startswith("reloc.")) { + if (Error Err = parseRelocSection(Sec.Name, Ptr, End)) + return Err; + } + return Error::success(); +} + +Error WasmObjectFile::parseTypeSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Signatures.reserve(Count); + while (Count--) { + wasm::WasmSignature Sig; + Sig.ReturnType = wasm::WASM_TYPE_NORESULT; + int8_t Form = readVarint7(Ptr); + if (Form != wasm::WASM_TYPE_FUNC) { + return make_error<GenericBinaryError>("Invalid signature type", + object_error::parse_failed); + } + uint32_t ParamCount = readVaruint32(Ptr); + Sig.ParamTypes.reserve(ParamCount); + while (ParamCount--) { + uint32_t ParamType = readVarint7(Ptr); + Sig.ParamTypes.push_back(ParamType); + } + uint32_t ReturnCount = readVaruint32(Ptr); + if (ReturnCount) { + if (ReturnCount != 1) { + return make_error<GenericBinaryError>( + "Multiple return types not supported", object_error::parse_failed); + } + Sig.ReturnType = readVarint7(Ptr); + } + Signatures.push_back(Sig); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Type section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Imports.reserve(Count); + for (uint32_t i = 0; i < Count; i++) { + wasm::WasmImport Im; + Im.Module = readString(Ptr); + Im.Field = readString(Ptr); + Im.Kind = readUint8(Ptr); + switch (Im.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + Im.SigIndex = readVaruint32(Ptr); + SymbolMap.try_emplace(Im.Field, Symbols.size()); + Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT, + Sections.size(), i); + DEBUG(dbgs() << "Adding import: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); + break; + case wasm::WASM_EXTERNAL_GLOBAL: + Im.Global.Type = readVarint7(Ptr); + Im.Global.Mutable = readVaruint1(Ptr); + SymbolMap.try_emplace(Im.Field, Symbols.size()); + Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT, + Sections.size(), i); + DEBUG(dbgs() << "Adding import: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); + break; + case wasm::WASM_EXTERNAL_MEMORY: + Im.Memory = readLimits(Ptr); + break; + case wasm::WASM_EXTERNAL_TABLE: + Im.Table = readTable(Ptr); + if (Im.Table.ElemType != wasm::WASM_TYPE_ANYFUNC) { + return make_error<GenericBinaryError>("Invalid table element type", + object_error::parse_failed); + } + break; + default: + return make_error<GenericBinaryError>( + "Unexpected import kind", object_error::parse_failed); + } + Imports.push_back(Im); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Import section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseFunctionSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + FunctionTypes.reserve(Count); + while (Count--) { + FunctionTypes.push_back(readVaruint32(Ptr)); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Function section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseTableSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Tables.reserve(Count); + while (Count--) { + Tables.push_back(readTable(Ptr)); + if (Tables.back().ElemType != wasm::WASM_TYPE_ANYFUNC) { + return make_error<GenericBinaryError>("Invalid table element type", + object_error::parse_failed); + } + } + if (Ptr != End) + return make_error<GenericBinaryError>("Table section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseMemorySection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Memories.reserve(Count); + while (Count--) { + Memories.push_back(readLimits(Ptr)); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Memory section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseGlobalSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Globals.reserve(Count); + while (Count--) { + wasm::WasmGlobal Global; + Global.Type = readVarint7(Ptr); + Global.Mutable = readVaruint1(Ptr); + if (Error Err = readInitExpr(Global.InitExpr, Ptr)) + return Err; + Globals.push_back(Global); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Global section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + Exports.reserve(Count); + for (uint32_t i = 0; i < Count; i++) { + wasm::WasmExport Ex; + Ex.Name = readString(Ptr); + Ex.Kind = readUint8(Ptr); + Ex.Index = readVaruint32(Ptr); + WasmSymbol::SymbolType ExportType; + bool MakeSymbol = false; + switch (Ex.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + ExportType = WasmSymbol::SymbolType::FUNCTION_EXPORT; + MakeSymbol = true; + break; + case wasm::WASM_EXTERNAL_GLOBAL: + ExportType = WasmSymbol::SymbolType::GLOBAL_EXPORT; + MakeSymbol = true; + break; + case wasm::WASM_EXTERNAL_MEMORY: + case wasm::WASM_EXTERNAL_TABLE: + break; + default: + return make_error<GenericBinaryError>( + "Unexpected export kind", object_error::parse_failed); + } + if (MakeSymbol) { + auto Pair = SymbolMap.try_emplace(Ex.Name, Symbols.size()); + if (Pair.second) { + Symbols.emplace_back(Ex.Name, ExportType, + Sections.size(), i); + DEBUG(dbgs() << "Adding export: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); + } else { + uint32_t SymIndex = Pair.first->second; + Symbols[SymIndex] = WasmSymbol(Ex.Name, ExportType, Sections.size(), i); + DEBUG(dbgs() << "Replacing existing symbol: " << Symbols[SymIndex] + << " sym index:" << SymIndex << "\n"); + } + } + Exports.push_back(Ex); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Export section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseStartSection(const uint8_t *Ptr, const uint8_t *End) { + StartFunction = readVaruint32(Ptr); + if (StartFunction >= FunctionTypes.size()) + return make_error<GenericBinaryError>("Invalid start function", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseCodeSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t FunctionCount = readVaruint32(Ptr); + if (FunctionCount != FunctionTypes.size()) { + return make_error<GenericBinaryError>("Invalid function count", + object_error::parse_failed); + } + + CodeSection = ArrayRef<uint8_t>(Ptr, End - Ptr); + + while (FunctionCount--) { + wasm::WasmFunction Function; + uint32_t FunctionSize = readVaruint32(Ptr); + const uint8_t *FunctionEnd = Ptr + FunctionSize; + + uint32_t NumLocalDecls = readVaruint32(Ptr); + Function.Locals.reserve(NumLocalDecls); + while (NumLocalDecls--) { + wasm::WasmLocalDecl Decl; + Decl.Count = readVaruint32(Ptr); + Decl.Type = readVarint7(Ptr); + Function.Locals.push_back(Decl); + } + + uint32_t BodySize = FunctionEnd - Ptr; + Function.Body = ArrayRef<uint8_t>(Ptr, BodySize); + Ptr += BodySize; + assert(Ptr == FunctionEnd); + Functions.push_back(Function); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Code section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseElemSection(const uint8_t *Ptr, const uint8_t *End) { + uint32_t Count = readVaruint32(Ptr); + ElemSegments.reserve(Count); + while (Count--) { + wasm::WasmElemSegment Segment; + Segment.TableIndex = readVaruint32(Ptr); + if (Segment.TableIndex != 0) { + return make_error<GenericBinaryError>("Invalid TableIndex", + object_error::parse_failed); + } + if (Error Err = readInitExpr(Segment.Offset, Ptr)) + return Err; + uint32_t NumElems = readVaruint32(Ptr); + while (NumElems--) { + Segment.Functions.push_back(readVaruint32(Ptr)); + } + ElemSegments.push_back(Segment); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Elem section ended prematurely", + object_error::parse_failed); + return Error::success(); +} + +Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) { + const uint8_t *Start = Ptr; + uint32_t Count = readVaruint32(Ptr); + DataSegments.reserve(Count); + while (Count--) { + WasmSegment Segment; + Segment.Data.MemoryIndex = readVaruint32(Ptr); + if (Error Err = readInitExpr(Segment.Data.Offset, Ptr)) + return Err; + uint32_t Size = readVaruint32(Ptr); + Segment.Data.Content = ArrayRef<uint8_t>(Ptr, Size); + Segment.SectionOffset = Ptr - Start; + Ptr += Size; + DataSegments.push_back(Segment); + } + if (Ptr != End) + return make_error<GenericBinaryError>("Data section ended prematurely", + object_error::parse_failed); return Error::success(); } @@ -105,42 +703,79 @@ const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const { return Header; } -void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { - llvm_unreachable("not yet implemented"); -} - -std::error_code WasmObjectFile::printSymbolName(raw_ostream &OS, - DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return object_error::invalid_symbol_index; -} +void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.a++; } uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return 0; + uint32_t Result = SymbolRef::SF_None; + const WasmSymbol &Sym = getWasmSymbol(Symb); + + DEBUG(dbgs() << "getSymbolFlags: ptr=" << &Sym << " " << Sym << "\n"); + if (Sym.Flags & wasm::WASM_SYMBOL_FLAG_WEAK) + Result |= SymbolRef::SF_Weak; + + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + Result |= SymbolRef::SF_Undefined | SymbolRef::SF_Executable; + break; + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + Result |= SymbolRef::SF_Global | SymbolRef::SF_Executable; + break; + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + Result |= SymbolRef::SF_Executable; + Result |= SymbolRef::SF_FormatSpecific; + break; + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + Result |= SymbolRef::SF_Undefined; + break; + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + Result |= SymbolRef::SF_Global; + break; + } + + return Result; } basic_symbol_iterator WasmObjectFile::symbol_begin() const { - return BasicSymbolRef(DataRefImpl(), this); + DataRefImpl Ref; + Ref.d.a = 0; + return BasicSymbolRef(Ref, this); } basic_symbol_iterator WasmObjectFile::symbol_end() const { - return BasicSymbolRef(DataRefImpl(), this); + DataRefImpl Ref; + Ref.d.a = Symbols.size(); + return BasicSymbolRef(Ref, this); +} + +const WasmSymbol &WasmObjectFile::getWasmSymbol(const DataRefImpl &Symb) const { + return Symbols[Symb.d.a]; +} + +const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const { + return getWasmSymbol(Symb.getRawDataRefImpl()); } Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return errorCodeToError(object_error::invalid_symbol_index); + return getWasmSymbol(Symb).Name; } Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return errorCodeToError(object_error::invalid_symbol_index); + return getSymbolValue(Symb); } uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return 0; + const WasmSymbol& Sym = getWasmSymbol(Symb); + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + return 0; + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + return Exports[Sym.ElementIndex].Index; + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + return Sym.ElementIndex; + } + llvm_unreachable("invalid symbol type"); } uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const { @@ -155,21 +790,34 @@ uint64_t WasmObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { Expected<SymbolRef::Type> WasmObjectFile::getSymbolType(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return errorCodeToError(object_error::invalid_symbol_index); + const WasmSymbol &Sym = getWasmSymbol(Symb); + + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + return SymbolRef::ST_Function; + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + return SymbolRef::ST_Data; + } + + llvm_unreachable("Unknown WasmSymbol::SymbolType"); + return SymbolRef::ST_Other; } Expected<section_iterator> WasmObjectFile::getSymbolSection(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return errorCodeToError(object_error::invalid_symbol_index); + DataRefImpl Ref; + Ref.d.a = getWasmSymbol(Symb).Section; + return section_iterator(SectionRef(Ref, this)); } void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; } std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, StringRef &Res) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; + const WasmSection &S = Sections[Sec.d.a]; #define ECase(X) \ case wasm::WASM_SEC_##X: \ Res = #X; \ @@ -186,7 +834,7 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, ECase(ELEM); ECase(CODE); ECase(DATA); - case wasm::WASM_SEC_USER: + case wasm::WASM_SEC_CUSTOM: Res = S.Name; break; default: @@ -198,14 +846,18 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } +uint64_t WasmObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; + const WasmSection &S = Sections[Sec.d.a]; return S.Content.size(); } std::error_code WasmObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; + const WasmSection &S = Sections[Sec.d.a]; // This will never fail since wasm sections can never be empty (user-sections // must have a name and non-user sections each have a defined structure). Res = StringRef(reinterpret_cast<const char *>(S.Content.data()), @@ -222,13 +874,11 @@ bool WasmObjectFile::isSectionCompressed(DataRefImpl Sec) const { } bool WasmObjectFile::isSectionText(DataRefImpl Sec) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; - return S.Type == wasm::WASM_SEC_CODE; + return getWasmSection(Sec).Type == wasm::WASM_SEC_CODE; } bool WasmObjectFile::isSectionData(DataRefImpl Sec) const { - const wasm::WasmSection &S = Sections[Sec.d.a]; - return S.Type == wasm::WASM_SEC_DATA; + return getWasmSection(Sec).Type == wasm::WASM_SEC_DATA; } bool WasmObjectFile::isSectionBSS(DataRefImpl Sec) const { return false; } @@ -237,31 +887,28 @@ bool WasmObjectFile::isSectionVirtual(DataRefImpl Sec) const { return false; } bool WasmObjectFile::isSectionBitcode(DataRefImpl Sec) const { return false; } -relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Sec) const { - llvm_unreachable("not yet implemented"); - RelocationRef Rel; - return relocation_iterator(Rel); -} - -relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Sec) const { - llvm_unreachable("not yet implemented"); - RelocationRef Rel; - return relocation_iterator(Rel); +relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Ref) const { + DataRefImpl RelocRef; + RelocRef.d.a = Ref.d.a; + RelocRef.d.b = 0; + return relocation_iterator(RelocationRef(RelocRef, this)); } -section_iterator WasmObjectFile::getRelocatedSection(DataRefImpl Sec) const { - llvm_unreachable("not yet implemented"); - SectionRef Ref; - return section_iterator(Ref); +relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Ref) const { + const WasmSection &Sec = getWasmSection(Ref); + DataRefImpl RelocRef; + RelocRef.d.a = Ref.d.a; + RelocRef.d.b = Sec.Relocations.size(); + return relocation_iterator(RelocationRef(RelocRef, this)); } void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const { - llvm_unreachable("not yet implemented"); + Rel.d.b++; } -uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Rel) const { - llvm_unreachable("not yet implemented"); - return 0; +uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const { + const wasm::WasmRelocation &Rel = getWasmRelocation(Ref); + return Rel.Offset; } symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const { @@ -270,14 +917,28 @@ symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const { return symbol_iterator(Ref); } -uint64_t WasmObjectFile::getRelocationType(DataRefImpl Rel) const { - llvm_unreachable("not yet implemented"); - return 0; +uint64_t WasmObjectFile::getRelocationType(DataRefImpl Ref) const { + const wasm::WasmRelocation &Rel = getWasmRelocation(Ref); + return Rel.Type; } void WasmObjectFile::getRelocationTypeName( - DataRefImpl Rel, SmallVectorImpl<char> &Result) const { - llvm_unreachable("not yet implemented"); + DataRefImpl Ref, SmallVectorImpl<char> &Result) const { + const wasm::WasmRelocation& Rel = getWasmRelocation(Ref); + StringRef Res = "Unknown"; + +#define WASM_RELOC(name, value) \ + case wasm::name: \ + Res = #name; \ + break; + + switch (Rel.Type) { +#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" + } + +#undef WASM_RELOC + + Result.append(Res.begin(), Res.end()); } section_iterator WasmObjectFile::section_begin() const { @@ -302,12 +963,29 @@ SubtargetFeatures WasmObjectFile::getFeatures() const { return SubtargetFeatures(); } -bool WasmObjectFile::isRelocatableObject() const { return false; } +bool WasmObjectFile::isRelocatableObject() const { + return HasLinkingSection; +} + +const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const { + assert(Ref.d.a < Sections.size()); + return Sections[Ref.d.a]; +} -const wasm::WasmSection * +const WasmSection & WasmObjectFile::getWasmSection(const SectionRef &Section) const { - return &Sections[Section.getRawDataRefImpl().d.a]; + return getWasmSection(Section.getRawDataRefImpl()); } -} // end namespace object -} // end namespace llvm +const wasm::WasmRelocation & +WasmObjectFile::getWasmRelocation(const RelocationRef &Ref) const { + return getWasmRelocation(Ref.getRawDataRefImpl()); +} + +const wasm::WasmRelocation & +WasmObjectFile::getWasmRelocation(DataRefImpl Ref) const { + assert(Ref.d.a < Sections.size()); + const WasmSection& Sec = Sections[Ref.d.a]; + assert(Ref.d.b < Sec.Relocations.size()); + return Sec.Relocations[Ref.d.b]; +} diff --git a/contrib/llvm/lib/Object/WindowsResource.cpp b/contrib/llvm/lib/Object/WindowsResource.cpp new file mode 100644 index 0000000..246eee5 --- /dev/null +++ b/contrib/llvm/lib/Object/WindowsResource.cpp @@ -0,0 +1,720 @@ +//===-- WindowsResource.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the .res file class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/WindowsResource.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MathExtras.h" +#include <ctime> +#include <queue> +#include <sstream> +#include <system_error> + +using namespace llvm; +using namespace object; + +namespace llvm { +namespace object { + +#define RETURN_IF_ERROR(X) \ + if (auto EC = X) \ + return EC; + +const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t); + +// COFF files seem to be inconsistent with alignment between sections, just use +// 8-byte because it makes everyone happy. +const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t); + +uint32_t WindowsResourceParser::TreeNode::StringCount = 0; +uint32_t WindowsResourceParser::TreeNode::DataCount = 0; + +WindowsResource::WindowsResource(MemoryBufferRef Source) + : Binary(Binary::ID_WinRes, Source) { + size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE; + BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize), + support::little); +} + +Expected<std::unique_ptr<WindowsResource>> +WindowsResource::createWindowsResource(MemoryBufferRef Source) { + if (Source.getBufferSize() < WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE) + return make_error<GenericBinaryError>( + "File too small to be a resource file", + object_error::invalid_file_type); + std::unique_ptr<WindowsResource> Ret(new WindowsResource(Source)); + return std::move(Ret); +} + +Expected<ResourceEntryRef> WindowsResource::getHeadEntry() { + Error Err = Error::success(); + auto Ref = ResourceEntryRef(BinaryStreamRef(BBS), this, Err); + if (Err) + return std::move(Err); + return Ref; +} + +ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref, + const WindowsResource *Owner, Error &Err) + : Reader(Ref), OwningRes(Owner) { + if (loadNext()) + Err = make_error<GenericBinaryError>("Could not read first entry.\n", + object_error::unexpected_eof); +} + +Error ResourceEntryRef::moveNext(bool &End) { + // Reached end of all the entries. + if (Reader.bytesRemaining() == 0) { + End = true; + return Error::success(); + } + RETURN_IF_ERROR(loadNext()); + + return Error::success(); +} + +static Error readStringOrId(BinaryStreamReader &Reader, uint16_t &ID, + ArrayRef<UTF16> &Str, bool &IsString) { + uint16_t IDFlag; + RETURN_IF_ERROR(Reader.readInteger(IDFlag)); + IsString = IDFlag != 0xffff; + + if (IsString) { + Reader.setOffset( + Reader.getOffset() - + sizeof(uint16_t)); // Re-read the bytes which we used to check the flag. + RETURN_IF_ERROR(Reader.readWideString(Str)); + } else + RETURN_IF_ERROR(Reader.readInteger(ID)); + + return Error::success(); +} + +Error ResourceEntryRef::loadNext() { + const WinResHeaderPrefix *Prefix; + RETURN_IF_ERROR(Reader.readObject(Prefix)); + + if (Prefix->HeaderSize < MIN_HEADER_SIZE) + return make_error<GenericBinaryError>("Header size is too small.", + object_error::parse_failed); + + RETURN_IF_ERROR(readStringOrId(Reader, TypeID, Type, IsStringType)); + + RETURN_IF_ERROR(readStringOrId(Reader, NameID, Name, IsStringName)); + + RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_HEADER_ALIGNMENT)); + + RETURN_IF_ERROR(Reader.readObject(Suffix)); + + RETURN_IF_ERROR(Reader.readArray(Data, Prefix->DataSize)); + + RETURN_IF_ERROR(Reader.padToAlignment(WIN_RES_DATA_ALIGNMENT)); + + return Error::success(); +} + +WindowsResourceParser::WindowsResourceParser() : Root(false) {} + +Error WindowsResourceParser::parse(WindowsResource *WR) { + auto EntryOrErr = WR->getHeadEntry(); + if (!EntryOrErr) + return EntryOrErr.takeError(); + + ResourceEntryRef Entry = EntryOrErr.get(); + bool End = false; + while (!End) { + Data.push_back(Entry.getData()); + + bool IsNewTypeString = false; + bool IsNewNameString = false; + + Root.addEntry(Entry, IsNewTypeString, IsNewNameString); + + if (IsNewTypeString) + StringTable.push_back(Entry.getTypeString()); + + if (IsNewNameString) + StringTable.push_back(Entry.getNameString()); + + RETURN_IF_ERROR(Entry.moveNext(End)); + } + + return Error::success(); +} + +void WindowsResourceParser::printTree(raw_ostream &OS) const { + ScopedPrinter Writer(OS); + Root.print(Writer, "Resource Tree"); +} + +void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry, + bool &IsNewTypeString, + bool &IsNewNameString) { + TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString); + TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString); + NameNode.addLanguageNode(Entry); +} + +WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) { + if (IsStringNode) + StringIndex = StringCount++; +} + +WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) + : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion), + Characteristics(Characteristics) { + DataIndex = DataCount++; +} + +std::unique_ptr<WindowsResourceParser::TreeNode> +WindowsResourceParser::TreeNode::createStringNode() { + return std::unique_ptr<TreeNode>(new TreeNode(true)); +} + +std::unique_ptr<WindowsResourceParser::TreeNode> +WindowsResourceParser::TreeNode::createIDNode() { + return std::unique_ptr<TreeNode>(new TreeNode(false)); +} + +std::unique_ptr<WindowsResourceParser::TreeNode> +WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) { + return std::unique_ptr<TreeNode>( + new TreeNode(MajorVersion, MinorVersion, Characteristics)); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry, + bool &IsNewTypeString) { + if (Entry.checkTypeString()) + return addChild(Entry.getTypeString(), IsNewTypeString); + else + return addChild(Entry.getTypeID()); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry, + bool &IsNewNameString) { + if (Entry.checkNameString()) + return addChild(Entry.getNameString(), IsNewNameString); + else + return addChild(Entry.getNameID()); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addLanguageNode( + const ResourceEntryRef &Entry) { + return addChild(Entry.getLanguage(), true, Entry.getMajorVersion(), + Entry.getMinorVersion(), Entry.getCharacteristics()); +} + +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild( + uint32_t ID, bool IsDataNode, uint16_t MajorVersion, uint16_t MinorVersion, + uint32_t Characteristics) { + auto Child = IDChildren.find(ID); + if (Child == IDChildren.end()) { + auto NewChild = + IsDataNode ? createDataNode(MajorVersion, MinorVersion, Characteristics) + : createIDNode(); + WindowsResourceParser::TreeNode &Node = *NewChild; + IDChildren.emplace(ID, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef, + bool &IsNewString) { + std::string NameString; + ArrayRef<UTF16> CorrectedName; + std::vector<UTF16> EndianCorrectedName; + if (sys::IsBigEndianHost) { + EndianCorrectedName.resize(NameRef.size() + 1); + std::copy(NameRef.begin(), NameRef.end(), EndianCorrectedName.begin() + 1); + EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED; + CorrectedName = makeArrayRef(EndianCorrectedName); + } else + CorrectedName = NameRef; + convertUTF16ToUTF8String(CorrectedName, NameString); + + auto Child = StringChildren.find(NameString); + if (Child == StringChildren.end()) { + auto NewChild = createStringNode(); + IsNewString = true; + WindowsResourceParser::TreeNode &Node = *NewChild; + StringChildren.emplace(NameString, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +void WindowsResourceParser::TreeNode::print(ScopedPrinter &Writer, + StringRef Name) const { + ListScope NodeScope(Writer, Name); + for (auto const &Child : StringChildren) { + Child.second->print(Writer, Child.first); + } + for (auto const &Child : IDChildren) { + Child.second->print(Writer, to_string(Child.first)); + } +} + +// This function returns the size of the entire resource tree, including +// directory tables, directory entries, and data entries. It does not include +// the directory strings or the relocations of the .rsrc section. +uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { + uint32_t Size = (IDChildren.size() + StringChildren.size()) * + sizeof(coff_resource_dir_entry); + + // Reached a node pointing to a data entry. + if (IsDataNode) { + Size += sizeof(coff_resource_data_entry); + return Size; + } + + // If the node does not point to data, it must have a directory table pointing + // to other nodes. + Size += sizeof(coff_resource_dir_table); + + for (auto const &Child : StringChildren) { + Size += Child.second->getTreeSize(); + } + for (auto const &Child : IDChildren) { + Size += Child.second->getTreeSize(); + } + return Size; +} + +class WindowsResourceCOFFWriter { +public: + WindowsResourceCOFFWriter(COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser, Error &E); + std::unique_ptr<MemoryBuffer> write(); + +private: + void performFileLayout(); + void performSectionOneLayout(); + void performSectionTwoLayout(); + void writeCOFFHeader(); + void writeFirstSectionHeader(); + void writeSecondSectionHeader(); + void writeFirstSection(); + void writeSecondSection(); + void writeSymbolTable(); + void writeStringTable(); + void writeDirectoryTree(); + void writeDirectoryStringTable(); + void writeFirstSectionRelocations(); + std::unique_ptr<MemoryBuffer> OutputBuffer; + char *BufferStart; + uint64_t CurrentOffset = 0; + COFF::MachineTypes MachineType; + const WindowsResourceParser::TreeNode &Resources; + const ArrayRef<std::vector<uint8_t>> Data; + uint64_t FileSize; + uint32_t SymbolTableOffset; + uint32_t SectionOneSize; + uint32_t SectionOneOffset; + uint32_t SectionOneRelocations; + uint32_t SectionTwoSize; + uint32_t SectionTwoOffset; + const ArrayRef<std::vector<UTF16>> StringTable; + std::vector<uint32_t> StringTableOffsets; + std::vector<uint32_t> DataOffsets; + std::vector<uint32_t> RelocationAddresses; +}; + +WindowsResourceCOFFWriter::WindowsResourceCOFFWriter( + COFF::MachineTypes MachineType, const WindowsResourceParser &Parser, + Error &E) + : MachineType(MachineType), Resources(Parser.getTree()), + Data(Parser.getData()), StringTable(Parser.getStringTable()) { + performFileLayout(); + + OutputBuffer = MemoryBuffer::getNewMemBuffer(FileSize); +} + +void WindowsResourceCOFFWriter::performFileLayout() { + // Add size of COFF header. + FileSize = COFF::Header16Size; + + // one .rsrc section header for directory tree, another for resource data. + FileSize += 2 * COFF::SectionSize; + + performSectionOneLayout(); + performSectionTwoLayout(); + + // We have reached the address of the symbol table. + SymbolTableOffset = FileSize; + + FileSize += COFF::Symbol16Size; // size of the @feat.00 symbol. + FileSize += 4 * COFF::Symbol16Size; // symbol + aux for each section. + FileSize += Data.size() * COFF::Symbol16Size; // 1 symbol per resource. + FileSize += 4; // four null bytes for the string table. +} + +void WindowsResourceCOFFWriter::performSectionOneLayout() { + SectionOneOffset = FileSize; + + SectionOneSize = Resources.getTreeSize(); + uint32_t CurrentStringOffset = SectionOneSize; + uint32_t TotalStringTableSize = 0; + for (auto const &String : StringTable) { + StringTableOffsets.push_back(CurrentStringOffset); + uint32_t StringSize = String.size() * sizeof(UTF16) + sizeof(uint16_t); + CurrentStringOffset += StringSize; + TotalStringTableSize += StringSize; + } + SectionOneSize += alignTo(TotalStringTableSize, sizeof(uint32_t)); + + // account for the relocations of section one. + SectionOneRelocations = FileSize + SectionOneSize; + FileSize += SectionOneSize; + FileSize += + Data.size() * COFF::RelocationSize; // one relocation for each resource. + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::performSectionTwoLayout() { + // add size of .rsrc$2 section, which contains all resource data on 8-byte + // alignment. + SectionTwoOffset = FileSize; + SectionTwoSize = 0; + for (auto const &Entry : Data) { + DataOffsets.push_back(SectionTwoSize); + SectionTwoSize += alignTo(Entry.size(), sizeof(uint64_t)); + } + FileSize += SectionTwoSize; + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +static std::time_t getTime() { + std::time_t Now = time(nullptr); + if (Now < 0 || !isUInt<32>(Now)) + return UINT32_MAX; + return Now; +} + +std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() { + BufferStart = const_cast<char *>(OutputBuffer->getBufferStart()); + + writeCOFFHeader(); + writeFirstSectionHeader(); + writeSecondSectionHeader(); + writeFirstSection(); + writeSecondSection(); + writeSymbolTable(); + writeStringTable(); + + return std::move(OutputBuffer); +} + +void WindowsResourceCOFFWriter::writeCOFFHeader() { + // Write the COFF header. + auto *Header = reinterpret_cast<coff_file_header *>(BufferStart); + switch (MachineType) { + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Header->Machine = COFF::IMAGE_FILE_MACHINE_ARMNT; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Header->Machine = COFF::IMAGE_FILE_MACHINE_AMD64; + break; + case COFF::IMAGE_FILE_MACHINE_I386: + Header->Machine = COFF::IMAGE_FILE_MACHINE_I386; + break; + default: + Header->Machine = COFF::IMAGE_FILE_MACHINE_UNKNOWN; + } + Header->NumberOfSections = 2; + Header->TimeDateStamp = getTime(); + Header->PointerToSymbolTable = SymbolTableOffset; + // One symbol for every resource plus 2 for each section and @feat.00 + Header->NumberOfSymbols = Data.size() + 5; + Header->SizeOfOptionalHeader = 0; + Header->Characteristics = COFF::IMAGE_FILE_32BIT_MACHINE; +} + +void WindowsResourceCOFFWriter::writeFirstSectionHeader() { + // Write the first section header. + CurrentOffset += sizeof(coff_file_header); + auto *SectionOneHeader = + reinterpret_cast<coff_section *>(BufferStart + CurrentOffset); + strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)COFF::NameSize); + SectionOneHeader->VirtualSize = 0; + SectionOneHeader->VirtualAddress = 0; + SectionOneHeader->SizeOfRawData = SectionOneSize; + SectionOneHeader->PointerToRawData = SectionOneOffset; + SectionOneHeader->PointerToRelocations = SectionOneRelocations; + SectionOneHeader->PointerToLinenumbers = 0; + SectionOneHeader->NumberOfRelocations = Data.size(); + SectionOneHeader->NumberOfLinenumbers = 0; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeSecondSectionHeader() { + // Write the second section header. + CurrentOffset += sizeof(coff_section); + auto *SectionTwoHeader = + reinterpret_cast<coff_section *>(BufferStart + CurrentOffset); + strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)COFF::NameSize); + SectionTwoHeader->VirtualSize = 0; + SectionTwoHeader->VirtualAddress = 0; + SectionTwoHeader->SizeOfRawData = SectionTwoSize; + SectionTwoHeader->PointerToRawData = SectionTwoOffset; + SectionTwoHeader->PointerToRelocations = 0; + SectionTwoHeader->PointerToLinenumbers = 0; + SectionTwoHeader->NumberOfRelocations = 0; + SectionTwoHeader->NumberOfLinenumbers = 0; + SectionTwoHeader->Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionTwoHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeFirstSection() { + // Write section one. + CurrentOffset += sizeof(coff_section); + + writeDirectoryTree(); + writeDirectoryStringTable(); + writeFirstSectionRelocations(); + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSecondSection() { + // Now write the .rsrc$02 section. + for (auto const &RawDataEntry : Data) { + std::copy(RawDataEntry.begin(), RawDataEntry.end(), + BufferStart + CurrentOffset); + CurrentOffset += alignTo(RawDataEntry.size(), sizeof(uint64_t)); + } + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSymbolTable() { + // Now write the symbol table. + // First, the feat symbol. + auto *Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)COFF::NameSize); + Symbol->Value = 0x11; + Symbol->SectionNumber = 0xffff; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + + // Now write the .rsrc1 symbol + aux. + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 1; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + auto *Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart + + CurrentOffset); + Aux->Length = SectionOneSize; + Aux->NumberOfRelocations = Data.size(); + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write the .rsrc2 symbol + aux. + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 2; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + Aux = reinterpret_cast<coff_aux_section_definition *>(BufferStart + + CurrentOffset); + Aux->Length = SectionTwoSize; + Aux->NumberOfRelocations = 0; + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write a symbol for each relocation. + for (unsigned i = 0; i < Data.size(); i++) { + char RelocationName[9]; + sprintf(RelocationName, "$R%06X", DataOffsets[i]); + Symbol = reinterpret_cast<coff_symbol16 *>(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize); + Symbol->Value = DataOffsets[i]; + Symbol->SectionNumber = 2; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + } +} + +void WindowsResourceCOFFWriter::writeStringTable() { + // Just 4 null bytes for the string table. + auto COFFStringTable = reinterpret_cast<void *>(BufferStart + CurrentOffset); + memset(COFFStringTable, 0, 4); +} + +void WindowsResourceCOFFWriter::writeDirectoryTree() { + // Traverse parsed resource tree breadth-first and write the corresponding + // COFF objects. + std::queue<const WindowsResourceParser::TreeNode *> Queue; + Queue.push(&Resources); + uint32_t NextLevelOffset = + sizeof(coff_resource_dir_table) + (Resources.getStringChildren().size() + + Resources.getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + std::vector<const WindowsResourceParser::TreeNode *> DataEntriesTreeOrder; + uint32_t CurrentRelativeOffset = 0; + + while (!Queue.empty()) { + auto CurrentNode = Queue.front(); + Queue.pop(); + auto *Table = reinterpret_cast<coff_resource_dir_table *>(BufferStart + + CurrentOffset); + Table->Characteristics = CurrentNode->getCharacteristics(); + Table->TimeDateStamp = 0; + Table->MajorVersion = CurrentNode->getMajorVersion(); + Table->MinorVersion = CurrentNode->getMinorVersion(); + auto &IDChildren = CurrentNode->getIDChildren(); + auto &StringChildren = CurrentNode->getStringChildren(); + Table->NumberOfNameEntries = StringChildren.size(); + Table->NumberOfIDEntries = IDChildren.size(); + CurrentOffset += sizeof(coff_resource_dir_table); + CurrentRelativeOffset += sizeof(coff_resource_dir_table); + + // Write the directory entries immediately following each directory table. + for (auto const &Child : StringChildren) { + auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart + + CurrentOffset); + Entry->Identifier.setNameOffset( + StringTableOffsets[Child.second->getStringIndex()]); + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + for (auto const &Child : IDChildren) { + auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart + + CurrentOffset); + Entry->Identifier.ID = Child.first; + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + } + + RelocationAddresses.resize(Data.size()); + // Now write all the resource data entries. + for (auto DataNodes : DataEntriesTreeOrder) { + auto *Entry = reinterpret_cast<coff_resource_data_entry *>(BufferStart + + CurrentOffset); + RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset; + Entry->DataRVA = 0; // Set to zero because it is a relocation. + Entry->DataSize = Data[DataNodes->getDataIndex()].size(); + Entry->Codepage = 0; + Entry->Reserved = 0; + CurrentOffset += sizeof(coff_resource_data_entry); + CurrentRelativeOffset += sizeof(coff_resource_data_entry); + } +} + +void WindowsResourceCOFFWriter::writeDirectoryStringTable() { + // Now write the directory string table for .rsrc$01 + uint32_t TotalStringTableSize = 0; + for (auto &String : StringTable) { + uint16_t Length = String.size(); + support::endian::write16le(BufferStart + CurrentOffset, Length); + CurrentOffset += sizeof(uint16_t); + auto *Start = reinterpret_cast<UTF16 *>(BufferStart + CurrentOffset); + std::copy(String.begin(), String.end(), Start); + CurrentOffset += Length * sizeof(UTF16); + TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t); + } + CurrentOffset += + alignTo(TotalStringTableSize, sizeof(uint32_t)) - TotalStringTableSize; +} + +void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { + + // Now write the relocations for .rsrc$01 + // Five symbols already in table before we start, @feat.00 and 2 for each + // .rsrc section. + uint32_t NextSymbolIndex = 5; + for (unsigned i = 0; i < Data.size(); i++) { + auto *Reloc = + reinterpret_cast<coff_relocation *>(BufferStart + CurrentOffset); + Reloc->VirtualAddress = RelocationAddresses[i]; + Reloc->SymbolTableIndex = NextSymbolIndex++; + switch (MachineType) { + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Reloc->Type = COFF::IMAGE_REL_ARM_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Reloc->Type = COFF::IMAGE_REL_AMD64_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_I386: + Reloc->Type = COFF::IMAGE_REL_I386_DIR32NB; + break; + default: + Reloc->Type = 0; + } + CurrentOffset += sizeof(coff_relocation); + } +} + +Expected<std::unique_ptr<MemoryBuffer>> +writeWindowsResourceCOFF(COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser) { + Error E = Error::success(); + WindowsResourceCOFFWriter Writer(MachineType, Parser, E); + if (E) + return std::move(E); + return Writer.write(); +} + +} // namespace object +} // namespace llvm |