diff options
Diffstat (limited to 'contrib/llvm/lib/MC')
40 files changed, 8505 insertions, 0 deletions
diff --git a/contrib/llvm/lib/MC/CMakeLists.txt b/contrib/llvm/lib/MC/CMakeLists.txt new file mode 100644 index 0000000..fc4f3c6 --- /dev/null +++ b/contrib/llvm/lib/MC/CMakeLists.txt @@ -0,0 +1,30 @@ +add_llvm_library(LLVMMC + MCAsmInfo.cpp + MCAsmInfoCOFF.cpp + MCAsmInfoDarwin.cpp + MCAsmStreamer.cpp + MCAssembler.cpp + MCCodeEmitter.cpp + MCContext.cpp + MCDisassembler.cpp + MCExpr.cpp + MCInst.cpp + MCInstPrinter.cpp + MCLabel.cpp + MCLoggingStreamer.cpp + MCMachOStreamer.cpp + MCNullStreamer.cpp + MCObjectStreamer.cpp + MCObjectWriter.cpp + MCSection.cpp + MCSectionCOFF.cpp + MCSectionELF.cpp + MCSectionMachO.cpp + MCStreamer.cpp + MCSymbol.cpp + MCValue.cpp + MachObjectWriter.cpp + WinCOFFStreamer.cpp + WinCOFFObjectWriter.cpp + TargetAsmBackend.cpp + ) diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp new file mode 100644 index 0000000..a275be2 --- /dev/null +++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp @@ -0,0 +1,102 @@ +//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/System/DataTypes.h" +#include <cctype> +#include <cstring> +using namespace llvm; + +MCAsmInfo::MCAsmInfo() { + HasSubsectionsViaSymbols = false; + HasMachoZeroFillDirective = false; + HasMachoTBSSDirective = false; + HasStaticCtorDtorReferenceInStaticMode = false; + MaxInstLength = 4; + PCSymbol = "$"; + SeparatorChar = ';'; + CommentColumn = 40; + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = "."; + LinkerPrivateGlobalPrefix = ""; + InlineAsmStart = "APP"; + InlineAsmEnd = "NO_APP"; + AssemblerDialect = 0; + AllowQuotesInName = false; + AllowNameToStartWithDigit = false; + AllowPeriodsInName = true; + ZeroDirective = "\t.zero\t"; + AsciiDirective = "\t.ascii\t"; + AscizDirective = "\t.asciz\t"; + Data8bitsDirective = "\t.byte\t"; + Data16bitsDirective = "\t.short\t"; + Data32bitsDirective = "\t.long\t"; + Data64bitsDirective = "\t.quad\t"; + SunStyleELFSectionSwitchSyntax = false; + UsesELFSectionDirectiveForBSS = false; + AlignDirective = "\t.align\t"; + AlignmentIsInBytes = true; + TextAlignFillValue = 0; + GPRel32Directive = 0; + GlobalDirective = "\t.globl\t"; + HasSetDirective = true; + HasLCOMMDirective = false; + COMMDirectiveAlignmentIsInBytes = true; + HasDotTypeDotSizeDirective = true; + HasSingleParameterDotFile = true; + HasNoDeadStrip = false; + WeakRefDirective = 0; + WeakDefDirective = 0; + LinkOnceDirective = 0; + HiddenVisibilityAttr = MCSA_Hidden; + ProtectedVisibilityAttr = MCSA_Protected; + HasLEB128 = false; + HasDotLocAndDotFile = false; + SupportsDebugInformation = false; + ExceptionsType = ExceptionHandling::None; + DwarfRequiresFrameSection = true; + DwarfUsesInlineInfoSection = false; + DwarfSectionOffsetDirective = 0; + HasMicrosoftFastStdCallMangling = false; + + AsmTransCBE = 0; +} + +MCAsmInfo::~MCAsmInfo() { +} + + +unsigned MCAsmInfo::getULEB128Size(unsigned Value) { + unsigned Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); + } while (Value); + return Size; +} + +unsigned MCAsmInfo::getSLEB128Size(int Value) { + unsigned Size = 0; + int Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + unsigned Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + Size += sizeof(int8_t); + } while (IsMore); + return Size; +} diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp new file mode 100644 index 0000000..7fc7d7a --- /dev/null +++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp @@ -0,0 +1,37 @@ +//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on COFF-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfoCOFF.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +MCAsmInfoCOFF::MCAsmInfoCOFF() { + GlobalPrefix = "_"; + COMMDirectiveAlignmentIsInBytes = false; + HasLCOMMDirective = true; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + PrivateGlobalPrefix = "L"; // Prefix for private global symbols + WeakRefDirective = "\t.weak\t"; + LinkOnceDirective = "\t.linkonce discard\n"; + + // Doesn't support visibility: + HiddenVisibilityAttr = ProtectedVisibilityAttr = MCSA_Invalid; + + // Set up DWARF directives + HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + SupportsDebugInformation = true; + DwarfSectionOffsetDirective = "\t.secrel32\t"; + HasMicrosoftFastStdCallMangling = true; +} diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp new file mode 100644 index 0000000..0bd3b2d --- /dev/null +++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp @@ -0,0 +1,48 @@ +//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on Darwin-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfoDarwin.h" +using namespace llvm; + +MCAsmInfoDarwin::MCAsmInfoDarwin() { + // Common settings for all Darwin targets. + // Syntax: + GlobalPrefix = "_"; + PrivateGlobalPrefix = "L"; + LinkerPrivateGlobalPrefix = "l"; + AllowQuotesInName = true; + HasSingleParameterDotFile = false; + HasSubsectionsViaSymbols = true; + + AlignmentIsInBytes = false; + COMMDirectiveAlignmentIsInBytes = false; + InlineAsmStart = " InlineAsm Start"; + InlineAsmEnd = " InlineAsm End"; + + // Directives: + WeakDefDirective = "\t.weak_definition "; + WeakRefDirective = "\t.weak_reference "; + ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. + HasMachoZeroFillDirective = true; // Uses .zerofill + HasMachoTBSSDirective = true; // Uses .tbss + HasStaticCtorDtorReferenceInStaticMode = true; + + HiddenVisibilityAttr = MCSA_PrivateExtern; + // Doesn't support protected visibility. + ProtectedVisibilityAttr = MCSA_Global; + + HasDotTypeDotSizeDirective = false; + HasNoDeadStrip = true; +} + diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp new file mode 100644 index 0000000..e272b60 --- /dev/null +++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp @@ -0,0 +1,706 @@ +//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +namespace { + +class MCAsmStreamer : public MCStreamer { + formatted_raw_ostream &OS; + const MCAsmInfo &MAI; + OwningPtr<MCInstPrinter> InstPrinter; + MCCodeEmitter *Emitter; + + SmallString<128> CommentToEmit; + raw_svector_ostream CommentStream; + + unsigned IsLittleEndian : 1; + unsigned IsVerboseAsm : 1; + unsigned ShowInst : 1; + +public: + MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, + bool isLittleEndian, bool isVerboseAsm, MCInstPrinter *printer, + MCCodeEmitter *emitter, bool showInst) + : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), + InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit), + IsLittleEndian(isLittleEndian), IsVerboseAsm(isVerboseAsm), + ShowInst(showInst) { + if (InstPrinter && IsVerboseAsm) + InstPrinter->setCommentStream(CommentStream); + } + ~MCAsmStreamer() {} + + bool isLittleEndian() const { return IsLittleEndian; } + + inline void EmitEOL() { + // If we don't have any comments, just emit a \n. + if (!IsVerboseAsm) { + OS << '\n'; + return; + } + EmitCommentsAndEOL(); + } + void EmitCommentsAndEOL(); + + /// isVerboseAsm - Return true if this streamer supports verbose assembly at + /// all. + virtual bool isVerboseAsm() const { return IsVerboseAsm; } + + /// hasRawTextSupport - We support EmitRawText. + virtual bool hasRawTextSupport() const { return true; } + + /// AddComment - Add a comment that can be emitted to the generated .s + /// file if applicable as a QoI issue to make the output of the compiler + /// more readable. This only affects the MCAsmStreamer, and only when + /// verbose assembly output is enabled. + virtual void AddComment(const Twine &T); + + /// AddEncodingComment - Add a comment showing the encoding of an instruction. + virtual void AddEncodingComment(const MCInst &Inst); + + /// GetCommentOS - Return a raw_ostream that comments can be written to. + /// Unlike AddComment, you are required to terminate comments with \n if you + /// use this method. + virtual raw_ostream &GetCommentOS() { + if (!IsVerboseAsm) + return nulls(); // Discard comments unless in verbose asm mode. + return CommentStream; + } + + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. + virtual void AddBlankLine() { + EmitEOL(); + } + + /// @name MCStreamer Interface + /// @{ + + virtual void SwitchSection(const MCSection *Section); + + virtual void EmitLabel(MCSymbol *Symbol); + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + + /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. + /// + /// @param Symbol - The common symbol to emit. + /// @param Size - The size of the common symbol. + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + + virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + + virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); + virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value); + + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace); + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); + + virtual void EmitInstruction(const MCInst &Inst); + + /// EmitRawText - If this file is backed by a assembly streamer, this dumps + /// the specified string in the output .s file. This capability is + /// indicated by the hasRawTextSupport() predicate. + virtual void EmitRawText(StringRef String); + + virtual void Finish(); + + /// @} +}; + +} // end anonymous namespace. + +/// AddComment - Add a comment that can be emitted to the generated .s +/// file if applicable as a QoI issue to make the output of the compiler +/// more readable. This only affects the MCAsmStreamer, and only when +/// verbose assembly output is enabled. +void MCAsmStreamer::AddComment(const Twine &T) { + if (!IsVerboseAsm) return; + + // Make sure that CommentStream is flushed. + CommentStream.flush(); + + T.toVector(CommentToEmit); + // Each comment goes on its own line. + CommentToEmit.push_back('\n'); + + // Tell the comment stream that the vector changed underneath it. + CommentStream.resync(); +} + +void MCAsmStreamer::EmitCommentsAndEOL() { + if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) { + OS << '\n'; + return; + } + + CommentStream.flush(); + StringRef Comments = CommentToEmit.str(); + + assert(Comments.back() == '\n' && + "Comment array not newline terminated"); + do { + // Emit a line of comments. + OS.PadToColumn(MAI.getCommentColumn()); + size_t Position = Comments.find('\n'); + OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n'; + + Comments = Comments.substr(Position+1); + } while (!Comments.empty()); + + CommentToEmit.clear(); + // Tell the comment stream that the vector changed underneath it. + CommentStream.resync(); +} + +static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { + assert(Bytes && "Invalid size!"); + return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); +} + +void MCAsmStreamer::SwitchSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); + if (Section != CurSection) { + CurSection = Section; + Section->PrintSwitchToSection(MAI, OS); + } +} + +void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(CurSection && "Cannot emit before setting section!"); + + OS << *Symbol << ":"; + EmitEOL(); + Symbol->setSection(*CurSection); +} + +void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: assert(0 && "Invalid flag!"); + case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break; + } + EmitEOL(); +} + +void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + OS << *Symbol << " = " << *Value; + EmitEOL(); + + // FIXME: Lift context changes into super class. + Symbol->setVariableValue(Value); +} + +void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + switch (Attribute) { + case MCSA_Invalid: assert(0 && "Invalid symbol attribute"); + case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function + case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC + case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object + case MCSA_ELF_TypeTLS: /// .type _foo, STT_TLS # aka @tls_object + case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common + case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype + assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported"); + OS << "\t.type\t" << *Symbol << ',' + << ((MAI.getCommentString()[0] != '@') ? '@' : '%'); + switch (Attribute) { + default: assert(0 && "Unknown ELF .type"); + case MCSA_ELF_TypeFunction: OS << "function"; break; + case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break; + case MCSA_ELF_TypeObject: OS << "object"; break; + case MCSA_ELF_TypeTLS: OS << "tls_object"; break; + case MCSA_ELF_TypeCommon: OS << "common"; break; + case MCSA_ELF_TypeNoType: OS << "no_type"; break; + } + EmitEOL(); + return; + case MCSA_Global: // .globl/.global + OS << MAI.getGlobalDirective(); + break; + case MCSA_Hidden: OS << "\t.hidden\t"; break; + case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break; + case MCSA_Internal: OS << "\t.internal\t"; break; + case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break; + case MCSA_Local: OS << "\t.local\t"; break; + case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break; + case MCSA_PrivateExtern: OS << "\t.private_extern\t"; break; + case MCSA_Protected: OS << "\t.protected\t"; break; + case MCSA_Reference: OS << "\t.reference\t"; break; + case MCSA_Weak: OS << "\t.weak\t"; break; + case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; break; + // .weak_reference + case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break; + case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break; + } + + OS << *Symbol; + EmitEOL(); +} + +void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + OS << ".desc" << ' ' << *Symbol << ',' << DescValue; + EmitEOL(); +} + +void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { + OS << "\t.def\t " << *Symbol << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) { + OS << "\t.scl\t" << StorageClass << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EmitCOFFSymbolType (int Type) { + OS << "\t.type\t" << Type << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EndCOFFSymbolDef() { + OS << "\t.endef"; + EmitEOL(); +} + +void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + assert(MAI.hasDotTypeDotSizeDirective()); + OS << "\t.size\t" << *Symbol << ", " << *Value << '\n'; +} + +void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + OS << "\t.comm\t" << *Symbol << ',' << Size; + if (ByteAlignment != 0) { + if (MAI.getCOMMDirectiveAlignmentIsInBytes()) + OS << ',' << ByteAlignment; + else + OS << ',' << Log2_32(ByteAlignment); + } + EmitEOL(); +} + +/// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. +/// +/// @param Symbol - The common symbol to emit. +/// @param Size - The size of the common symbol. +void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + assert(MAI.hasLCOMMDirective() && "Doesn't have .lcomm, can't emit it!"); + OS << "\t.lcomm\t" << *Symbol << ',' << Size; + EmitEOL(); +} + +void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) { + // Note: a .zerofill directive does not switch sections. + OS << ".zerofill "; + + // This is a mach-o specific directive. + const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section); + OS << MOSection->getSegmentName() << "," << MOSection->getSectionName(); + + if (Symbol != NULL) { + OS << ',' << *Symbol << ',' << Size; + if (ByteAlignment != 0) + OS << ',' << Log2_32(ByteAlignment); + } + EmitEOL(); +} + +// .tbss sym, size, align +// This depends that the symbol has already been mangled from the original, +// e.g. _a. +void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + assert(Symbol != NULL && "Symbol shouldn't be NULL!"); + // Instead of using the Section we'll just use the shortcut. + // This is a mach-o specific directive and section. + OS << ".tbss " << *Symbol << ", " << Size; + + // Output align if we have it. We default to 1 so don't bother printing + // that. + if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment); + + EmitEOL(); +} + +static inline char toOctal(int X) { return (X&7)+'0'; } + +static void PrintQuotedString(StringRef Data, raw_ostream &OS) { + OS << '"'; + + for (unsigned i = 0, e = Data.size(); i != e; ++i) { + unsigned char C = Data[i]; + if (C == '"' || C == '\\') { + OS << '\\' << (char)C; + continue; + } + + if (isprint((unsigned char)C)) { + OS << (char)C; + continue; + } + + switch (C) { + case '\b': OS << "\\b"; break; + case '\f': OS << "\\f"; break; + case '\n': OS << "\\n"; break; + case '\r': OS << "\\r"; break; + case '\t': OS << "\\t"; break; + default: + OS << '\\'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + break; + } + } + + OS << '"'; +} + + +void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + assert(CurSection && "Cannot emit contents before setting section!"); + if (Data.empty()) return; + + if (Data.size() == 1) { + OS << MAI.getData8bitsDirective(AddrSpace); + OS << (unsigned)(unsigned char)Data[0]; + EmitEOL(); + return; + } + + // If the data ends with 0 and the target supports .asciz, use it, otherwise + // use .ascii + if (MAI.getAscizDirective() && Data.back() == 0) { + OS << MAI.getAscizDirective(); + Data = Data.substr(0, Data.size()-1); + } else { + OS << MAI.getAsciiDirective(); + } + + OS << ' '; + PrintQuotedString(Data, OS); + EmitEOL(); +} + +/// EmitIntValue - Special case of EmitValue that avoids the client having +/// to pass in a MCExpr for constant integers. +void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size, + unsigned AddrSpace) { + assert(CurSection && "Cannot emit contents before setting section!"); + const char *Directive = 0; + switch (Size) { + default: break; + case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break; + case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break; + case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break; + case 8: + Directive = MAI.getData64bitsDirective(AddrSpace); + // If the target doesn't support 64-bit data, emit as two 32-bit halves. + if (Directive) break; + if (isLittleEndian()) { + EmitIntValue((uint32_t)(Value >> 0 ), 4, AddrSpace); + EmitIntValue((uint32_t)(Value >> 32), 4, AddrSpace); + } else { + EmitIntValue((uint32_t)(Value >> 32), 4, AddrSpace); + EmitIntValue((uint32_t)(Value >> 0 ), 4, AddrSpace); + } + return; + } + + assert(Directive && "Invalid size for machine code value!"); + OS << Directive << truncateToSize(Value, Size); + EmitEOL(); +} + +void MCAsmStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + assert(CurSection && "Cannot emit contents before setting section!"); + const char *Directive = 0; + switch (Size) { + default: break; + case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break; + case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break; + case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break; + case 8: Directive = MAI.getData64bitsDirective(AddrSpace); break; + } + + assert(Directive && "Invalid size for machine code value!"); + OS << Directive << *Value; + EmitEOL(); +} + +void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { + assert(MAI.getGPRel32Directive() != 0); + OS << MAI.getGPRel32Directive() << *Value; + EmitEOL(); +} + + +/// EmitFill - Emit NumBytes bytes worth of the value specified by +/// FillValue. This implements directives such as '.space'. +void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + if (NumBytes == 0) return; + + if (AddrSpace == 0) + if (const char *ZeroDirective = MAI.getZeroDirective()) { + OS << ZeroDirective << NumBytes; + if (FillValue != 0) + OS << ',' << (int)FillValue; + EmitEOL(); + return; + } + + // Emit a byte at a time. + MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace); +} + +void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { + // Some assemblers don't support non-power of two alignments, so we always + // emit alignments as a power of two if possible. + if (isPowerOf2_32(ByteAlignment)) { + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << MAI.getAlignDirective(); break; + // FIXME: use MAI for this! + case 2: OS << ".p2alignw "; break; + case 4: OS << ".p2alignl "; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + if (MAI.getAlignmentIsInBytes()) + OS << ByteAlignment; + else + OS << Log2_32(ByteAlignment); + + if (Value || MaxBytesToEmit) { + OS << ", 0x"; + OS.write_hex(truncateToSize(Value, ValueSize)); + + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + } + EmitEOL(); + return; + } + + // Non-power of two alignment. This is not widely supported by assemblers. + // FIXME: Parameterize this based on MAI. + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << ".balign"; break; + case 2: OS << ".balignw"; break; + case 4: OS << ".balignl"; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + OS << ' ' << ByteAlignment; + OS << ", " << truncateToSize(Value, ValueSize); + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + EmitEOL(); +} + +void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // Emit with a text fill value. + EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(), + 1, MaxBytesToEmit); +} + +void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + // FIXME: Verify that Offset is associated with the current section. + OS << ".org " << *Offset << ", " << (unsigned) Value; + EmitEOL(); +} + + +void MCAsmStreamer::EmitFileDirective(StringRef Filename) { + assert(MAI.hasSingleParameterDotFile()); + OS << "\t.file\t"; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +void MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){ + OS << "\t.file\t" << FileNo << ' '; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { + raw_ostream &OS = GetCommentOS(); + SmallString<256> Code; + SmallVector<MCFixup, 4> Fixups; + raw_svector_ostream VecOS(Code); + Emitter->EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // If we are showing fixups, create symbolic markers in the encoded + // representation. We do this by making a per-bit map to the fixup item index, + // then trying to display it as nicely as possible. + SmallVector<uint8_t, 64> FixupMap; + FixupMap.resize(Code.size() * 8); + for (unsigned i = 0, e = Code.size() * 8; i != e; ++i) + FixupMap[i] = 0; + + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + for (unsigned j = 0; j != Info.TargetSize; ++j) { + unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j; + assert(Index < Code.size() * 8 && "Invalid offset in fixup!"); + FixupMap[Index] = 1 + i; + } + } + + OS << "encoding: ["; + for (unsigned i = 0, e = Code.size(); i != e; ++i) { + if (i) + OS << ','; + + // See if all bits are the same map entry. + uint8_t MapEntry = FixupMap[i * 8 + 0]; + for (unsigned j = 1; j != 8; ++j) { + if (FixupMap[i * 8 + j] == MapEntry) + continue; + + MapEntry = uint8_t(~0U); + break; + } + + if (MapEntry != uint8_t(~0U)) { + if (MapEntry == 0) { + OS << format("0x%02x", uint8_t(Code[i])); + } else { + assert(Code[i] == 0 && "Encoder wrote into fixed up bit!"); + OS << char('A' + MapEntry - 1); + } + } else { + // Otherwise, write out in binary. + OS << "0b"; + for (unsigned j = 8; j--;) { + unsigned Bit = (Code[i] >> j) & 1; + if (uint8_t MapEntry = FixupMap[i * 8 + j]) { + assert(Bit == 0 && "Encoder wrote into fixed up bit!"); + OS << char('A' + MapEntry - 1); + } else + OS << Bit; + } + } + } + OS << "]\n"; + + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset() + << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n"; + } +} + +void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { + assert(CurSection && "Cannot emit contents before setting section!"); + + // Show the encoding in a comment if we have a code emitter. + if (Emitter) + AddEncodingComment(Inst); + + // Show the MCInst if enabled. + if (ShowInst) { + Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n "); + GetCommentOS() << "\n"; + } + + // If we have an AsmPrinter, use that to print, otherwise print the MCInst. + if (InstPrinter) + InstPrinter->printInst(&Inst, OS); + else + Inst.print(OS, &MAI); + EmitEOL(); +} + +/// EmitRawText - If this file is backed by a assembly streamer, this dumps +/// the specified string in the output .s file. This capability is +/// indicated by the hasRawTextSupport() predicate. +void MCAsmStreamer::EmitRawText(StringRef String) { + if (!String.empty() && String.back() == '\n') + String = String.substr(0, String.size()-1); + OS << String; + EmitEOL(); +} + +void MCAsmStreamer::Finish() { +} + +MCStreamer *llvm::createAsmStreamer(MCContext &Context, + formatted_raw_ostream &OS, + bool isLittleEndian, + bool isVerboseAsm, MCInstPrinter *IP, + MCCodeEmitter *CE, bool ShowInst) { + return new MCAsmStreamer(Context, OS, isLittleEndian, isVerboseAsm, + IP, CE, ShowInst); +} diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp new file mode 100644 index 0000000..7d84554 --- /dev/null +++ b/contrib/llvm/lib/MC/MCAssembler.cpp @@ -0,0 +1,1025 @@ +//===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "assembler" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmBackend.h" + +#include <vector> +using namespace llvm; + +namespace { +namespace stats { +STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); +STATISTIC(EvaluateFixup, "Number of evaluated fixups"); +STATISTIC(FragmentLayouts, "Number of fragment layouts"); +STATISTIC(ObjectBytes, "Number of emitted object file bytes"); +STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps"); +STATISTIC(RelaxedInstructions, "Number of relaxed instructions"); +STATISTIC(SectionLayouts, "Number of section layouts"); +} +} + +// FIXME FIXME FIXME: There are number of places in this file where we convert +// what is a 64-bit assembler value used for computation into a value in the +// object file, which may truncate it. We should detect that truncation where +// invalid and report errors back. + +/* *** */ + +MCAsmLayout::MCAsmLayout(MCAssembler &Asm) + : Assembler(Asm), LastValidFragment(0) + { + // Compute the section layout order. Virtual sections must go last. + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (!Asm.getBackend().isVirtualSection(it->getSection())) + SectionOrder.push_back(&*it); + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (Asm.getBackend().isVirtualSection(it->getSection())) + SectionOrder.push_back(&*it); +} + +bool MCAsmLayout::isSectionUpToDate(const MCSectionData *SD) const { + // The first section is always up-to-date. + unsigned Index = SD->getLayoutOrder(); + if (!Index) + return true; + + // Otherwise, sections are always implicitly computed when the preceeding + // fragment is layed out. + const MCSectionData *Prev = getSectionOrder()[Index - 1]; + return isFragmentUpToDate(&(Prev->getFragmentList().back())); +} + +bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const { + return (LastValidFragment && + F->getLayoutOrder() <= LastValidFragment->getLayoutOrder()); +} + +void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) { + // If this fragment wasn't already up-to-date, we don't need to do anything. + if (!isFragmentUpToDate(F)) + return; + + // Otherwise, reset the last valid fragment to the predecessor of the + // invalidated fragment. + LastValidFragment = F->getPrevNode(); + if (!LastValidFragment) { + unsigned Index = F->getParent()->getLayoutOrder(); + if (Index != 0) { + MCSectionData *Prev = getSectionOrder()[Index - 1]; + LastValidFragment = &(Prev->getFragmentList().back()); + } + } +} + +void MCAsmLayout::EnsureValid(const MCFragment *F) const { + // Advance the layout position until the fragment is up-to-date. + while (!isFragmentUpToDate(F)) { + // Advance to the next fragment. + MCFragment *Cur = LastValidFragment; + if (Cur) + Cur = Cur->getNextNode(); + if (!Cur) { + unsigned NextIndex = 0; + if (LastValidFragment) + NextIndex = LastValidFragment->getParent()->getLayoutOrder() + 1; + Cur = SectionOrder[NextIndex]->begin(); + } + + const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur); + } +} + +void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) { + if (LastValidFragment == Src) + LastValidFragment = Dst; + + Dst->Offset = Src->Offset; + Dst->EffectiveSize = Src->EffectiveSize; +} + +uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const { + assert(F->getParent() && "Missing section()!"); + return getSectionAddress(F->getParent()) + getFragmentOffset(F); +} + +uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const { + EnsureValid(F); + assert(F->EffectiveSize != ~UINT64_C(0) && "Address not set!"); + return F->EffectiveSize; +} + +uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { + EnsureValid(F); + assert(F->Offset != ~UINT64_C(0) && "Address not set!"); + return F->Offset; +} + +uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const { + assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!"); + return getFragmentAddress(SD->getFragment()) + SD->getOffset(); +} + +uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const { + EnsureValid(SD->begin()); + assert(SD->Address != ~UINT64_C(0) && "Address not set!"); + return SD->Address; +} + +uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { + // The size is the last fragment's end offset. + const MCFragment &F = SD->getFragmentList().back(); + return getFragmentOffset(&F) + getFragmentEffectiveSize(&F); +} + +uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { + // Virtual sections have no file size. + if (getAssembler().getBackend().isVirtualSection(SD->getSection())) + return 0; + + // Otherwise, the file size is the same as the address space size. + return getSectionAddressSize(SD); +} + +uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const { + // The logical size is the address space size minus any tail padding. + uint64_t Size = getSectionAddressSize(SD); + const MCAlignFragment *AF = + dyn_cast<MCAlignFragment>(&(SD->getFragmentList().back())); + if (AF && AF->hasOnlyAlignAddress()) + Size -= getFragmentEffectiveSize(AF); + + return Size; +} + +/* *** */ + +MCFragment::MCFragment() : Kind(FragmentType(~0)) { +} + +MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) + : Kind(_Kind), Parent(_Parent), Atom(0), EffectiveSize(~UINT64_C(0)) +{ + if (Parent) + Parent->getFragmentList().push_back(this); +} + +/* *** */ + +MCSectionData::MCSectionData() : Section(0) {} + +MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) + : Section(&_Section), + Alignment(1), + Address(~UINT64_C(0)), + HasInstructions(false) +{ + if (A) + A->getSectionList().push_back(this); +} + +/* *** */ + +MCSymbolData::MCSymbolData() : Symbol(0) {} + +MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, + uint64_t _Offset, MCAssembler *A) + : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset), + IsExternal(false), IsPrivateExtern(false), + CommonSize(0), CommonAlign(0), Flags(0), Index(0) +{ + if (A) + A->getSymbolList().push_back(this); +} + +/* *** */ + +MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend, + MCCodeEmitter &_Emitter, raw_ostream &_OS) + : Context(_Context), Backend(_Backend), Emitter(_Emitter), + OS(_OS), RelaxAll(false), SubsectionsViaSymbols(false) +{ +} + +MCAssembler::~MCAssembler() { +} + +static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue Target, + const MCSection *BaseSection) { + // The effective fixup address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // - addr(<base symbol>) + <fixup offset from base symbol> + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) - addr(<base symbol>)) == 0. + // + // The simple (Darwin, except on x86_64) way of dealing with this was to + // assume that any reference to a temporary symbol *must* be a temporary + // symbol in the same atom, unless the sections differ. Therefore, any PCrel + // relocation to a temporary symbol (in the same section) is fully + // resolved. This also works in conjunction with absolutized .set, which + // requires the compiler to use .set to absolutize the differences between + // symbols which the compiler knows to be assembly time constants, so we don't + // need to worry about considering symbol differences fully resolved. + + // Non-relative fixups are only resolved if constant. + if (!BaseSection) + return Target.isAbsolute(); + + // Otherwise, relative fixups are only resolved if not a difference and the + // target is a temporary in the same section. + if (Target.isAbsolute() || Target.getSymB()) + return false; + + const MCSymbol *A = &Target.getSymA()->getSymbol(); + if (!A->isTemporary() || !A->isInSection() || + &A->getSection() != BaseSection) + return false; + + return true; +} + +static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, + const MCValue Target, + const MCSymbolData *BaseSymbol) { + // The effective fixup address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // - addr(BaseSymbol) + <fixup offset from base symbol> + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) - addr(BaseSymbol) == 0. + // + // Note that "false" is almost always conservatively correct (it means we emit + // a relocation which is unnecessary), except when it would force us to emit a + // relocation which the target cannot encode. + + const MCSymbolData *A_Base = 0, *B_Base = 0; + if (const MCSymbolRefExpr *A = Target.getSymA()) { + // Modified symbol references cannot be resolved. + if (A->getKind() != MCSymbolRefExpr::VK_None) + return false; + + A_Base = Asm.getAtom(Layout, &Asm.getSymbolData(A->getSymbol())); + if (!A_Base) + return false; + } + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + // Modified symbol references cannot be resolved. + if (B->getKind() != MCSymbolRefExpr::VK_None) + return false; + + B_Base = Asm.getAtom(Layout, &Asm.getSymbolData(B->getSymbol())); + if (!B_Base) + return false; + } + + // If there is no base, A and B have to be the same atom for this fixup to be + // fully resolved. + if (!BaseSymbol) + return A_Base == B_Base; + + // Otherwise, B must be missing and A must be the base. + return !B_Base && BaseSymbol == A_Base; +} + +bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const { + // Non-temporary labels should always be visible to the linker. + if (!Symbol.isTemporary()) + return true; + + // Absolute temporary labels are never visible. + if (!Symbol.isInSection()) + return false; + + // Otherwise, check if the section requires symbols even for temporary labels. + return getBackend().doesSectionRequireSymbols(Symbol.getSection()); +} + +const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout, + const MCSymbolData *SD) const { + // Linker visible symbols define atoms. + if (isSymbolLinkerVisible(SD->getSymbol())) + return SD; + + // Absolute and undefined symbols have no defining atom. + if (!SD->getFragment()) + return 0; + + // Non-linker visible symbols in sections which can't be atomized have no + // defining atom. + if (!getBackend().isSectionAtomizable( + SD->getFragment()->getParent()->getSection())) + return 0; + + // Otherwise, return the atom for the containing fragment. + return SD->getFragment()->getAtom(); +} + +bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value) const { + ++stats::EvaluateFixup; + + if (!Fixup.getValue()->EvaluateAsRelocatable(Target, &Layout)) + report_fatal_error("expected relocatable expression"); + + // FIXME: How do non-scattered symbols work in ELF? I presume the linker + // doesn't support small relocations, but then under what criteria does the + // assembler allow symbol differences? + + Value = Target.getConstant(); + + bool IsPCRel = Emitter.getFixupKindInfo( + Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; + bool IsResolved = true; + if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->getSymbol().isDefined()) + Value += Layout.getSymbolAddress(&getSymbolData(A->getSymbol())); + else + IsResolved = false; + } + if (const MCSymbolRefExpr *B = Target.getSymB()) { + if (B->getSymbol().isDefined()) + Value -= Layout.getSymbolAddress(&getSymbolData(B->getSymbol())); + else + IsResolved = false; + } + + // If we are using scattered symbols, determine whether this value is actually + // resolved; scattering may cause atoms to move. + if (IsResolved && getBackend().hasScatteredSymbols()) { + if (getBackend().hasReliableSymbolDifference()) { + // If this is a PCrel relocation, find the base atom (identified by its + // symbol) that the fixup value is relative to. + const MCSymbolData *BaseSymbol = 0; + if (IsPCRel) { + BaseSymbol = DF->getAtom(); + if (!BaseSymbol) + IsResolved = false; + } + + if (IsResolved) + IsResolved = isScatteredFixupFullyResolved(*this, Layout, Fixup, Target, + BaseSymbol); + } else { + const MCSection *BaseSection = 0; + if (IsPCRel) + BaseSection = &DF->getParent()->getSection(); + + IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, Target, + BaseSection); + } + } + + if (IsPCRel) + Value -= Layout.getFragmentAddress(DF) + Fixup.getOffset(); + + return IsResolved; +} + +uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout, + const MCFragment &F, + uint64_t SectionAddress, + uint64_t FragmentOffset) const { + switch (F.getKind()) { + case MCFragment::FT_Data: + return cast<MCDataFragment>(F).getContents().size(); + case MCFragment::FT_Fill: + return cast<MCFillFragment>(F).getSize(); + case MCFragment::FT_Inst: + return cast<MCInstFragment>(F).getInstSize(); + + case MCFragment::FT_Align: { + const MCAlignFragment &AF = cast<MCAlignFragment>(F); + + assert((!AF.hasOnlyAlignAddress() || !AF.getNextNode()) && + "Invalid OnlyAlignAddress bit, not the last fragment!"); + + uint64_t Size = OffsetToAlignment(SectionAddress + FragmentOffset, + AF.getAlignment()); + + // Honor MaxBytesToEmit. + if (Size > AF.getMaxBytesToEmit()) + return 0; + + return Size; + } + + case MCFragment::FT_Org: { + const MCOrgFragment &OF = cast<MCOrgFragment>(F); + + // FIXME: We should compute this sooner, we don't want to recurse here, and + // we would like to be more functional. + int64_t TargetLocation; + if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout)) + report_fatal_error("expected assembly-time absolute expression"); + + // FIXME: We need a way to communicate this error. + int64_t Offset = TargetLocation - FragmentOffset; + if (Offset < 0) + report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + + "' (at offset '" + Twine(FragmentOffset) + "'"); + + return Offset; + } + } + + assert(0 && "invalid fragment kind"); + return 0; +} + +void MCAsmLayout::LayoutFile() { + // Initialize the first section and set the valid fragment layout point. All + // actual layout computations are done lazily. + LastValidFragment = 0; + if (!getSectionOrder().empty()) + getSectionOrder().front()->Address = 0; +} + +void MCAsmLayout::LayoutFragment(MCFragment *F) { + MCFragment *Prev = F->getPrevNode(); + + // We should never try to recompute something which is up-to-date. + assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!"); + // We should never try to compute the fragment layout if the section isn't + // up-to-date. + assert(isSectionUpToDate(F->getParent()) && + "Attempt to compute fragment before it's section!"); + // We should never try to compute the fragment layout if it's predecessor + // isn't up-to-date. + assert((!Prev || isFragmentUpToDate(Prev)) && + "Attempt to compute fragment before it's predecessor!"); + + ++stats::FragmentLayouts; + + // Compute the fragment start address. + uint64_t StartAddress = F->getParent()->Address; + uint64_t Address = StartAddress; + if (Prev) + Address += Prev->Offset + Prev->EffectiveSize; + + // Compute fragment offset and size. + F->Offset = Address - StartAddress; + F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress, + F->Offset); + LastValidFragment = F; + + // If this is the last fragment in a section, update the next section address. + if (!F->getNextNode()) { + unsigned NextIndex = F->getParent()->getLayoutOrder() + 1; + if (NextIndex != getSectionOrder().size()) + LayoutSection(getSectionOrder()[NextIndex]); + } +} + +void MCAsmLayout::LayoutSection(MCSectionData *SD) { + unsigned SectionOrderIndex = SD->getLayoutOrder(); + + ++stats::SectionLayouts; + + // Compute the section start address. + uint64_t StartAddress = 0; + if (SectionOrderIndex) { + MCSectionData *Prev = getSectionOrder()[SectionOrderIndex - 1]; + StartAddress = getSectionAddress(Prev) + getSectionAddressSize(Prev); + } + + // Honor the section alignment requirements. + StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); + + // Set the section address. + SD->Address = StartAddress; +} + +/// WriteFragmentData - Write the \arg F data to the output file. +static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment &F, MCObjectWriter *OW) { + uint64_t Start = OW->getStream().tell(); + (void) Start; + + ++stats::EmittedFragments; + + // FIXME: Embed in fragments instead? + uint64_t FragmentSize = Layout.getFragmentEffectiveSize(&F); + switch (F.getKind()) { + case MCFragment::FT_Align: { + MCAlignFragment &AF = cast<MCAlignFragment>(F); + uint64_t Count = FragmentSize / AF.getValueSize(); + + assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!"); + + // FIXME: This error shouldn't actually occur (the front end should emit + // multiple .align directives to enforce the semantics it wants), but is + // severe enough that we want to report it. How to handle this? + if (Count * AF.getValueSize() != FragmentSize) + report_fatal_error("undefined .align directive, value size '" + + Twine(AF.getValueSize()) + + "' is not a divisor of padding size '" + + Twine(FragmentSize) + "'"); + + // See if we are aligning with nops, and if so do that first to try to fill + // the Count bytes. Then if that did not fill any bytes or there are any + // bytes left to fill use the the Value and ValueSize to fill the rest. + // If we are aligning with nops, ask that target to emit the right data. + if (AF.hasEmitNops()) { + if (!Asm.getBackend().WriteNopData(Count, OW)) + report_fatal_error("unable to write nop sequence of " + + Twine(Count) + " bytes"); + break; + } + + // Otherwise, write out in multiples of the value size. + for (uint64_t i = 0; i != Count; ++i) { + switch (AF.getValueSize()) { + default: + assert(0 && "Invalid size!"); + case 1: OW->Write8 (uint8_t (AF.getValue())); break; + case 2: OW->Write16(uint16_t(AF.getValue())); break; + case 4: OW->Write32(uint32_t(AF.getValue())); break; + case 8: OW->Write64(uint64_t(AF.getValue())); break; + } + } + break; + } + + case MCFragment::FT_Data: { + MCDataFragment &DF = cast<MCDataFragment>(F); + assert(FragmentSize == DF.getContents().size() && "Invalid size!"); + OW->WriteBytes(DF.getContents().str()); + break; + } + + case MCFragment::FT_Fill: { + MCFillFragment &FF = cast<MCFillFragment>(F); + + assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!"); + + for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) { + switch (FF.getValueSize()) { + default: + assert(0 && "Invalid size!"); + case 1: OW->Write8 (uint8_t (FF.getValue())); break; + case 2: OW->Write16(uint16_t(FF.getValue())); break; + case 4: OW->Write32(uint32_t(FF.getValue())); break; + case 8: OW->Write64(uint64_t(FF.getValue())); break; + } + } + break; + } + + case MCFragment::FT_Inst: + llvm_unreachable("unexpected inst fragment after lowering"); + break; + + case MCFragment::FT_Org: { + MCOrgFragment &OF = cast<MCOrgFragment>(F); + + for (uint64_t i = 0, e = FragmentSize; i != e; ++i) + OW->Write8(uint8_t(OF.getValue())); + + break; + } + } + + assert(OW->getStream().tell() - Start == FragmentSize); +} + +void MCAssembler::WriteSectionData(const MCSectionData *SD, + const MCAsmLayout &Layout, + MCObjectWriter *OW) const { + // Ignore virtual sections. + if (getBackend().isVirtualSection(SD->getSection())) { + assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!"); + + // Check that contents are only things legal inside a virtual section. + for (MCSectionData::const_iterator it = SD->begin(), + ie = SD->end(); it != ie; ++it) { + switch (it->getKind()) { + default: + assert(0 && "Invalid fragment in virtual section!"); + case MCFragment::FT_Align: + assert(!cast<MCAlignFragment>(it)->getValueSize() && + "Invalid align in virtual section!"); + break; + case MCFragment::FT_Fill: + assert(!cast<MCFillFragment>(it)->getValueSize() && + "Invalid fill in virtual section!"); + break; + } + } + + return; + } + + uint64_t Start = OW->getStream().tell(); + (void) Start; + + for (MCSectionData::const_iterator it = SD->begin(), + ie = SD->end(); it != ie; ++it) + WriteFragmentData(*this, Layout, *it, OW); + + assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD)); +} + +void MCAssembler::Finish() { + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - pre-layout\n--\n"; + dump(); }); + + // Create the layout object. + MCAsmLayout Layout(*this); + + // Insert additional align fragments for concrete sections to explicitly pad + // the previous section to match their alignment requirements. This is for + // 'gas' compatibility, it shouldn't strictly be necessary. + // + // FIXME: This may be Mach-O specific. + for (unsigned i = 1, e = Layout.getSectionOrder().size(); i < e; ++i) { + MCSectionData *SD = Layout.getSectionOrder()[i]; + + // Ignore sections without alignment requirements. + unsigned Align = SD->getAlignment(); + if (Align <= 1) + continue; + + // Ignore virtual sections, they don't cause file size modifications. + if (getBackend().isVirtualSection(SD->getSection())) + continue; + + // Otherwise, create a new align fragment at the end of the previous + // section. + MCAlignFragment *AF = new MCAlignFragment(Align, 0, 1, Align, + Layout.getSectionOrder()[i - 1]); + AF->setOnlyAlignAddress(true); + } + + // Create dummy fragments and assign section ordinals. + unsigned SectionIndex = 0; + for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + // Create dummy fragments to eliminate any empty sections, this simplifies + // layout. + if (it->getFragmentList().empty()) + new MCFillFragment(0, 1, 0, it); + + it->setOrdinal(SectionIndex++); + } + + // Assign layout order indices to sections and fragments. + unsigned FragmentIndex = 0; + for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) { + MCSectionData *SD = Layout.getSectionOrder()[i]; + SD->setLayoutOrder(i); + + for (MCSectionData::iterator it2 = SD->begin(), + ie2 = SD->end(); it2 != ie2; ++it2) + it2->setLayoutOrder(FragmentIndex++); + } + + // Layout until everything fits. + while (LayoutOnce(Layout)) + continue; + + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - post-relaxation\n--\n"; + dump(); }); + + // Finalize the layout, including fragment lowering. + FinishLayout(Layout); + + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - final-layout\n--\n"; + dump(); }); + + uint64_t StartOffset = OS.tell(); + llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS)); + if (!Writer) + report_fatal_error("unable to create object writer!"); + + // Allow the object writer a chance to perform post-layout binding (for + // example, to set the index fields in the symbol data). + Writer->ExecutePostLayoutBinding(*this); + + // Evaluate and apply the fixups, generating relocation entries as necessary. + for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) { + MCDataFragment *DF = dyn_cast<MCDataFragment>(it2); + if (!DF) + continue; + + for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(), + ie3 = DF->fixup_end(); it3 != ie3; ++it3) { + MCFixup &Fixup = *it3; + + // Evaluate the fixup. + MCValue Target; + uint64_t FixedValue; + if (!EvaluateFixup(Layout, Fixup, DF, Target, FixedValue)) { + // The fixup was unresolved, we need a relocation. Inform the object + // writer of the relocation, and give it an opportunity to adjust the + // fixup value if need be. + Writer->RecordRelocation(*this, Layout, DF, Fixup, Target,FixedValue); + } + + getBackend().ApplyFixup(Fixup, *DF, FixedValue); + } + } + } + + // Write the object file. + Writer->WriteObject(*this, Layout); + + stats::ObjectBytes += OS.tell() - StartOffset; +} + +bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup, + const MCFragment *DF, + const MCAsmLayout &Layout) const { + if (getRelaxAll()) + return true; + + // If we cannot resolve the fixup value, it requires relaxation. + MCValue Target; + uint64_t Value; + if (!EvaluateFixup(Layout, Fixup, DF, Target, Value)) + return true; + + // Otherwise, relax if the value is too big for a (signed) i8. + // + // FIXME: This is target dependent! + return int64_t(Value) != int64_t(int8_t(Value)); +} + +bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF, + const MCAsmLayout &Layout) const { + // If this inst doesn't ever need relaxation, ignore it. This occurs when we + // are intentionally pushing out inst fragments, or because we relaxed a + // previous instruction to one that doesn't need relaxation. + if (!getBackend().MayNeedRelaxation(IF->getInst())) + return false; + + for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(), + ie = IF->fixup_end(); it != ie; ++it) + if (FixupNeedsRelaxation(*it, IF, Layout)) + return true; + + return false; +} + +bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) { + ++stats::RelaxationSteps; + + // Layout the sections in order. + Layout.LayoutFile(); + + // Scan for fragments that need relaxation. + bool WasRelaxed = false; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + MCSectionData &SD = *it; + + for (MCSectionData::iterator it2 = SD.begin(), + ie2 = SD.end(); it2 != ie2; ++it2) { + // Check if this is an instruction fragment that needs relaxation. + MCInstFragment *IF = dyn_cast<MCInstFragment>(it2); + if (!IF || !FragmentNeedsRelaxation(IF, Layout)) + continue; + + ++stats::RelaxedInstructions; + + // FIXME-PERF: We could immediately lower out instructions if we can tell + // they are fully resolved, to avoid retesting on later passes. + + // Relax the fragment. + + MCInst Relaxed; + getBackend().RelaxInstruction(IF->getInst(), Relaxed); + + // Encode the new instruction. + // + // FIXME-PERF: If it matters, we could let the target do this. It can + // probably do so more efficiently in many cases. + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups); + VecOS.flush(); + + // Update the instruction fragment. + int SlideAmount = Code.size() - IF->getInstSize(); + IF->setInst(Relaxed); + IF->getCode() = Code; + IF->getFixups().clear(); + // FIXME: Eliminate copy. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) + IF->getFixups().push_back(Fixups[i]); + + // Update the layout, and remember that we relaxed. + Layout.UpdateForSlide(IF, SlideAmount); + WasRelaxed = true; + } + } + + return WasRelaxed; +} + +void MCAssembler::FinishLayout(MCAsmLayout &Layout) { + // Lower out any instruction fragments, to simplify the fixup application and + // output. + // + // FIXME-PERF: We don't have to do this, but the assumption is that it is + // cheap (we will mostly end up eliminating fragments and appending on to data + // fragments), so the extra complexity downstream isn't worth it. Evaluate + // this assumption. + for (iterator it = begin(), ie = end(); it != ie; ++it) { + MCSectionData &SD = *it; + + for (MCSectionData::iterator it2 = SD.begin(), + ie2 = SD.end(); it2 != ie2; ++it2) { + MCInstFragment *IF = dyn_cast<MCInstFragment>(it2); + if (!IF) + continue; + + // Create a new data fragment for the instruction. + // + // FIXME-PERF: Reuse previous data fragment if possible. + MCDataFragment *DF = new MCDataFragment(); + SD.getFragmentList().insert(it2, DF); + + // Update the data fragments layout data. + DF->setParent(IF->getParent()); + DF->setAtom(IF->getAtom()); + DF->setLayoutOrder(IF->getLayoutOrder()); + Layout.FragmentReplaced(IF, DF); + + // Copy in the data and the fixups. + DF->getContents().append(IF->getCode().begin(), IF->getCode().end()); + for (unsigned i = 0, e = IF->getFixups().size(); i != e; ++i) + DF->getFixups().push_back(IF->getFixups()[i]); + + // Delete the instruction fragment and update the iterator. + SD.getFragmentList().erase(IF); + it2 = DF; + } + } +} + +// Debugging methods + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) { + OS << "<MCFixup" << " Offset:" << AF.getOffset() + << " Value:" << *AF.getValue() + << " Kind:" << AF.getKind() << ">"; + return OS; +} + +} + +void MCFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<"; + switch (getKind()) { + case MCFragment::FT_Align: OS << "MCAlignFragment"; break; + case MCFragment::FT_Data: OS << "MCDataFragment"; break; + case MCFragment::FT_Fill: OS << "MCFillFragment"; break; + case MCFragment::FT_Inst: OS << "MCInstFragment"; break; + case MCFragment::FT_Org: OS << "MCOrgFragment"; break; + } + + OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder + << " Offset:" << Offset << " EffectiveSize:" << EffectiveSize << ">"; + + switch (getKind()) { + case MCFragment::FT_Align: { + const MCAlignFragment *AF = cast<MCAlignFragment>(this); + if (AF->hasEmitNops()) + OS << " (emit nops)"; + if (AF->hasOnlyAlignAddress()) + OS << " (only align section)"; + OS << "\n "; + OS << " Alignment:" << AF->getAlignment() + << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize() + << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">"; + break; + } + case MCFragment::FT_Data: { + const MCDataFragment *DF = cast<MCDataFragment>(this); + OS << "\n "; + OS << " Contents:["; + const SmallVectorImpl<char> &Contents = DF->getContents(); + for (unsigned i = 0, e = Contents.size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << Contents.size() << " bytes)"; + + if (!DF->getFixups().empty()) { + OS << ",\n "; + OS << " Fixups:["; + for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(), + ie = DF->fixup_end(); it != ie; ++it) { + if (it != DF->fixup_begin()) OS << ",\n "; + OS << *it; + } + OS << "]"; + } + break; + } + case MCFragment::FT_Fill: { + const MCFillFragment *FF = cast<MCFillFragment>(this); + OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize() + << " Size:" << FF->getSize(); + break; + } + case MCFragment::FT_Inst: { + const MCInstFragment *IF = cast<MCInstFragment>(this); + OS << "\n "; + OS << " Inst:"; + IF->getInst().dump_pretty(OS); + break; + } + case MCFragment::FT_Org: { + const MCOrgFragment *OF = cast<MCOrgFragment>(this); + OS << "\n "; + OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue(); + break; + } + } + OS << ">"; +} + +void MCSectionData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSectionData"; + OS << " Alignment:" << getAlignment() << " Address:" << Address + << " Fragments:[\n "; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "]>"; +} + +void MCSymbolData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSymbolData Symbol:" << getSymbol() + << " Fragment:" << getFragment() << " Offset:" << getOffset() + << " Flags:" << getFlags() << " Index:" << getIndex(); + if (isCommon()) + OS << " (common, size:" << getCommonSize() + << " align: " << getCommonAlignment() << ")"; + if (isExternal()) + OS << " (external)"; + if (isPrivateExtern()) + OS << " (private extern)"; + OS << ">"; +} + +void MCAssembler::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAssembler\n"; + OS << " Sections:[\n "; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "],\n"; + OS << " Symbols:["; + + for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + if (it != symbol_begin()) OS << ",\n "; + it->dump(); + } + OS << "]>\n"; +} diff --git a/contrib/llvm/lib/MC/MCCodeEmitter.cpp b/contrib/llvm/lib/MC/MCCodeEmitter.cpp new file mode 100644 index 0000000..d513237 --- /dev/null +++ b/contrib/llvm/lib/MC/MCCodeEmitter.cpp @@ -0,0 +1,30 @@ +//===-- MCCodeEmitter.cpp - Instruction Encoding --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCCodeEmitter.h" + +using namespace llvm; + +MCCodeEmitter::MCCodeEmitter() { +} + +MCCodeEmitter::~MCCodeEmitter() { +} + +const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const { + static const MCFixupKindInfo Builtins[] = { + { "FK_Data_1", 0, 8, 0 }, + { "FK_Data_2", 0, 16, 0 }, + { "FK_Data_4", 0, 32, 0 }, + { "FK_Data_8", 0, 64, 0 } + }; + + assert(Kind <= 3 && "Unknown fixup kind"); + return Builtins[Kind]; +} diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp new file mode 100644 index 0000000..1137064 --- /dev/null +++ b/contrib/llvm/lib/MC/MCContext.cpp @@ -0,0 +1,183 @@ +//===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCLabel.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; +typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; +typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; + + +MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) { + MachOUniquingMap = 0; + ELFUniquingMap = 0; + COFFUniquingMap = 0; + + SecureLogFile = getenv("AS_SECURE_LOG_FILE"); + SecureLog = 0; + SecureLogUsed = false; +} + +MCContext::~MCContext() { + // NOTE: The symbols are all allocated out of a bump pointer allocator, + // we don't need to free them here. + + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)MachOUniquingMap; + delete (ELFUniqueMapTy*)ELFUniquingMap; + delete (COFFUniqueMapTy*)COFFUniquingMap; + + // If the stream for the .secure_log_unique directive was created free it. + delete (raw_ostream*)SecureLog; +} + +//===----------------------------------------------------------------------===// +// Symbol Manipulation +//===----------------------------------------------------------------------===// + +MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { + assert(!Name.empty() && "Normal symbols cannot be unnamed!"); + + // Determine whether this is an assembler temporary or normal label. + bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix()); + + // Do the lookup and get the entire StringMapEntry. We want access to the + // key if we are creating the entry. + StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name); + if (Entry.getValue()) return Entry.getValue(); + + // Ok, the entry doesn't already exist. Have the MCSymbol object itself refer + // to the copy of the string that is embedded in the StringMapEntry. + MCSymbol *Result = new (*this) MCSymbol(Entry.getKey(), isTemporary); + Entry.setValue(Result); + return Result; +} + +MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { + SmallString<128> NameSV; + Name.toVector(NameSV); + return GetOrCreateSymbol(NameSV.str()); +} + +MCSymbol *MCContext::CreateTempSymbol() { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + "tmp" + Twine(NextUniqueID++)); +} + +unsigned MCContext::NextInstance(int64_t LocalLabelVal) { + MCLabel *&Label = Instances[LocalLabelVal]; + if (!Label) + Label = new (*this) MCLabel(0); + return Label->incInstance(); +} + +unsigned MCContext::GetInstance(int64_t LocalLabelVal) { + MCLabel *&Label = Instances[LocalLabelVal]; + if (!Label) + Label = new (*this) MCLabel(0); + return Label->getInstance(); +} + +MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + Twine(LocalLabelVal) + + "\2" + + Twine(NextInstance(LocalLabelVal))); +} +MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal, + int bORf) { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + Twine(LocalLabelVal) + + "\2" + + Twine(GetInstance(LocalLabelVal) + bORf)); +} + +MCSymbol *MCContext::LookupSymbol(StringRef Name) const { + return Symbols.lookup(Name); +} + +//===----------------------------------------------------------------------===// +// Section Management +//===----------------------------------------------------------------------===// + +const MCSectionMachO *MCContext:: +getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) { + + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (MachOUniquingMap == 0) + MachOUniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. + return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes, + Reserved2, Kind); +} + + +const MCSection *MCContext:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, bool IsExplicit) { + if (ELFUniquingMap == 0) + ELFUniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap; + + // Do the lookup, if we have a hit, return it. + StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section); + if (Entry.getValue()) return Entry.getValue(); + + MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags, + Kind, IsExplicit); + Entry.setValue(Result); + return Result; +} + +const MCSection *MCContext::getCOFFSection(StringRef Section, + unsigned Characteristics, + int Selection, + SectionKind Kind) { + if (COFFUniquingMap == 0) + COFFUniquingMap = new COFFUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap; + + // Do the lookup, if we have a hit, return it. + StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section); + if (Entry.getValue()) return Entry.getValue(); + + MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(), + Characteristics, + Selection, Kind); + + Entry.setValue(Result); + return Result; +} diff --git a/contrib/llvm/lib/MC/MCDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler.cpp new file mode 100644 index 0000000..0809690 --- /dev/null +++ b/contrib/llvm/lib/MC/MCDisassembler.cpp @@ -0,0 +1,14 @@ +//===-- lib/MC/MCDisassembler.cpp - Disassembler interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDisassembler.h" +using namespace llvm; + +MCDisassembler::~MCDisassembler() { +} diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp new file mode 100644 index 0000000..343f334 --- /dev/null +++ b/contrib/llvm/lib/MC/MCExpr.cpp @@ -0,0 +1,382 @@ +//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mcexpr" +#include "llvm/MC/MCExpr.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +namespace { +namespace stats { +STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); +} +} + +void MCExpr::print(raw_ostream &OS) const { + switch (getKind()) { + case MCExpr::Target: + return cast<MCTargetExpr>(this)->PrintImpl(OS); + case MCExpr::Constant: + OS << cast<MCConstantExpr>(*this).getValue(); + return; + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this); + const MCSymbol &Sym = SRE.getSymbol(); + + if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16) + OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + + // Parenthesize names that start with $ so that they don't look like + // absolute names. + if (Sym.getName()[0] == '$') + OS << '(' << Sym << ')'; + else + OS << Sym; + + if (SRE.getKind() != MCSymbolRefExpr::VK_None && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16) + OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + + return; + } + + case MCExpr::Unary: { + const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this); + switch (UE.getOpcode()) { + default: assert(0 && "Invalid opcode!"); + case MCUnaryExpr::LNot: OS << '!'; break; + case MCUnaryExpr::Minus: OS << '-'; break; + case MCUnaryExpr::Not: OS << '~'; break; + case MCUnaryExpr::Plus: OS << '+'; break; + } + OS << *UE.getSubExpr(); + return; + } + + case MCExpr::Binary: { + const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this); + + // Only print parens around the LHS if it is non-trivial. + if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) { + OS << *BE.getLHS(); + } else { + OS << '(' << *BE.getLHS() << ')'; + } + + switch (BE.getOpcode()) { + default: assert(0 && "Invalid opcode!"); + case MCBinaryExpr::Add: + // Print "X-42" instead of "X+-42". + if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) { + if (RHSC->getValue() < 0) { + OS << RHSC->getValue(); + return; + } + } + + OS << '+'; + break; + case MCBinaryExpr::And: OS << '&'; break; + case MCBinaryExpr::Div: OS << '/'; break; + case MCBinaryExpr::EQ: OS << "=="; break; + case MCBinaryExpr::GT: OS << '>'; break; + case MCBinaryExpr::GTE: OS << ">="; break; + case MCBinaryExpr::LAnd: OS << "&&"; break; + case MCBinaryExpr::LOr: OS << "||"; break; + case MCBinaryExpr::LT: OS << '<'; break; + case MCBinaryExpr::LTE: OS << "<="; break; + case MCBinaryExpr::Mod: OS << '%'; break; + case MCBinaryExpr::Mul: OS << '*'; break; + case MCBinaryExpr::NE: OS << "!="; break; + case MCBinaryExpr::Or: OS << '|'; break; + case MCBinaryExpr::Shl: OS << "<<"; break; + case MCBinaryExpr::Shr: OS << ">>"; break; + case MCBinaryExpr::Sub: OS << '-'; break; + case MCBinaryExpr::Xor: OS << '^'; break; + } + + // Only print parens around the LHS if it is non-trivial. + if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) { + OS << *BE.getRHS(); + } else { + OS << '(' << *BE.getRHS() << ')'; + } + return; + } + } + + assert(0 && "Invalid expression kind!"); +} + +void MCExpr::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +/* *** */ + +const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS, + const MCExpr *RHS, MCContext &Ctx) { + return new (Ctx) MCBinaryExpr(Opc, LHS, RHS); +} + +const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) MCUnaryExpr(Opc, Expr); +} + +const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) { + return new (Ctx) MCConstantExpr(Value); +} + +/* *** */ + +const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym, + VariantKind Kind, + MCContext &Ctx) { + return new (Ctx) MCSymbolRefExpr(Sym, Kind); +} + +const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind, + MCContext &Ctx) { + return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx); +} + +StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { + switch (Kind) { + default: + case VK_Invalid: return "<<invalid>>"; + case VK_None: return "<<none>>"; + + case VK_GOT: return "GOT"; + case VK_GOTOFF: return "GOTOFF"; + case VK_GOTPCREL: return "GOTPCREL"; + case VK_GOTTPOFF: return "GOTTPOFF"; + case VK_INDNTPOFF: return "INDNTPOFF"; + case VK_NTPOFF: return "NTPOFF"; + case VK_PLT: return "PLT"; + case VK_TLSGD: return "TLSGD"; + case VK_TPOFF: return "TPOFF"; + case VK_ARM_HI16: return ":upper16:"; + case VK_ARM_LO16: return ":lower16:"; + case VK_TLVP: return "TLVP"; + } +} + +MCSymbolRefExpr::VariantKind +MCSymbolRefExpr::getVariantKindForName(StringRef Name) { + return StringSwitch<VariantKind>(Name) + .Case("GOT", VK_GOT) + .Case("GOTOFF", VK_GOTOFF) + .Case("GOTPCREL", VK_GOTPCREL) + .Case("GOTTPOFF", VK_GOTTPOFF) + .Case("INDNTPOFF", VK_INDNTPOFF) + .Case("NTPOFF", VK_NTPOFF) + .Case("PLT", VK_PLT) + .Case("TLSGD", VK_TLSGD) + .Case("TPOFF", VK_TPOFF) + .Case("TLVP", VK_TLVP) + .Default(VK_Invalid); +} + +/* *** */ + +void MCTargetExpr::Anchor() {} + +/* *** */ + +bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const { + MCValue Value; + + // Fast path constants. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(this)) { + Res = CE->getValue(); + return true; + } + + if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute()) + return false; + + Res = Value.getConstant(); + return true; +} + +static bool EvaluateSymbolicAdd(const MCValue &LHS,const MCSymbolRefExpr *RHS_A, + const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst, + MCValue &Res) { + // We can't add or subtract two symbols. + if ((LHS.getSymA() && RHS_A) || + (LHS.getSymB() && RHS_B)) + return false; + + const MCSymbolRefExpr *A = LHS.getSymA() ? LHS.getSymA() : RHS_A; + const MCSymbolRefExpr *B = LHS.getSymB() ? LHS.getSymB() : RHS_B; + if (B) { + // If we have a negated symbol, then we must have also have a non-negated + // symbol in order to encode the expression. We can do this check later to + // permit expressions which eventually fold to a representable form -- such + // as (a + (0 - b)) -- if necessary. + if (!A) + return false; + } + Res = MCValue::get(A, B, LHS.getConstant() + RHS_Cst); + return true; +} + +bool MCExpr::EvaluateAsRelocatable(MCValue &Res, + const MCAsmLayout *Layout) const { + ++stats::MCExprEvaluate; + + switch (getKind()) { + case Target: + return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout); + + case Constant: + Res = MCValue::get(cast<MCConstantExpr>(this)->getValue()); + return true; + + case SymbolRef: { + const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this); + const MCSymbol &Sym = SRE->getSymbol(); + + // Evaluate recursively if this is a variable. + if (Sym.isVariable()) { + if (!Sym.getVariableValue()->EvaluateAsRelocatable(Res, Layout)) + return false; + + // Absolutize symbol differences between defined symbols when we have a + // layout object and the target requests it. + if (Layout && Res.getSymB() && + Layout->getAssembler().getBackend().hasAbsolutizedSet() && + Res.getSymA()->getSymbol().isDefined() && + Res.getSymB()->getSymbol().isDefined()) { + MCSymbolData &A = + Layout->getAssembler().getSymbolData(Res.getSymA()->getSymbol()); + MCSymbolData &B = + Layout->getAssembler().getSymbolData(Res.getSymB()->getSymbol()); + Res = MCValue::get(+ Layout->getSymbolAddress(&A) + - Layout->getSymbolAddress(&B) + + Res.getConstant()); + } + + return true; + } + + Res = MCValue::get(SRE, 0, 0); + return true; + } + + case Unary: { + const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this); + MCValue Value; + + if (!AUE->getSubExpr()->EvaluateAsRelocatable(Value, Layout)) + return false; + + switch (AUE->getOpcode()) { + case MCUnaryExpr::LNot: + if (!Value.isAbsolute()) + return false; + Res = MCValue::get(!Value.getConstant()); + break; + case MCUnaryExpr::Minus: + /// -(a - b + const) ==> (b - a - const) + if (Value.getSymA() && !Value.getSymB()) + return false; + Res = MCValue::get(Value.getSymB(), Value.getSymA(), + -Value.getConstant()); + break; + case MCUnaryExpr::Not: + if (!Value.isAbsolute()) + return false; + Res = MCValue::get(~Value.getConstant()); + break; + case MCUnaryExpr::Plus: + Res = Value; + break; + } + + return true; + } + + case Binary: { + const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this); + MCValue LHSValue, RHSValue; + + if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue, Layout) || + !ABE->getRHS()->EvaluateAsRelocatable(RHSValue, Layout)) + return false; + + // We only support a few operations on non-constant expressions, handle + // those first. + if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) { + switch (ABE->getOpcode()) { + default: + return false; + case MCBinaryExpr::Sub: + // Negate RHS and add. + return EvaluateSymbolicAdd(LHSValue, + RHSValue.getSymB(), RHSValue.getSymA(), + -RHSValue.getConstant(), + Res); + + case MCBinaryExpr::Add: + return EvaluateSymbolicAdd(LHSValue, + RHSValue.getSymA(), RHSValue.getSymB(), + RHSValue.getConstant(), + Res); + } + } + + // FIXME: We need target hooks for the evaluation. It may be limited in + // width, and gas defines the result of comparisons and right shifts + // differently from Apple as. + int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant(); + int64_t Result = 0; + switch (ABE->getOpcode()) { + case MCBinaryExpr::Add: Result = LHS + RHS; break; + case MCBinaryExpr::And: Result = LHS & RHS; break; + case MCBinaryExpr::Div: Result = LHS / RHS; break; + case MCBinaryExpr::EQ: Result = LHS == RHS; break; + case MCBinaryExpr::GT: Result = LHS > RHS; break; + case MCBinaryExpr::GTE: Result = LHS >= RHS; break; + case MCBinaryExpr::LAnd: Result = LHS && RHS; break; + case MCBinaryExpr::LOr: Result = LHS || RHS; break; + case MCBinaryExpr::LT: Result = LHS < RHS; break; + case MCBinaryExpr::LTE: Result = LHS <= RHS; break; + case MCBinaryExpr::Mod: Result = LHS % RHS; break; + case MCBinaryExpr::Mul: Result = LHS * RHS; break; + case MCBinaryExpr::NE: Result = LHS != RHS; break; + case MCBinaryExpr::Or: Result = LHS | RHS; break; + case MCBinaryExpr::Shl: Result = LHS << RHS; break; + case MCBinaryExpr::Shr: Result = LHS >> RHS; break; + case MCBinaryExpr::Sub: Result = LHS - RHS; break; + case MCBinaryExpr::Xor: Result = LHS ^ RHS; break; + } + + Res = MCValue::get(Result); + return true; + } + } + + assert(0 && "Invalid assembly expression kind!"); + return false; +} diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp new file mode 100644 index 0000000..4cb628b --- /dev/null +++ b/contrib/llvm/lib/MC/MCInst.cpp @@ -0,0 +1,66 @@ +//===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const { + OS << "<MCOperand "; + if (!isValid()) + OS << "INVALID"; + else if (isReg()) + OS << "Reg:" << getReg(); + else if (isImm()) + OS << "Imm:" << getImm(); + else if (isExpr()) { + OS << "Expr:(" << *getExpr() << ")"; + } else + OS << "UNDEFINED"; + OS << ">"; +} + +void MCOperand::dump() const { + print(dbgs(), 0); + dbgs() << "\n"; +} + +void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const { + OS << "<MCInst " << getOpcode(); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + OS << " "; + getOperand(i).print(OS, MAI); + } + OS << ">"; +} + +void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI, + const MCInstPrinter *Printer, + StringRef Separator) const { + OS << "<MCInst #" << getOpcode(); + + // Show the instruction opcode name if we have access to a printer. + if (Printer) + OS << ' ' << Printer->getOpcodeName(getOpcode()); + + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + OS << Separator; + getOperand(i).print(OS, MAI); + } + OS << ">"; +} + +void MCInst::dump() const { + print(dbgs(), 0); + dbgs() << "\n"; +} diff --git a/contrib/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm/lib/MC/MCInstPrinter.cpp new file mode 100644 index 0000000..92a7154 --- /dev/null +++ b/contrib/llvm/lib/MC/MCInstPrinter.cpp @@ -0,0 +1,21 @@ +//===-- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/ADT/StringRef.h" +using namespace llvm; + +MCInstPrinter::~MCInstPrinter() { +} + +/// getOpcodeName - Return the name of the specified opcode enum (e.g. +/// "MOV32ri") or empty if we can't resolve it. +StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { + return ""; +} diff --git a/contrib/llvm/lib/MC/MCLabel.cpp b/contrib/llvm/lib/MC/MCLabel.cpp new file mode 100644 index 0000000..9c0fc92 --- /dev/null +++ b/contrib/llvm/lib/MC/MCLabel.cpp @@ -0,0 +1,21 @@ +//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCLabel.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +void MCLabel::print(raw_ostream &OS) const { + OS << '"' << getInstance() << '"'; +} + +void MCLabel::dump() const { + print(dbgs()); +} diff --git a/contrib/llvm/lib/MC/MCLoggingStreamer.cpp b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp new file mode 100644 index 0000000..b96040a --- /dev/null +++ b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp @@ -0,0 +1,208 @@ +//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + +class MCLoggingStreamer : public MCStreamer { + llvm::OwningPtr<MCStreamer> Child; + + raw_ostream &OS; + +public: + MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS) + : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {} + + void LogCall(const char *Function) { + OS << Function << "\n"; + } + + void LogCall(const char *Function, const Twine &Message) { + OS << Function << ": " << Message << "\n"; + } + + virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); } + + virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); } + + virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); } + + virtual void AddComment(const Twine &T) { + LogCall("AddComment", T); + return Child->AddComment(T); + } + + virtual void AddBlankLine() { + LogCall("AddBlankLine"); + return Child->AddBlankLine(); + } + + virtual void SwitchSection(const MCSection *Section) { + CurSection = Section; + LogCall("SwitchSection"); + return Child->SwitchSection(Section); + } + + virtual void EmitLabel(MCSymbol *Symbol) { + LogCall("EmitLabel"); + return Child->EmitLabel(Symbol); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { + LogCall("EmitAssemblerFlag"); + return Child->EmitAssemblerFlag(Flag); + } + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + LogCall("EmitAssignment"); + return Child->EmitAssignment(Symbol, Value); + } + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { + LogCall("EmitSymbolAttribute"); + return Child->EmitSymbolAttribute(Symbol, Attribute); + } + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + LogCall("EmitSymbolDesc"); + return Child->EmitSymbolDesc(Symbol, DescValue); + } + + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + LogCall("BeginCOFFSymbolDef"); + return Child->BeginCOFFSymbolDef(Symbol); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + LogCall("EmitCOFFSymbolStorageClass"); + return Child->EmitCOFFSymbolStorageClass(StorageClass); + } + + virtual void EmitCOFFSymbolType(int Type) { + LogCall("EmitCOFFSymbolType"); + return Child->EmitCOFFSymbolType(Type); + } + + virtual void EndCOFFSymbolDef() { + LogCall("EndCOFFSymbolDef"); + return Child->EndCOFFSymbolDef(); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + LogCall("EmitELFSize"); + return Child->EmitELFSize(Symbol, Value); + } + + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + LogCall("EmitCommonSymbol"); + return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + LogCall("EmitLocalCommonSymbol"); + return Child->EmitLocalCommonSymbol(Symbol, Size); + } + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + LogCall("EmitZerofill"); + return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment); + } + + virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + LogCall("EmitTBSSSymbol"); + return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment); + } + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + LogCall("EmitBytes"); + return Child->EmitBytes(Data, AddrSpace); + } + + virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace){ + LogCall("EmitValue"); + return Child->EmitValue(Value, Size, AddrSpace); + } + + virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace) { + LogCall("EmitIntValue"); + return Child->EmitIntValue(Value, Size, AddrSpace); + } + + virtual void EmitGPRel32Value(const MCExpr *Value) { + LogCall("EmitGPRel32Value"); + return Child->EmitGPRel32Value(Value); + } + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + LogCall("EmitFill"); + return Child->EmitFill(NumBytes, FillValue, AddrSpace); + } + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0) { + LogCall("EmitValueToAlignment"); + return Child->EmitValueToAlignment(ByteAlignment, Value, + ValueSize, MaxBytesToEmit); + } + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) { + LogCall("EmitCodeAlignment"); + return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit); + } + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) { + LogCall("EmitValueToOffset"); + return Child->EmitValueToOffset(Offset, Value); + } + + virtual void EmitFileDirective(StringRef Filename) { + LogCall("EmitFileDirective", "FileName:" + Filename); + return Child->EmitFileDirective(Filename); + } + + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { + LogCall("EmitDwarfFileDirective", + "FileNo:" + Twine(FileNo) + " Filename:" + Filename); + return Child->EmitDwarfFileDirective(FileNo, Filename); + } + + virtual void EmitInstruction(const MCInst &Inst) { + LogCall("EmitInstruction"); + return Child->EmitInstruction(Inst); + } + + virtual void EmitRawText(StringRef String) { + LogCall("EmitRawText", "\"" + String + "\""); + return Child->EmitRawText(String); + } + + virtual void Finish() { + LogCall("Finish"); + return Child->Finish(); + } + +}; + +} // end anonymous namespace. + +MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) { + return new MCLoggingStreamer(Child, OS); +} diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp new file mode 100644 index 0000000..44bc267 --- /dev/null +++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp @@ -0,0 +1,493 @@ +//===- lib/MC/MCMachOStreamer.cpp - Mach-O Object Output ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" + +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCMachOSymbolFlags.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" + +using namespace llvm; + +namespace { + +class MCMachOStreamer : public MCObjectStreamer { +private: + MCFragment *getCurrentFragment() const { + assert(getCurrentSectionData() && "No current section!"); + + if (!getCurrentSectionData()->empty()) + return &getCurrentSectionData()->getFragmentList().back(); + + return 0; + } + + /// Get a data fragment to write into, creating a new one if the current + /// fragment is not a data fragment. + MCDataFragment *getOrCreateDataFragment() const { + MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!F) + F = new MCDataFragment(getCurrentSectionData()); + return F; + } + + void EmitInstToFragment(const MCInst &Inst); + void EmitInstToData(const MCInst &Inst); + +public: + MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + const MCExpr *AddValueSymbols(const MCExpr *Value) { + switch (Value->getKind()) { + case MCExpr::Target: assert(0 && "Can't handle target exprs yet!"); + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); + AddValueSymbols(BE->getLHS()); + AddValueSymbols(BE->getRHS()); + break; + } + + case MCExpr::SymbolRef: + getAssembler().getOrCreateSymbolData( + cast<MCSymbolRefExpr>(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr()); + break; + } + + return Value; + } + + /// @name MCStreamer Interface + /// @{ + + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EndCOFFSymbolDef() { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename) { + report_fatal_error("unsupported directive: '.file'"); + } + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { + report_fatal_error("unsupported directive: '.file'"); + } + + virtual void EmitInstruction(const MCInst &Inst); + + virtual void Finish(); + + /// @} +}; + +} // end anonymous namespace. + +void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(CurSection && "Cannot emit before setting section!"); + + Symbol->setSection(*CurSection); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // We have to create a new fragment if this is an atom defining symbol, + // fragments cannot span atoms. + if (getAssembler().isSymbolLinkerVisible(SD.getSymbol())) + new MCDataFragment(getCurrentSectionData()); + + // FIXME: This is wasteful, we don't necessarily need to create a data + // fragment. Instead, we should mark the symbol as pointing into the data + // fragment if it exists, otherwise we should just queue the label and set its + // fragment pointer when we emit the next fragment. + MCDataFragment *F = getOrCreateDataFragment(); + assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); + SD.setFragment(F); + SD.setOffset(F->getContents().size()); + + // This causes the reference type flag to be cleared. Darwin 'as' was "trying" + // to clear the weak reference and weak definition bits too, but the + // implementation was buggy. For now we just try to match 'as', for + // diffability. + // + // FIXME: Cleanup this code, these bits should be emitted based on semantic + // properties, not on the order of definition, etc. + SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask); +} + +void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + case MCAF_SubsectionsViaSymbols: + getAssembler().setSubsectionsViaSymbols(true); + return; + } + + assert(0 && "invalid assembler flag!"); +} + +void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // FIXME: Lift context changes into super class. + getAssembler().getOrCreateSymbolData(*Symbol); + Symbol->setVariableValue(AddValueSymbols(Value)); +} + +void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + // Indirect symbols are handled differently, to match how 'as' handles + // them. This makes writing matching .o files easier. + if (Attribute == MCSA_IndirectSymbol) { + // Note that we intentionally cannot use the symbol data here; this is + // important for matching the string table that 'as' generates. + IndirectSymbolData ISD; + ISD.Symbol = Symbol; + ISD.SectionData = getCurrentSectionData(); + getAssembler().getIndirectSymbols().push_back(ISD); + return; + } + + // Adding a symbol attribute always introduces the symbol, note that an + // important side effect of calling getOrCreateSymbolData here is to register + // the symbol with the assembler. + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // The implementation of symbol attributes is designed to match 'as', but it + // leaves much to desired. It doesn't really make sense to arbitrarily add and + // remove flags, but 'as' allows this (in particular, see .desc). + // + // In the future it might be worth trying to make these operations more well + // defined. + switch (Attribute) { + case MCSA_Invalid: + case MCSA_ELF_TypeFunction: + case MCSA_ELF_TypeIndFunction: + case MCSA_ELF_TypeObject: + case MCSA_ELF_TypeTLS: + case MCSA_ELF_TypeCommon: + case MCSA_ELF_TypeNoType: + case MCSA_IndirectSymbol: + case MCSA_Hidden: + case MCSA_Internal: + case MCSA_Protected: + case MCSA_Weak: + case MCSA_Local: + assert(0 && "Invalid symbol attribute for Mach-O!"); + break; + + case MCSA_Global: + SD.setExternal(true); + // This effectively clears the undefined lazy bit, in Darwin 'as', although + // it isn't very consistent because it implements this as part of symbol + // lookup. + // + // FIXME: Cleanup this code, these bits should be emitted based on semantic + // properties, not on the order of definition, etc. + SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy); + break; + + case MCSA_LazyReference: + // FIXME: This requires -dynamic. + SD.setFlags(SD.getFlags() | SF_NoDeadStrip); + if (Symbol->isUndefined()) + SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy); + break; + + // Since .reference sets the no dead strip bit, it is equivalent to + // .no_dead_strip in practice. + case MCSA_Reference: + case MCSA_NoDeadStrip: + SD.setFlags(SD.getFlags() | SF_NoDeadStrip); + break; + + case MCSA_PrivateExtern: + SD.setExternal(true); + SD.setPrivateExtern(true); + break; + + case MCSA_WeakReference: + // FIXME: This requires -dynamic. + if (Symbol->isUndefined()) + SD.setFlags(SD.getFlags() | SF_WeakReference); + break; + + case MCSA_WeakDefinition: + // FIXME: 'as' enforces that this is defined and global. The manual claims + // it has to be in a coalesced section, but this isn't enforced. + SD.setFlags(SD.getFlags() | SF_WeakDefinition); + break; + + case MCSA_WeakDefAutoPrivate: + SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference); + break; + } +} + +void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + // Encode the 'desc' value into the lowest implementation defined bits. + assert(DescValue == (DescValue & SF_DescFlagsMask) && + "Invalid .desc value!"); + getAssembler().getOrCreateSymbolData(*Symbol).setFlags( + DescValue & SF_DescFlagsMask); +} + +void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself. + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setExternal(true); + SD.setCommon(Size, ByteAlignment); +} + +void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) { + MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section); + + // The symbol may not be present, which only creates the section. + if (!Symbol) + return; + + // FIXME: Assert that this section has the zerofill type. + + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // Emit an align fragment if necessary. + if (ByteAlignment != 1) + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData); + + MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); + SD.setFragment(F); + + Symbol->setSection(*Section); + + // Update the maximum alignment on the zero fill section if necessary. + if (ByteAlignment > SectData.getAlignment()) + SectData.setAlignment(ByteAlignment); +} + +// This should always be called with the thread local bss section. Like the +// .zerofill directive this doesn't actually switch sections on us. +void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + EmitZerofill(Section, Symbol, Size, ByteAlignment); + return; +} + +void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); +} + +void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + MCDataFragment *DF = getOrCreateDataFragment(); + + // Avoid fixups when possible. + int64_t AbsValue; + if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) { + // FIXME: Endianness assumption. + for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->addFixup(MCFixup::Create(DF->getContents().size(), + AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } +} + +void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, unsigned ValueSize, + unsigned MaxBytesToEmit) { + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + getCurrentSectionData()); + F->setEmitNops(true); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); +} + +void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) { + MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); + + // Add the fixups and data. + // + // FIXME: Revisit this design decision when relaxation is done, we may be + // able to get away with not storing any extra data in the MCInst. + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + IF->getCode() = Code; + IF->getFixups() = Fixups; +} + +void MCMachOStreamer::EmitInstToData(const MCInst &Inst) { + MCDataFragment *DF = getOrCreateDataFragment(); + + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // Add the fixups and data. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->addFixup(Fixups[i]); + } + DF->getContents().append(Code.begin(), Code.end()); +} + +void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { + // Scan for values. + for (unsigned i = Inst.getNumOperands(); i--; ) + if (Inst.getOperand(i).isExpr()) + AddValueSymbols(Inst.getOperand(i).getExpr()); + + getCurrentSectionData()->setHasInstructions(true); + + // If this instruction doesn't need relaxation, just emit it as data. + if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) { + EmitInstToData(Inst); + return; + } + + // Otherwise, if we are relaxing everything, relax the instruction as much as + // possible and emit it as data. + if (getAssembler().getRelaxAll()) { + MCInst Relaxed; + getAssembler().getBackend().RelaxInstruction(Inst, Relaxed); + while (getAssembler().getBackend().MayNeedRelaxation(Relaxed)) + getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed); + EmitInstToData(Relaxed); + return; + } + + // Otherwise emit to a separate fragment. + EmitInstToFragment(Inst); +} + +void MCMachOStreamer::Finish() { + // We have to set the fragment atom associations so we can relax properly for + // Mach-O. + + // First, scan the symbol table to build a lookup table from fragments to + // defining symbols. + DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap; + for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(), + ie = getAssembler().symbol_end(); it != ie; ++it) { + if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) && + it->getFragment()) { + // An atom defining symbol should never be internal to a fragment. + assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!"); + DefiningSymbolMap[it->getFragment()] = it; + } + } + + // Set the fragment atom associations by tracking the last seen atom defining + // symbol. + for (MCAssembler::iterator it = getAssembler().begin(), + ie = getAssembler().end(); it != ie; ++it) { + MCSymbolData *CurrentAtom = 0; + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) { + if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2)) + CurrentAtom = SD; + it2->setAtom(CurrentAtom); + } + } + + this->MCObjectStreamer::Finish(); +} + +MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll) { + MCMachOStreamer *S = new MCMachOStreamer(Context, TAB, OS, CE); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; +} diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp new file mode 100644 index 0000000..5332ade --- /dev/null +++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp @@ -0,0 +1,88 @@ +//===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" + +using namespace llvm; + +namespace { + + class MCNullStreamer : public MCStreamer { + public: + MCNullStreamer(MCContext &Context) : MCStreamer(Context) {} + + /// @name MCStreamer Interface + /// @{ + + virtual void SwitchSection(const MCSection *Section) { + CurSection = Section; + } + + virtual void EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(CurSection && "Cannot emit before setting section!"); + Symbol->setSection(*CurSection); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {} + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {} + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){} + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {} + virtual void EmitCOFFSymbolStorageClass(int StorageClass) {} + virtual void EmitCOFFSymbolType(int Type) {} + virtual void EndCOFFSymbolDef() {} + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) {} + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} + + virtual void EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) {} + virtual void EmitGPRel32Value(const MCExpr *Value) {} + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0) {} + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) {} + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) {} + + virtual void EmitFileDirective(StringRef Filename) {} + virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {} + virtual void EmitInstruction(const MCInst &Inst) {} + + virtual void Finish() {} + + /// @} + }; + +} + +MCStreamer *llvm::createNullStreamer(MCContext &Context) { + return new MCNullStreamer(Context); +} diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp new file mode 100644 index 0000000..d3f7f77 --- /dev/null +++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp @@ -0,0 +1,39 @@ +//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectStreamer.h" + +#include "llvm/MC/MCAssembler.h" +using namespace llvm; + +MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &_OS, MCCodeEmitter *_Emitter) + : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB, + *_Emitter, _OS)), + CurSectionData(0) +{ +} + +MCObjectStreamer::~MCObjectStreamer() { + delete Assembler; +} + +void MCObjectStreamer::SwitchSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); + + // If already in this section, then this is a noop. + if (Section == CurSection) return; + + CurSection = Section; + CurSectionData = &getAssembler().getOrCreateSectionData(*Section); +} + +void MCObjectStreamer::Finish() { + getAssembler().Finish(); +} diff --git a/contrib/llvm/lib/MC/MCObjectWriter.cpp b/contrib/llvm/lib/MC/MCObjectWriter.cpp new file mode 100644 index 0000000..d117e82 --- /dev/null +++ b/contrib/llvm/lib/MC/MCObjectWriter.cpp @@ -0,0 +1,15 @@ +//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +MCObjectWriter::~MCObjectWriter() { +} diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp new file mode 100644 index 0000000..465d983 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -0,0 +1,328 @@ +//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the lexer for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/MC/MCAsmInfo.h" +#include <cerrno> +#include <cstdio> +#include <cstdlib> +using namespace llvm; + +AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { + CurBuf = NULL; + CurPtr = NULL; +} + +AsmLexer::~AsmLexer() { +} + +void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { + CurBuf = buf; + + if (ptr) + CurPtr = ptr; + else + CurPtr = CurBuf->getBufferStart(); + + TokStart = 0; +} + +/// ReturnError - Set the error to the specified string at the specified +/// location. This is defined to always return AsmToken::Error. +AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { + SetError(SMLoc::getFromPointer(Loc), Msg); + + return AsmToken(AsmToken::Error, StringRef(Loc, 0)); +} + +int AsmLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } +} + +/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* +AsmToken AsmLexer::LexIdentifier() { + while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || + *CurPtr == '.' || *CurPtr == '@') + ++CurPtr; + + // Handle . as a special case. + if (CurPtr == TokStart+1 && TokStart[0] == '.') + return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); + + return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); +} + +/// LexSlash: Slash: / +/// C-Style Comment: /* ... */ +AsmToken AsmLexer::LexSlash() { + switch (*CurPtr) { + case '*': break; // C style comment. + case '/': return ++CurPtr, LexLineComment(); + default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1)); + } + + // C Style comment. + ++CurPtr; // skip the star. + while (1) { + int CurChar = getNextChar(); + switch (CurChar) { + case EOF: + return ReturnError(TokStart, "unterminated comment"); + case '*': + // End of the comment? + if (CurPtr[0] != '/') break; + + ++CurPtr; // End the */. + return LexToken(); + } + } +} + +/// LexLineComment: Comment: #[^\n]* +/// : //[^\n]* +AsmToken AsmLexer::LexLineComment() { + // FIXME: This is broken if we happen to a comment at the end of a file, which + // was .included, and which doesn't end with a newline. + int CurChar = getNextChar(); + while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) + CurChar = getNextChar(); + + if (CurChar == EOF) + return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); + return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); +} + + +/// LexDigit: First character is [0-9]. +/// Local Label: [0-9][:] +/// Forward/Backward Label: [0-9][fb] +/// Binary integer: 0b[01]+ +/// Octal integer: 0[0-7]+ +/// Hex integer: 0x[0-9a-fA-F]+ +/// Decimal integer: [1-9][0-9]* +/// TODO: FP literal. +AsmToken AsmLexer::LexDigit() { + // Decimal integer: [1-9][0-9]* + if (CurPtr[-1] != '0') { + while (isdigit(*CurPtr)) + ++CurPtr; + + StringRef Result(TokStart, CurPtr - TokStart); + + long long Value; + if (Result.getAsInteger(10, Value)) { + // We have to handle minint_as_a_positive_value specially, because + // - minint_as_a_positive_value = minint and it is valid. + if (Result == "9223372036854775808") + Value = -9223372036854775808ULL; + else + return ReturnError(TokStart, "Invalid decimal number"); + } + return AsmToken(AsmToken::Integer, Result, Value); + } + + if (*CurPtr == 'b') { + ++CurPtr; + // See if we actually have "0b" as part of something like "jmp 0b\n" + if (!isdigit(CurPtr[0])) { + --CurPtr; + StringRef Result(TokStart, CurPtr - TokStart); + return AsmToken(AsmToken::Integer, Result, 0); + } + const char *NumStart = CurPtr; + while (CurPtr[0] == '0' || CurPtr[0] == '1') + ++CurPtr; + + // Requires at least one binary digit. + if (CurPtr == NumStart) + return ReturnError(TokStart, "Invalid binary number"); + + StringRef Result(TokStart, CurPtr - TokStart); + + long long Value; + if (Result.getAsInteger(2, Value)) + return ReturnError(TokStart, "Invalid binary number"); + + return AsmToken(AsmToken::Integer, Result, Value); + } + + if (*CurPtr == 'x') { + ++CurPtr; + const char *NumStart = CurPtr; + while (isxdigit(CurPtr[0])) + ++CurPtr; + + // Requires at least one hex digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + + unsigned long long Result; + if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) + return ReturnError(TokStart, "Invalid hexadecimal number"); + + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + (int64_t)Result); + } + + // Must be an octal number, it starts with 0. + while (*CurPtr >= '0' && *CurPtr <= '7') + ++CurPtr; + + StringRef Result(TokStart, CurPtr - TokStart); + long long Value; + if (Result.getAsInteger(8, Value)) + return ReturnError(TokStart, "Invalid octal number"); + + return AsmToken(AsmToken::Integer, Result, Value); +} + +/// LexQuote: String: "..." +AsmToken AsmLexer::LexQuote() { + int CurChar = getNextChar(); + // TODO: does gas allow multiline string constants? + while (CurChar != '"') { + if (CurChar == '\\') { + // Allow \", etc. + CurChar = getNextChar(); + } + + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated string constant"); + + CurChar = getNextChar(); + } + + return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); +} + +StringRef AsmLexer::LexUntilEndOfStatement() { + TokStart = CurPtr; + + while (!isAtStartOfComment(*CurPtr) && // Start of line comment. + *CurPtr != ';' && // End of statement marker. + *CurPtr != '\n' && + *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { + ++CurPtr; + } + return StringRef(TokStart, CurPtr-TokStart); +} + +bool AsmLexer::isAtStartOfComment(char Char) { + // FIXME: This won't work for multi-character comment indicators like "//". + return Char == *MAI.getCommentString(); +} + +AsmToken AsmLexer::LexToken() { + TokStart = CurPtr; + // This always consumes at least one character. + int CurChar = getNextChar(); + + if (isAtStartOfComment(CurChar)) + return LexLineComment(); + + switch (CurChar) { + default: + // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* + if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') + return LexIdentifier(); + + // Unknown character, emit an error. + return ReturnError(TokStart, "invalid character in input"); + case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + case 0: + case ' ': + case '\t': + // Ignore whitespace. + return LexToken(); + case '\n': // FALL THROUGH. + case '\r': // FALL THROUGH. + case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); + case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); + case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); + case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); + case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); + case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); + case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); + case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); + case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); + case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); + case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); + case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); + case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); + case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); + case '=': + if (*CurPtr == '=') + return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); + case '|': + if (*CurPtr == '|') + return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); + case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); + case '&': + if (*CurPtr == '&') + return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); + case '!': + if (*CurPtr == '=') + return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); + case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); + case '/': return LexSlash(); + case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + case '"': return LexQuote(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return LexDigit(); + case '<': + switch (*CurPtr) { + case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, + StringRef(TokStart, 2)); + case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); + } + case '>': + switch (*CurPtr) { + case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); + } + + // TODO: Quoted identifiers (objc methods etc) + // local labels: [0-9][:] + // Forward/backward labels: [0-9][fb] + // Integers, fp constants, character constants. + } +} diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp new file mode 100644 index 0000000..e0949bd --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -0,0 +1,1488 @@ +//===- AsmParser.cpp - Parser for Assembly Files --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the parser for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/AsmParser.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + +namespace { + +/// \brief Generic implementations of directive handling, etc. which is shared +/// (or the default, at least) for all assembler parser. +class GenericAsmParser : public MCAsmParserExtension { +public: + GenericAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + // Debugging directives. + Parser.AddDirectiveHandler(this, ".file", MCAsmParser::DirectiveHandler( + &GenericAsmParser::ParseDirectiveFile)); + Parser.AddDirectiveHandler(this, ".line", MCAsmParser::DirectiveHandler( + &GenericAsmParser::ParseDirectiveLine)); + Parser.AddDirectiveHandler(this, ".loc", MCAsmParser::DirectiveHandler( + &GenericAsmParser::ParseDirectiveLoc)); + } + + bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file" + bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line" + bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc" +}; + +} + +namespace llvm { + +extern MCAsmParserExtension *createDarwinAsmParser(); +extern MCAsmParserExtension *createELFAsmParser(); + +} + +enum { DEFAULT_ADDRSPACE = 0 }; + +AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx, + MCStreamer &_Out, const MCAsmInfo &_MAI) + : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), + GenericParser(new GenericAsmParser), PlatformParser(0), + TargetParser(0), CurBuffer(0) { + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + + // Initialize the generic parser. + GenericParser->Initialize(*this); + + // Initialize the platform / file format parser. + // + // FIXME: This is a hack, we need to (majorly) cleanup how these objects are + // created. + if (_MAI.hasSubsectionsViaSymbols()) { + PlatformParser = createDarwinAsmParser(); + PlatformParser->Initialize(*this); + } else { + PlatformParser = createELFAsmParser(); + PlatformParser->Initialize(*this); + } +} + +AsmParser::~AsmParser() { + delete PlatformParser; + delete GenericParser; +} + +void AsmParser::setTargetParser(TargetAsmParser &P) { + assert(!TargetParser && "Target parser is already initialized!"); + TargetParser = &P; + TargetParser->Initialize(*this); +} + +void AsmParser::Warning(SMLoc L, const Twine &Msg) { + PrintMessage(L, Msg.str(), "warning"); +} + +bool AsmParser::Error(SMLoc L, const Twine &Msg) { + PrintMessage(L, Msg.str(), "error"); + return true; +} + +void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg, + const char *Type) const { + SrcMgr.PrintMessage(Loc, Msg, Type); +} + +bool AsmParser::EnterIncludeFile(const std::string &Filename) { + int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc()); + if (NewBuf == -1) + return true; + + CurBuffer = NewBuf; + + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + + return false; +} + +const AsmToken &AsmParser::Lex() { + const AsmToken *tok = &Lexer.Lex(); + + if (tok->is(AsmToken::Eof)) { + // If this is the end of an included file, pop the parent file off the + // include stack. + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), + ParentIncludeLoc.getPointer()); + tok = &Lexer.Lex(); + } + } + + if (tok->is(AsmToken::Error)) + PrintMessage(Lexer.getErrLoc(), Lexer.getErr(), "error"); + + return *tok; +} + +bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { + // Create the initial section, if requested. + // + // FIXME: Target hook & command line option for initial section. + if (!NoInitialTextSection) + Out.SwitchSection(Ctx.getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, SectionKind::getText())); + + // Prime the lexer. + Lex(); + + bool HadError = false; + + AsmCond StartingCondState = TheCondState; + + // While we have input, parse each statement. + while (Lexer.isNot(AsmToken::Eof)) { + if (!ParseStatement()) continue; + + // We had an error, remember it and recover by skipping to the next line. + HadError = true; + EatToEndOfStatement(); + } + + if (TheCondState.TheCond != StartingCondState.TheCond || + TheCondState.Ignore != StartingCondState.Ignore) + return TokError("unmatched .ifs or .elses"); + + // Finalize the output stream if there are no errors and if the client wants + // us to. + if (!HadError && !NoFinalize) + Out.Finish(); + + return HadError; +} + +/// EatToEndOfStatement - Throw away the rest of the line for testing purposes. +void AsmParser::EatToEndOfStatement() { + while (Lexer.isNot(AsmToken::EndOfStatement) && + Lexer.isNot(AsmToken::Eof)) + Lex(); + + // Eat EOL. + if (Lexer.is(AsmToken::EndOfStatement)) + Lex(); +} + + +/// ParseParenExpr - Parse a paren expression and return it. +/// NOTE: This assumes the leading '(' has already been consumed. +/// +/// parenexpr ::= expr) +/// +bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { + if (ParseExpression(Res)) return true; + if (Lexer.isNot(AsmToken::RParen)) + return TokError("expected ')' in parentheses expression"); + EndLoc = Lexer.getLoc(); + Lex(); + return false; +} + +/// ParsePrimaryExpr - Parse a primary expression and return it. +/// primaryexpr ::= (parenexpr +/// primaryexpr ::= symbol +/// primaryexpr ::= number +/// primaryexpr ::= '.' +/// primaryexpr ::= ~,+,- primaryexpr +bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { + switch (Lexer.getKind()) { + default: + return TokError("unknown token in expression"); + case AsmToken::Exclaim: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreateLNot(Res, getContext()); + return false; + case AsmToken::String: + case AsmToken::Identifier: { + // This is a symbol reference. + std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@'); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first); + + // Mark the symbol as used in an expression. + Sym->setUsedInExpr(true); + + // Lookup the symbol variant if used. + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + if (Split.first.size() != getTok().getIdentifier().size()) + Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); + + EndLoc = Lexer.getLoc(); + Lex(); // Eat identifier. + + // If this is an absolute variable reference, substitute it now to preserve + // semantics in the face of reassignment. + if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) { + if (Variant) + return Error(EndLoc, "unexpected modified on variable reference"); + + Res = Sym->getVariableValue(); + return false; + } + + // Otherwise create a symbol ref. + Res = MCSymbolRefExpr::Create(Sym, Variant, getContext()); + return false; + } + case AsmToken::Integer: { + SMLoc Loc = getTok().getLoc(); + int64_t IntVal = getTok().getIntVal(); + Res = MCConstantExpr::Create(IntVal, getContext()); + EndLoc = Lexer.getLoc(); + Lex(); // Eat token. + // Look for 'b' or 'f' following an Integer as a directional label + if (Lexer.getKind() == AsmToken::Identifier) { + StringRef IDVal = getTok().getString(); + if (IDVal == "f" || IDVal == "b"){ + MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal, + IDVal == "f" ? 1 : 0); + Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + getContext()); + if(IDVal == "b" && Sym->isUndefined()) + return Error(Loc, "invalid reference to undefined symbol"); + EndLoc = Lexer.getLoc(); + Lex(); // Eat identifier. + } + } + return false; + } + case AsmToken::Dot: { + // This is a '.' reference, which references the current PC. Emit a + // temporary label to the streamer and refer to it. + MCSymbol *Sym = Ctx.CreateTempSymbol(); + Out.EmitLabel(Sym); + Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext()); + EndLoc = Lexer.getLoc(); + Lex(); // Eat identifier. + return false; + } + + case AsmToken::LParen: + Lex(); // Eat the '('. + return ParseParenExpr(Res, EndLoc); + case AsmToken::Minus: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreateMinus(Res, getContext()); + return false; + case AsmToken::Plus: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreatePlus(Res, getContext()); + return false; + case AsmToken::Tilde: + Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::CreateNot(Res, getContext()); + return false; + } +} + +bool AsmParser::ParseExpression(const MCExpr *&Res) { + SMLoc EndLoc; + return ParseExpression(Res, EndLoc); +} + +/// ParseExpression - Parse an expression and return it. +/// +/// expr ::= expr +,- expr -> lowest. +/// expr ::= expr |,^,&,! expr -> middle. +/// expr ::= expr *,/,%,<<,>> expr -> highest. +/// expr ::= primaryexpr +/// +bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { + // Parse the expression. + Res = 0; + if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc)) + return true; + + // Try to constant fold it up front, if possible. + int64_t Value; + if (Res->EvaluateAsAbsolute(Value)) + Res = MCConstantExpr::Create(Value, getContext()); + + return false; +} + +bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { + Res = 0; + return ParseParenExpr(Res, EndLoc) || + ParseBinOpRHS(1, Res, EndLoc); +} + +bool AsmParser::ParseAbsoluteExpression(int64_t &Res) { + const MCExpr *Expr; + + SMLoc StartLoc = Lexer.getLoc(); + if (ParseExpression(Expr)) + return true; + + if (!Expr->EvaluateAsAbsolute(Res)) + return Error(StartLoc, "expected absolute expression"); + + return false; +} + +static unsigned getBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind) { + switch (K) { + default: + return 0; // not a binop. + + // Lowest Precedence: &&, || + case AsmToken::AmpAmp: + Kind = MCBinaryExpr::LAnd; + return 1; + case AsmToken::PipePipe: + Kind = MCBinaryExpr::LOr; + return 1; + + // Low Precedence: +, -, ==, !=, <>, <, <=, >, >= + case AsmToken::Plus: + Kind = MCBinaryExpr::Add; + return 2; + case AsmToken::Minus: + Kind = MCBinaryExpr::Sub; + return 2; + case AsmToken::EqualEqual: + Kind = MCBinaryExpr::EQ; + return 2; + case AsmToken::ExclaimEqual: + case AsmToken::LessGreater: + Kind = MCBinaryExpr::NE; + return 2; + case AsmToken::Less: + Kind = MCBinaryExpr::LT; + return 2; + case AsmToken::LessEqual: + Kind = MCBinaryExpr::LTE; + return 2; + case AsmToken::Greater: + Kind = MCBinaryExpr::GT; + return 2; + case AsmToken::GreaterEqual: + Kind = MCBinaryExpr::GTE; + return 2; + + // Intermediate Precedence: |, &, ^ + // + // FIXME: gas seems to support '!' as an infix operator? + case AsmToken::Pipe: + Kind = MCBinaryExpr::Or; + return 3; + case AsmToken::Caret: + Kind = MCBinaryExpr::Xor; + return 3; + case AsmToken::Amp: + Kind = MCBinaryExpr::And; + return 3; + + // Highest Precedence: *, /, %, <<, >> + case AsmToken::Star: + Kind = MCBinaryExpr::Mul; + return 4; + case AsmToken::Slash: + Kind = MCBinaryExpr::Div; + return 4; + case AsmToken::Percent: + Kind = MCBinaryExpr::Mod; + return 4; + case AsmToken::LessLess: + Kind = MCBinaryExpr::Shl; + return 4; + case AsmToken::GreaterGreater: + Kind = MCBinaryExpr::Shr; + return 4; + } +} + + +/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'. +/// Res contains the LHS of the expression on input. +bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, + SMLoc &EndLoc) { + while (1) { + MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; + unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); + + // If the next token is lower precedence than we are allowed to eat, return + // successfully with what we ate already. + if (TokPrec < Precedence) + return false; + + Lex(); + + // Eat the next primary expression. + const MCExpr *RHS; + if (ParsePrimaryExpr(RHS, EndLoc)) return true; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + MCBinaryExpr::Opcode Dummy; + unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); + if (TokPrec < NextTokPrec) { + if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true; + } + + // Merge LHS and RHS according to operator. + Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext()); + } +} + + + + +/// ParseStatement: +/// ::= EndOfStatement +/// ::= Label* Directive ...Operands... EndOfStatement +/// ::= Label* Identifier OperandList* EndOfStatement +bool AsmParser::ParseStatement() { + if (Lexer.is(AsmToken::EndOfStatement)) { + Out.AddBlankLine(); + Lex(); + return false; + } + + // Statements always start with an identifier. + AsmToken ID = getTok(); + SMLoc IDLoc = ID.getLoc(); + StringRef IDVal; + int64_t LocalLabelVal = -1; + // GUESS allow an integer followed by a ':' as a directional local label + if (Lexer.is(AsmToken::Integer)) { + LocalLabelVal = getTok().getIntVal(); + if (LocalLabelVal < 0) { + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); + IDVal = ""; + } + else { + IDVal = getTok().getString(); + Lex(); // Consume the integer token to be used as an identifier token. + if (Lexer.getKind() != AsmToken::Colon) { + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); + } + } + } + else if (ParseIdentifier(IDVal)) { + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); + IDVal = ""; + } + + // Handle conditional assembly here before checking for skipping. We + // have to do this so that .endif isn't skipped in a ".if 0" block for + // example. + if (IDVal == ".if") + return ParseDirectiveIf(IDLoc); + if (IDVal == ".elseif") + return ParseDirectiveElseIf(IDLoc); + if (IDVal == ".else") + return ParseDirectiveElse(IDLoc); + if (IDVal == ".endif") + return ParseDirectiveEndIf(IDLoc); + + // If we are in a ".if 0" block, ignore this statement. + if (TheCondState.Ignore) { + EatToEndOfStatement(); + return false; + } + + // FIXME: Recurse on local labels? + + // See what kind of statement we have. + switch (Lexer.getKind()) { + case AsmToken::Colon: { + // identifier ':' -> Label. + Lex(); + + // Diagnose attempt to use a variable as a label. + // + // FIXME: Diagnostics. Note the location of the definition as a label. + // FIXME: This doesn't diagnose assignment to a symbol which has been + // implicitly marked as external. + MCSymbol *Sym; + if (LocalLabelVal == -1) + Sym = getContext().GetOrCreateSymbol(IDVal); + else + Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal); + if (!Sym->isUndefined() || Sym->isVariable()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Emit the label. + Out.EmitLabel(Sym); + + // Consume any end of statement token, if present, to avoid spurious + // AddBlankLine calls(). + if (Lexer.is(AsmToken::EndOfStatement)) { + Lex(); + if (Lexer.is(AsmToken::Eof)) + return false; + } + + return ParseStatement(); + } + + case AsmToken::Equal: + // identifier '=' ... -> assignment statement + Lex(); + + return ParseAssignment(IDVal); + + default: // Normal instruction or directive. + break; + } + + // Otherwise, we have a normal instruction or directive. + if (IDVal[0] == '.') { + // Assembler features + if (IDVal == ".set") + return ParseDirectiveSet(); + + // Data directives + + if (IDVal == ".ascii") + return ParseDirectiveAscii(false); + if (IDVal == ".asciz") + return ParseDirectiveAscii(true); + + if (IDVal == ".byte") + return ParseDirectiveValue(1); + if (IDVal == ".short") + return ParseDirectiveValue(2); + if (IDVal == ".long") + return ParseDirectiveValue(4); + if (IDVal == ".quad") + return ParseDirectiveValue(8); + + // FIXME: Target hooks for IsPow2. + if (IDVal == ".align") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + if (IDVal == ".align32") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + if (IDVal == ".balign") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); + if (IDVal == ".balignw") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); + if (IDVal == ".balignl") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); + if (IDVal == ".p2align") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + if (IDVal == ".p2alignw") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); + if (IDVal == ".p2alignl") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + + if (IDVal == ".org") + return ParseDirectiveOrg(); + + if (IDVal == ".fill") + return ParseDirectiveFill(); + if (IDVal == ".space") + return ParseDirectiveSpace(); + + // Symbol attribute directives + + if (IDVal == ".globl" || IDVal == ".global") + return ParseDirectiveSymbolAttribute(MCSA_Global); + if (IDVal == ".hidden") + return ParseDirectiveSymbolAttribute(MCSA_Hidden); + if (IDVal == ".indirect_symbol") + return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol); + if (IDVal == ".internal") + return ParseDirectiveSymbolAttribute(MCSA_Internal); + if (IDVal == ".lazy_reference") + return ParseDirectiveSymbolAttribute(MCSA_LazyReference); + if (IDVal == ".no_dead_strip") + return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip); + if (IDVal == ".private_extern") + return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern); + if (IDVal == ".protected") + return ParseDirectiveSymbolAttribute(MCSA_Protected); + if (IDVal == ".reference") + return ParseDirectiveSymbolAttribute(MCSA_Reference); + if (IDVal == ".type") + return ParseDirectiveELFType(); + if (IDVal == ".weak") + return ParseDirectiveSymbolAttribute(MCSA_Weak); + if (IDVal == ".weak_definition") + return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); + if (IDVal == ".weak_reference") + return ParseDirectiveSymbolAttribute(MCSA_WeakReference); + if (IDVal == ".weak_def_can_be_hidden") + return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); + + if (IDVal == ".comm") + return ParseDirectiveComm(/*IsLocal=*/false); + if (IDVal == ".lcomm") + return ParseDirectiveComm(/*IsLocal=*/true); + + if (IDVal == ".abort") + return ParseDirectiveAbort(); + if (IDVal == ".include") + return ParseDirectiveInclude(); + + // Look up the handler in the handler table. + std::pair<MCAsmParserExtension*, DirectiveHandler> Handler = + DirectiveMap.lookup(IDVal); + if (Handler.first) + return (Handler.first->*Handler.second)(IDVal, IDLoc); + + // Target hook for parsing target specific directives. + if (!getTargetParser().ParseDirective(ID)) + return false; + + Warning(IDLoc, "ignoring directive for now"); + EatToEndOfStatement(); + return false; + } + + // Canonicalize the opcode to lower case. + SmallString<128> Opcode; + for (unsigned i = 0, e = IDVal.size(); i != e; ++i) + Opcode.push_back(tolower(IDVal[i])); + + SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; + bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc, + ParsedOperands); + if (!HadError && Lexer.isNot(AsmToken::EndOfStatement)) + HadError = TokError("unexpected token in argument list"); + + // If parsing succeeded, match the instruction. + if (!HadError) { + MCInst Inst; + if (!getTargetParser().MatchInstruction(ParsedOperands, Inst)) { + // Emit the instruction on success. + Out.EmitInstruction(Inst); + } else { + // Otherwise emit a diagnostic about the match failure and set the error + // flag. + // + // FIXME: We should give nicer diagnostics about the exact failure. + Error(IDLoc, "unrecognized instruction"); + HadError = true; + } + } + + // If there was no error, consume the end-of-statement token. Otherwise this + // will be done by our caller. + if (!HadError) + Lex(); + + // Free any parsed operands. + for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) + delete ParsedOperands[i]; + + return HadError; +} + +bool AsmParser::ParseAssignment(StringRef Name) { + // FIXME: Use better location, we should use proper tokens. + SMLoc EqualLoc = Lexer.getLoc(); + + const MCExpr *Value; + if (ParseExpression(Value)) + return true; + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in assignment"); + + // Eat the end of statement marker. + Lex(); + + // Validate that the LHS is allowed to be a variable (either it has not been + // used as a symbol, or it is an absolute symbol). + MCSymbol *Sym = getContext().LookupSymbol(Name); + if (Sym) { + // Diagnose assignment to a label. + // + // FIXME: Diagnostics. Note the location of the definition as a label. + // FIXME: Diagnose assignment to protected identifier (e.g., register name). + if (Sym->isUndefined() && !Sym->isUsedInExpr()) + ; // Allow redefinitions of undefined symbols only used in directives. + else if (!Sym->isUndefined() && !Sym->isAbsolute()) + return Error(EqualLoc, "redefinition of '" + Name + "'"); + else if (!Sym->isVariable()) + return Error(EqualLoc, "invalid assignment to '" + Name + "'"); + else if (!isa<MCConstantExpr>(Sym->getVariableValue())) + return Error(EqualLoc, "invalid reassignment of non-absolute variable '" + + Name + "'"); + } else + Sym = getContext().GetOrCreateSymbol(Name); + + // FIXME: Handle '.'. + + Sym->setUsedInExpr(true); + + // Do the assignment. + Out.EmitAssignment(Sym, Value); + + return false; +} + +/// ParseIdentifier: +/// ::= identifier +/// ::= string +bool AsmParser::ParseIdentifier(StringRef &Res) { + if (Lexer.isNot(AsmToken::Identifier) && + Lexer.isNot(AsmToken::String)) + return true; + + Res = getTok().getIdentifier(); + + Lex(); // Consume the identifier token. + + return false; +} + +/// ParseDirectiveSet: +/// ::= .set identifier ',' expression +bool AsmParser::ParseDirectiveSet() { + StringRef Name; + + if (ParseIdentifier(Name)) + return TokError("expected identifier after '.set' directive"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.set'"); + Lex(); + + return ParseAssignment(Name); +} + +bool AsmParser::ParseEscapedString(std::string &Data) { + assert(getLexer().is(AsmToken::String) && "Unexpected current token!"); + + Data = ""; + StringRef Str = getTok().getStringContents(); + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (Str[i] != '\\') { + Data += Str[i]; + continue; + } + + // Recognize escaped characters. Note that this escape semantics currently + // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes. + ++i; + if (i == e) + return TokError("unexpected backslash at end of string"); + + // Recognize octal sequences. + if ((unsigned) (Str[i] - '0') <= 7) { + // Consume up to three octal characters. + unsigned Value = Str[i] - '0'; + + if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + ++i; + Value = Value * 8 + (Str[i] - '0'); + + if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + ++i; + Value = Value * 8 + (Str[i] - '0'); + } + } + + if (Value > 255) + return TokError("invalid octal escape sequence (out of range)"); + + Data += (unsigned char) Value; + continue; + } + + // Otherwise recognize individual escapes. + switch (Str[i]) { + default: + // Just reject invalid escape sequences for now. + return TokError("invalid escape sequence (unrecognized character)"); + + case 'b': Data += '\b'; break; + case 'f': Data += '\f'; break; + case 'n': Data += '\n'; break; + case 'r': Data += '\r'; break; + case 't': Data += '\t'; break; + case '"': Data += '"'; break; + case '\\': Data += '\\'; break; + } + } + + return false; +} + +/// ParseDirectiveAscii: +/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ] +bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.ascii' or '.asciz' directive"); + + std::string Data; + if (ParseEscapedString(Data)) + return true; + + getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE); + if (ZeroTerminated) + getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE); + + Lex(); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.ascii' or '.asciz' directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveValue +/// ::= (.byte | .short | ... ) [ expression (, expression)* ] +bool AsmParser::ParseDirectiveValue(unsigned Size) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + SMLoc ATTRIBUTE_UNUSED StartLoc = getLexer().getLoc(); + if (ParseExpression(Value)) + return true; + + // Special case constant expressions to match code generator. + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) + getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE); + else + getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveSpace +/// ::= .space expression [ , expression ] +bool AsmParser::ParseDirectiveSpace() { + int64_t NumBytes; + if (ParseAbsoluteExpression(NumBytes)) + return true; + + int64_t FillExpr = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.space' directive"); + Lex(); + + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.space' directive"); + } + + Lex(); + + if (NumBytes <= 0) + return TokError("invalid number of bytes in '.space' directive"); + + // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. + getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); + + return false; +} + +/// ParseDirectiveFill +/// ::= .fill expression , expression , expression +bool AsmParser::ParseDirectiveFill() { + int64_t NumValues; + if (ParseAbsoluteExpression(NumValues)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); + + int64_t FillSize; + if (ParseAbsoluteExpression(FillSize)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); + + int64_t FillExpr; + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.fill' directive"); + + Lex(); + + if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8) + return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); + + for (uint64_t i = 0, e = NumValues; i != e; ++i) + getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE); + + return false; +} + +/// ParseDirectiveOrg +/// ::= .org expression [ , expression ] +bool AsmParser::ParseDirectiveOrg() { + const MCExpr *Offset; + if (ParseExpression(Offset)) + return true; + + // Parse optional fill expression. + int64_t FillExpr = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.org' directive"); + Lex(); + + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.org' directive"); + } + + Lex(); + + // FIXME: Only limited forms of relocatable expressions are accepted here, it + // has to be relative to the current section. + getStreamer().EmitValueToOffset(Offset, FillExpr); + + return false; +} + +/// ParseDirectiveAlign +/// ::= {.align, ...} expression [ , expression [ , expression ]] +bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { + SMLoc AlignmentLoc = getLexer().getLoc(); + int64_t Alignment; + if (ParseAbsoluteExpression(Alignment)) + return true; + + SMLoc MaxBytesLoc; + bool HasFillExpr = false; + int64_t FillExpr = 0; + int64_t MaxBytesToFill = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + // The fill expression can be omitted while specifying a maximum number of + // alignment bytes, e.g: + // .align 3,,4 + if (getLexer().isNot(AsmToken::Comma)) { + HasFillExpr = true; + if (ParseAbsoluteExpression(FillExpr)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + MaxBytesLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(MaxBytesToFill)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + } + } + + Lex(); + + if (!HasFillExpr) + FillExpr = 0; + + // Compute alignment in bytes. + if (IsPow2) { + // FIXME: Diagnose overflow. + if (Alignment >= 32) { + Error(AlignmentLoc, "invalid alignment value"); + Alignment = 31; + } + + Alignment = 1ULL << Alignment; + } + + // Diagnose non-sensical max bytes to align. + if (MaxBytesLoc.isValid()) { + if (MaxBytesToFill < 1) { + Error(MaxBytesLoc, "alignment directive can never be satisfied in this " + "many bytes, ignoring maximum bytes expression"); + MaxBytesToFill = 0; + } + + if (MaxBytesToFill >= Alignment) { + Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and " + "has no effect"); + MaxBytesToFill = 0; + } + } + + // Check whether we should use optimal code alignment for this .align + // directive. + // + // FIXME: This should be using a target hook. + bool UseCodeAlign = false; + if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>( + getStreamer().getCurrentSection())) + UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) && + ValueSize == 1 && UseCodeAlign) { + getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill); + } else { + // FIXME: Target specific behavior about how the "extra" bytes are filled. + getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); + } + + return false; +} + +/// ParseDirectiveSymbolAttribute +/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] +bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + StringRef Name; + + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + getStreamer().EmitSymbolAttribute(Sym, Attr); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveELFType +/// ::= .type identifier , @attribute +bool AsmParser::ParseDirectiveELFType() { + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.type' directive"); + Lex(); + + if (getLexer().isNot(AsmToken::At)) + return TokError("expected '@' before type"); + Lex(); + + StringRef Type; + SMLoc TypeLoc; + + TypeLoc = getLexer().getLoc(); + if (ParseIdentifier(Type)) + return TokError("expected symbol type in directive"); + + MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type) + .Case("function", MCSA_ELF_TypeFunction) + .Case("object", MCSA_ELF_TypeObject) + .Case("tls_object", MCSA_ELF_TypeTLS) + .Case("common", MCSA_ELF_TypeCommon) + .Case("notype", MCSA_ELF_TypeNoType) + .Default(MCSA_Invalid); + + if (Attr == MCSA_Invalid) + return Error(TypeLoc, "unsupported attribute in '.type' directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.type' directive"); + + Lex(); + + getStreamer().EmitSymbolAttribute(Sym, Attr); + + return false; +} + +/// ParseDirectiveComm +/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] +bool AsmParser::ParseDirectiveComm(bool IsLocal) { + SMLoc IDLoc = getLexer().getLoc(); + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(Pow2Alignment)) + return true; + + // If this target takes alignments in bytes (not log) validate and convert. + if (Lexer.getMAI().getAlignmentIsInBytes()) { + if (!isPowerOf2_64(Pow2Alignment)) + return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); + Pow2Alignment = Log2_64(Pow2Alignment); + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.comm' or '.lcomm' directive"); + + Lex(); + + // NOTE: a size of zero for a .comm should create a undefined symbol + // but a size of .lcomm creates a bss symbol of size zero. + if (Size < 0) + return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " + "be less than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " + "alignment, can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // '.lcomm' is equivalent to '.zerofill'. + // Create the Symbol as a common or local common with Size and Pow2Alignment + if (IsLocal) { + getStreamer().EmitZerofill(Ctx.getMachOSection( + "__DATA", "__bss", MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Sym, Size, 1 << Pow2Alignment); + return false; + } + + getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); + return false; +} + +/// ParseDirectiveAbort +/// ::= .abort [ "abort_string" ] +bool AsmParser::ParseDirectiveAbort() { + // FIXME: Use loc from directive. + SMLoc Loc = getLexer().getLoc(); + + StringRef Str = ""; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.abort' directive"); + + Str = getTok().getString(); + + Lex(); + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.abort' directive"); + + Lex(); + + // FIXME: Handle here. + if (Str.empty()) + Error(Loc, ".abort detected. Assembly stopping."); + else + Error(Loc, ".abort '" + Str + "' detected. Assembly stopping."); + + return false; +} + +/// ParseDirectiveInclude +/// ::= .include "filename" +bool AsmParser::ParseDirectiveInclude() { + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.include' directive"); + + std::string Filename = getTok().getString(); + SMLoc IncludeLoc = getLexer().getLoc(); + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.include' directive"); + + // Strip the quotes. + Filename = Filename.substr(1, Filename.size()-2); + + // Attempt to switch the lexer to the included file before consuming the end + // of statement to avoid losing it when we switch. + if (EnterIncludeFile(Filename)) { + PrintMessage(IncludeLoc, + "Could not find include file '" + Filename + "'", + "error"); + return true; + } + + return false; +} + +/// ParseDirectiveIf +/// ::= .if expression +bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; + if(TheCondState.Ignore) { + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.if' directive"); + + Lex(); + + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElseIf +/// ::= .elseif expression +bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " + " an .elseif"); + TheCondState.TheCond = AsmCond::ElseIfCond; + + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) { + TheCondState.Ignore = true; + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.elseif' directive"); + + Lex(); + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElse +/// ::= .else +bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.else' directive"); + + Lex(); + + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " + ".elseif"); + TheCondState.TheCond = AsmCond::ElseCond; + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) + TheCondState.Ignore = true; + else + TheCondState.Ignore = false; + + return false; +} + +/// ParseDirectiveEndIf +/// ::= .endif +bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.endif' directive"); + + Lex(); + + if ((TheCondState.TheCond == AsmCond::NoCond) || + TheCondStack.empty()) + Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or " + ".else"); + if (!TheCondStack.empty()) { + TheCondState = TheCondStack.back(); + TheCondStack.pop_back(); + } + + return false; +} + +/// ParseDirectiveFile +/// ::= .file [number] string +bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { + // FIXME: I'm not sure what this is. + int64_t FileNumber = -1; + if (getLexer().is(AsmToken::Integer)) { + FileNumber = getTok().getIntVal(); + Lex(); + + if (FileNumber < 1) + return TokError("file number less than one"); + } + + if (getLexer().isNot(AsmToken::String)) + return TokError("unexpected token in '.file' directive"); + + StringRef Filename = getTok().getString(); + Filename = Filename.substr(1, Filename.size()-2); + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + if (FileNumber == -1) + getStreamer().EmitFileDirective(Filename); + else + getStreamer().EmitDwarfFileDirective(FileNumber, Filename); + + return false; +} + +/// ParseDirectiveLine +/// ::= .line [number] +bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.line' directive"); + + int64_t LineNumber = getTok().getIntVal(); + (void) LineNumber; + Lex(); + + // FIXME: Do something with the .line. + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.line' directive"); + + return false; +} + + +/// ParseDirectiveLoc +/// ::= .loc number [number [number]] +bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + + // FIXME: What are these fields? + int64_t FileNumber = getTok().getIntVal(); + (void) FileNumber; + // FIXME: Validate file. + + Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + + int64_t Param2 = getTok().getIntVal(); + (void) Param2; + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + + int64_t Param3 = getTok().getIntVal(); + (void) Param3; + Lex(); + + // FIXME: Do something with the .loc. + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + return false; +} + diff --git a/contrib/llvm/lib/MC/MCParser/CMakeLists.txt b/contrib/llvm/lib/MC/MCParser/CMakeLists.txt new file mode 100644 index 0000000..25a7bf4 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_library(LLVMMCParser + AsmLexer.cpp + AsmParser.cpp + DarwinAsmParser.cpp + ELFAsmParser.cpp + MCAsmLexer.cpp + MCAsmParser.cpp + MCAsmParserExtension.cpp + TargetAsmParser.cpp + ) diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp new file mode 100644 index 0000000..7d8639e --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp @@ -0,0 +1,758 @@ +//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +using namespace llvm; + +namespace { + +/// \brief Implementation of directive handling which is shared across all +/// Darwin targets. +class DarwinAsmParser : public MCAsmParserExtension { + bool ParseSectionSwitch(const char *Segment, const char *Section, + unsigned TAA = 0, unsigned ImplicitAlign = 0, + unsigned StubSize = 0); + +public: + DarwinAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + Parser.AddDirectiveHandler(this, ".desc", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveDesc)); + Parser.AddDirectiveHandler(this, ".lsym", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveLsym)); + Parser.AddDirectiveHandler(this, ".subsections_via_symbols", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols)); + Parser.AddDirectiveHandler(this, ".dump", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveDumpOrLoad)); + Parser.AddDirectiveHandler(this, ".load", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveDumpOrLoad)); + Parser.AddDirectiveHandler(this, ".section", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSection)); + Parser.AddDirectiveHandler(this, ".secure_log_unique", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSecureLogUnique)); + Parser.AddDirectiveHandler(this, ".secure_log_reset", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSecureLogReset)); + Parser.AddDirectiveHandler(this, ".tbss", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveTBSS)); + Parser.AddDirectiveHandler(this, ".zerofill", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveZerofill)); + + // Special section directives. + Parser.AddDirectiveHandler(this, ".const", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveConst)); + Parser.AddDirectiveHandler(this, ".const_data", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveConstData)); + Parser.AddDirectiveHandler(this, ".constructor", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveConstructor)); + Parser.AddDirectiveHandler(this, ".cstring", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveCString)); + Parser.AddDirectiveHandler(this, ".data", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveData)); + Parser.AddDirectiveHandler(this, ".destructor", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveDestructor)); + Parser.AddDirectiveHandler(this, ".dyld", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveDyld)); + Parser.AddDirectiveHandler(this, ".fvmlib_init0", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveFVMLibInit0)); + Parser.AddDirectiveHandler(this, ".fvmlib_init1", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveFVMLibInit1)); + Parser.AddDirectiveHandler(this, ".lazy_symbol_pointer", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers)); + Parser.AddDirectiveHandler(this, ".literal16", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLiteral16)); + Parser.AddDirectiveHandler(this, ".literal4", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLiteral4)); + Parser.AddDirectiveHandler(this, ".literal8", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLiteral8)); + Parser.AddDirectiveHandler(this, ".mod_init_func", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveModInitFunc)); + Parser.AddDirectiveHandler(this, ".mod_term_func", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveModTermFunc)); + Parser.AddDirectiveHandler(this, ".non_lazy_symbol_pointer", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers)); + Parser.AddDirectiveHandler(this, ".objc_cat_cls_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth)); + Parser.AddDirectiveHandler(this, ".objc_cat_inst_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth)); + Parser.AddDirectiveHandler(this, ".objc_category", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCCategory)); + Parser.AddDirectiveHandler(this, ".objc_class", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClass)); + Parser.AddDirectiveHandler(this, ".objc_class_names", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClassNames)); + Parser.AddDirectiveHandler(this, ".objc_class_vars", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClassVars)); + Parser.AddDirectiveHandler(this, ".objc_cls_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClsMeth)); + Parser.AddDirectiveHandler(this, ".objc_cls_refs", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClsRefs)); + Parser.AddDirectiveHandler(this, ".objc_inst_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCInstMeth)); + Parser.AddDirectiveHandler(this, ".objc_instance_vars", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars)); + Parser.AddDirectiveHandler(this, ".objc_message_refs", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs)); + Parser.AddDirectiveHandler(this, ".objc_meta_class", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMetaClass)); + Parser.AddDirectiveHandler(this, ".objc_meth_var_names", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames)); + Parser.AddDirectiveHandler(this, ".objc_meth_var_types", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes)); + Parser.AddDirectiveHandler(this, ".objc_module_info", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo)); + Parser.AddDirectiveHandler(this, ".objc_protocol", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCProtocol)); + Parser.AddDirectiveHandler(this, ".objc_selector_strs", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs)); + Parser.AddDirectiveHandler(this, ".objc_string_object", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCStringObject)); + Parser.AddDirectiveHandler(this, ".objc_symbols", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCSymbols)); + Parser.AddDirectiveHandler(this, ".picsymbol_stub", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectivePICSymbolStub)); + Parser.AddDirectiveHandler(this, ".static_const", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveStaticConst)); + Parser.AddDirectiveHandler(this, ".static_data", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveStaticData)); + Parser.AddDirectiveHandler(this, ".symbol_stub", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveSymbolStub)); + Parser.AddDirectiveHandler(this, ".tdata", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveTData)); + Parser.AddDirectiveHandler(this, ".text", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveText)); + Parser.AddDirectiveHandler(this, ".thread_init_func", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveThreadInitFunc)); + Parser.AddDirectiveHandler(this, ".tlv", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveTLV)); + } + + bool ParseDirectiveDesc(StringRef, SMLoc); + bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); + bool ParseDirectiveLsym(StringRef, SMLoc); + bool ParseDirectiveSection(); + bool ParseDirectiveSecureLogReset(StringRef, SMLoc); + bool ParseDirectiveSecureLogUnique(StringRef, SMLoc); + bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc); + bool ParseDirectiveTBSS(StringRef, SMLoc); + bool ParseDirectiveZerofill(StringRef, SMLoc); + + // Named Section Directive + bool ParseSectionDirectiveConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__const"); + } + bool ParseSectionDirectiveStaticConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__static_const"); + } + bool ParseSectionDirectiveCString(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveLiteral4(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, 4); + } + bool ParseSectionDirectiveLiteral8(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, 8); + } + bool ParseSectionDirectiveLiteral16(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__literal16", + MCSectionMachO::S_16BYTE_LITERALS, 16); + } + bool ParseSectionDirectiveConstructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__constructor"); + } + bool ParseSectionDirectiveDestructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__destructor"); + } + bool ParseSectionDirectiveFVMLibInit0(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init0"); + } + bool ParseSectionDirectiveFVMLibInit1(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init1"); + } + bool ParseSectionDirectiveSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__symbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + // FIXME: Different on PPC and ARM. + 0, 16); + } + bool ParseSectionDirectivePICSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__picsymbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26); + } + bool ParseSectionDirectiveData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__data"); + } + bool ParseSectionDirectiveStaticData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__static_data"); + } + bool ParseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveDyld(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__dyld"); + } + bool ParseSectionDirectiveModInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveModTermFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveConstData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__const"); + } + bool ParseSectionDirectiveObjCClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCMetaClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__meta_class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCProtocol(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__protocol", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCStringObject(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__string_object", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__message_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCSymbols(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__symbols", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCategory(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__category", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__instance_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__module_info", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__selector_strs", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveTData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_data", + MCSectionMachO::S_THREAD_LOCAL_REGULAR); + } + bool ParseSectionDirectiveText(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + } + bool ParseSectionDirectiveTLV(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES); + } + bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + } + +}; + +} + +bool DarwinAsmParser::ParseSectionSwitch(const char *Segment, + const char *Section, + unsigned TAA, unsigned Align, + unsigned StubSize) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + // FIXME: Arch specific. + bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. + getStreamer().SwitchSection(getContext().getMachOSection( + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); + + // Set the implicit alignment, if any. + // + // FIXME: This isn't really what 'as' does; I think it just uses the implicit + // alignment on the section (e.g., if one manually inserts bytes into the + // section, then just issueing the section switch directive will not realign + // the section. However, this is arguably more reasonable behavior, and there + // is no good reason for someone to intentionally emit incorrectly sized + // values into the implicitly aligned sections. + if (Align) + getStreamer().EmitValueToAlignment(Align, 0, 1, 0); + + return false; +} + +/// ParseDirectiveDesc +/// ::= .desc identifier , expression +bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.desc' directive"); + Lex(); + + int64_t DescValue; + if (getParser().ParseAbsoluteExpression(DescValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.desc' directive"); + + Lex(); + + // Set the n_desc field of this Symbol to this DescValue + getStreamer().EmitSymbolDesc(Sym, DescValue); + + return false; +} + +/// ParseDirectiveDumpOrLoad +/// ::= ( .dump | .load ) "filename" +bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive, + SMLoc IDLoc) { + bool IsDump = Directive == ".dump"; + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.dump' or '.load' directive"); + + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.dump' or '.load' directive"); + + Lex(); + + // FIXME: If/when .dump and .load are implemented they will be done in the + // the assembly parser and not have any need for an MCStreamer API. + if (IsDump) + Warning(IDLoc, "ignoring directive .dump for now"); + else + Warning(IDLoc, "ignoring directive .load for now"); + + return false; +} + +/// ParseDirectiveLsym +/// ::= .lsym identifier , expression +bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.lsym' directive"); + Lex(); + + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.lsym' directive"); + + Lex(); + + // We don't currently support this directive. + // + // FIXME: Diagnostic location! + (void) Sym; + return TokError("directive '.lsym' is unsupported"); +} + +/// ParseDirectiveSection: +/// ::= .section identifier (',' identifier)* +bool DarwinAsmParser::ParseDirectiveSection() { + SMLoc Loc = getLexer().getLoc(); + + StringRef SectionName; + if (getParser().ParseIdentifier(SectionName)) + return Error(Loc, "expected identifier after '.section' directive"); + + // Verify there is a following comma. + if (!getLexer().is(AsmToken::Comma)) + return TokError("unexpected token in '.section' directive"); + + std::string SectionSpec = SectionName; + SectionSpec += ","; + + // Add all the tokens until the end of the line, ParseSectionSpecifier will + // handle this. + StringRef EOL = getLexer().LexUntilEndOfStatement(); + SectionSpec.append(EOL.begin(), EOL.end()); + + Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.section' directive"); + Lex(); + + + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorStr = + MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, + TAA, StubSize); + + if (!ErrorStr.empty()) + return Error(Loc, ErrorStr.c_str()); + + // FIXME: Arch specific. + bool isText = Segment == "__TEXT"; // FIXME: Hack. + getStreamer().SwitchSection(getContext().getMachOSection( + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); + return false; +} + +/// ParseDirectiveSecureLogUnique +/// ::= .secure_log_unique "log message" +bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { + std::string LogMessage; + + if (getLexer().isNot(AsmToken::String)) + LogMessage = ""; + else{ + LogMessage = getTok().getString(); + Lex(); + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.secure_log_unique' directive"); + + if (getContext().getSecureLogUsed() != false) + return Error(IDLoc, ".secure_log_unique specified multiple times"); + + char *SecureLogFile = getContext().getSecureLogFile(); + if (SecureLogFile == NULL) + return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE " + "environment variable unset."); + + raw_ostream *OS = getContext().getSecureLog(); + if (OS == NULL) { + std::string Err; + OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append); + if (!Err.empty()) { + delete OS; + return Error(IDLoc, Twine("can't open secure log file: ") + + SecureLogFile + " (" + Err + ")"); + } + getContext().setSecureLog(OS); + } + + int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc); + *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier() + << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":" + << LogMessage + "\n"; + + getContext().setSecureLogUsed(true); + + return false; +} + +/// ParseDirectiveSecureLogReset +/// ::= .secure_log_reset +bool DarwinAsmParser::ParseDirectiveSecureLogReset(StringRef, SMLoc IDLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.secure_log_reset' directive"); + + Lex(); + + getContext().setSecureLogUsed(false); + + return false; +} + +/// ParseDirectiveSubsectionsViaSymbols +/// ::= .subsections_via_symbols +bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.subsections_via_symbols' directive"); + + Lex(); + + getStreamer().EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); + + return false; +} + +/// ParseDirectiveTBSS +/// ::= .tbss identifier, size, align +bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) { + SMLoc IDLoc = getLexer().getLoc(); + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.tbss' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than" + "zero"); + + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less" + "than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + getStreamer().EmitTBSSSymbol(getContext().getMachOSection( + "__DATA", "__thread_bss", + MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, + 0, SectionKind::getThreadBSS()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +/// ParseDirectiveZerofill +/// ::= .zerofill segname , sectname [, identifier , size_expression [ +/// , align_expression ]] +bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { + StringRef Segment; + if (getParser().ParseIdentifier(Segment)) + return TokError("expected segment name after '.zerofill' directive"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + StringRef Section; + if (getParser().ParseIdentifier(Section)) + return TokError("expected section name after comma in '.zerofill' " + "directive"); + + // If this is the end of the line all that was wanted was to create the + // the section but with no symbol. + if (getLexer().is(AsmToken::EndOfStatement)) { + // Create the zerofill section but no symbol + getStreamer().EmitZerofill(getContext().getMachOSection( + Segment, Section, MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS())); + return false; + } + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + SMLoc IDLoc = getLexer().getLoc(); + StringRef IDStr; + if (getParser().ParseIdentifier(IDStr)) + return TokError("expected identifier in directive"); + + // handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.zerofill' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " + "than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " + "can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Create the zerofill Symbol with Size and Pow2Alignment + // + // FIXME: Arch specific. + getStreamer().EmitZerofill(getContext().getMachOSection( + Segment, Section, MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +namespace llvm { + +MCAsmParserExtension *createDarwinAsmParser() { + return new DarwinAsmParser; +} + +} diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp new file mode 100644 index 0000000..7a54dd3 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -0,0 +1,68 @@ +//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +using namespace llvm; + +namespace { + +class ELFAsmParser : public MCAsmParserExtension { + bool ParseSectionSwitch(StringRef Section, unsigned Type, + unsigned Flags, SectionKind Kind); + +public: + ELFAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + Parser.AddDirectiveHandler(this, ".data", MCAsmParser::DirectiveHandler( + &ELFAsmParser::ParseSectionDirectiveData)); + Parser.AddDirectiveHandler(this, ".text", MCAsmParser::DirectiveHandler( + &ELFAsmParser::ParseSectionDirectiveText)); + } + + bool ParseSectionDirectiveData(StringRef, SMLoc) { + return ParseSectionSwitch(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + } + bool ParseSectionDirectiveText(StringRef, SMLoc) { + return ParseSectionSwitch(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | + MCSectionELF::SHF_ALLOC, SectionKind::getText()); + } +}; + +} + +bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, + unsigned Flags, SectionKind Kind) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + getStreamer().SwitchSection(getContext().getELFSection( + Section, Type, Flags, Kind)); + + return false; +} + +namespace llvm { + +MCAsmParserExtension *createELFAsmParser() { + return new ELFAsmParser; +} + +} diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp new file mode 100644 index 0000000..dceece7 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp @@ -0,0 +1,27 @@ +//===-- MCAsmLexer.cpp - Abstract Asm Lexer Interface ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/Support/SourceMgr.h" + +using namespace llvm; + +MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) { +} + +MCAsmLexer::~MCAsmLexer() { +} + +SMLoc MCAsmLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + +SMLoc AsmToken::getLoc() const { + return SMLoc::getFromPointer(Str.data()); +} diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp new file mode 100644 index 0000000..bee3064 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp @@ -0,0 +1,41 @@ +//===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/SourceMgr.h" +using namespace llvm; + +MCAsmParser::MCAsmParser() { +} + +MCAsmParser::~MCAsmParser() { +} + +const AsmToken &MCAsmParser::getTok() { + return getLexer().getTok(); +} + +bool MCAsmParser::TokError(const char *Msg) { + Error(getLexer().getLoc(), Msg); + return true; +} + +bool MCAsmParser::ParseExpression(const MCExpr *&Res) { + SMLoc L; + return ParseExpression(Res, L); +} + +/// getStartLoc - Get the location of the first token of this operand. +SMLoc MCParsedAsmOperand::getStartLoc() const { return SMLoc(); } +SMLoc MCParsedAsmOperand::getEndLoc() const { return SMLoc(); } + + diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp new file mode 100644 index 0000000..c30d306 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp @@ -0,0 +1,21 @@ +//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +using namespace llvm; + +MCAsmParserExtension::MCAsmParserExtension() { +} + +MCAsmParserExtension::~MCAsmParserExtension() { +} + +void MCAsmParserExtension::Initialize(MCAsmParser &Parser) { + this->Parser = &Parser; +} diff --git a/contrib/llvm/lib/MC/MCParser/Makefile b/contrib/llvm/lib/MC/MCParser/Makefile new file mode 100644 index 0000000..4477757 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/MC/MCParser/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMMCParser +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common + diff --git a/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp new file mode 100644 index 0000000..05760c9 --- /dev/null +++ b/contrib/llvm/lib/MC/MCParser/TargetAsmParser.cpp @@ -0,0 +1,19 @@ +//===-- TargetAsmParser.cpp - Target Assembly Parser -----------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + +TargetAsmParser::TargetAsmParser(const Target &T) + : TheTarget(T) +{ +} + +TargetAsmParser::~TargetAsmParser() { +} diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp new file mode 100644 index 0000000..a792d56 --- /dev/null +++ b/contrib/llvm/lib/MC/MCSection.cpp @@ -0,0 +1,22 @@ +//===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MCSection +//===----------------------------------------------------------------------===// + +MCSection::~MCSection() { +} + diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp new file mode 100644 index 0000000..eb53160 --- /dev/null +++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp @@ -0,0 +1,76 @@ +//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +MCSectionCOFF::~MCSectionCOFF() {} // anchor. + +// ShouldOmitSectionDirective - Decides whether a '.section' directive +// should be printed before the section name +bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name, + const MCAsmInfo &MAI) const { + + // FIXME: Does .section .bss/.data/.text work everywhere?? + if (Name == ".text" || Name == ".data" || Name == ".bss") + return true; + + return false; +} + +void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + + // standard sections don't require the '.section' + if (ShouldOmitSectionDirective(SectionName, MAI)) { + OS << '\t' << getSectionName() << '\n'; + return; + } + + OS << "\t.section\t" << getSectionName() << ",\""; + if (getKind().isText()) + OS << 'x'; + if (getKind().isWriteable()) + OS << 'w'; + else + OS << 'r'; + if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) + OS << 'n'; + OS << "\"\n"; + + if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { + switch (Selection) { + case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: + OS << "\t.linkonce one_only\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_ANY: + OS << "\t.linkonce discard\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: + OS << "\t.linkonce same_size\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: + OS << "\t.linkonce same_contents\n"; + break; + //NOTE: as of binutils 2.20, there is no way to specifiy select largest + // with the .linkonce directive. For now, we treat it as an invalid + // comdat selection value. + case COFF::IMAGE_COMDAT_SELECT_LARGEST: + // OS << "\t.linkonce largest\n"; + // break; + default: + assert (0 && "unsupported COFF selection type"); + break; + } + } +} diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp new file mode 100644 index 0000000..a7599de --- /dev/null +++ b/contrib/llvm/lib/MC/MCSectionELF.cpp @@ -0,0 +1,135 @@ +//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +MCSectionELF::~MCSectionELF() {} // anchor. + +// ShouldOmitSectionDirective - Decides whether a '.section' directive +// should be printed before the section name +bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, + const MCAsmInfo &MAI) const { + + // FIXME: Does .section .bss/.data/.text work everywhere?? + if (Name == ".text" || Name == ".data" || + (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS())) + return true; + + return false; +} + +// ShouldPrintSectionType - Only prints the section type if supported +bool MCSectionELF::ShouldPrintSectionType(unsigned Ty) const { + if (IsExplicit && !(Ty == SHT_NOBITS || Ty == SHT_PROGBITS)) + return false; + + return true; +} + +void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + + if (ShouldOmitSectionDirective(SectionName, MAI)) { + OS << '\t' << getSectionName() << '\n'; + return; + } + + OS << "\t.section\t" << getSectionName(); + + // Handle the weird solaris syntax if desired. + if (MAI.usesSunStyleELFSectionSwitchSyntax() && + !(Flags & MCSectionELF::SHF_MERGE)) { + if (Flags & MCSectionELF::SHF_ALLOC) + OS << ",#alloc"; + if (Flags & MCSectionELF::SHF_EXECINSTR) + OS << ",#execinstr"; + if (Flags & MCSectionELF::SHF_WRITE) + OS << ",#write"; + if (Flags & MCSectionELF::SHF_TLS) + OS << ",#tls"; + OS << '\n'; + return; + } + + OS << ",\""; + if (Flags & MCSectionELF::SHF_ALLOC) + OS << 'a'; + if (Flags & MCSectionELF::SHF_EXECINSTR) + OS << 'x'; + if (Flags & MCSectionELF::SHF_WRITE) + OS << 'w'; + if (Flags & MCSectionELF::SHF_MERGE) + OS << 'M'; + if (Flags & MCSectionELF::SHF_STRINGS) + OS << 'S'; + if (Flags & MCSectionELF::SHF_TLS) + OS << 'T'; + + // If there are target-specific flags, print them. + if (Flags & MCSectionELF::XCORE_SHF_CP_SECTION) + OS << 'c'; + if (Flags & MCSectionELF::XCORE_SHF_DP_SECTION) + OS << 'd'; + + OS << '"'; + + if (ShouldPrintSectionType(Type)) { + OS << ','; + + // If comment string is '@', e.g. as on ARM - use '%' instead + if (MAI.getCommentString()[0] == '@') + OS << '%'; + else + OS << '@'; + + if (Type == MCSectionELF::SHT_INIT_ARRAY) + OS << "init_array"; + else if (Type == MCSectionELF::SHT_FINI_ARRAY) + OS << "fini_array"; + else if (Type == MCSectionELF::SHT_PREINIT_ARRAY) + OS << "preinit_array"; + else if (Type == MCSectionELF::SHT_NOBITS) + OS << "nobits"; + else if (Type == MCSectionELF::SHT_PROGBITS) + OS << "progbits"; + + if (getKind().isMergeable1ByteCString()) { + OS << ",1"; + } else if (getKind().isMergeable2ByteCString()) { + OS << ",2"; + } else if (getKind().isMergeable4ByteCString() || + getKind().isMergeableConst4()) { + OS << ",4"; + } else if (getKind().isMergeableConst8()) { + OS << ",8"; + } else if (getKind().isMergeableConst16()) { + OS << ",16"; + } + } + + OS << '\n'; +} + +// HasCommonSymbols - True if this section holds common symbols, this is +// indicated on the ELF object file by a symbol with SHN_COMMON section +// header index. +bool MCSectionELF::HasCommonSymbols() const { + + if (StringRef(SectionName).startswith(".gnu.linkonce.")) + return true; + + return false; +} + + diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp new file mode 100644 index 0000000..ded3b20 --- /dev/null +++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp @@ -0,0 +1,286 @@ +//===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// SectionTypeDescriptors - These are strings that describe the various section +/// types. This *must* be kept in order with and stay synchronized with the +/// section type list. +static const struct { + const char *AssemblerName, *EnumName; +} SectionTypeDescriptors[MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1] = { + { "regular", "S_REGULAR" }, // 0x00 + { 0, "S_ZEROFILL" }, // 0x01 + { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02 + { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03 + { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04 + { "literal_pointers", "S_LITERAL_POINTERS" }, // 0x05 + { "non_lazy_symbol_pointers", "S_NON_LAZY_SYMBOL_POINTERS" }, // 0x06 + { "lazy_symbol_pointers", "S_LAZY_SYMBOL_POINTERS" }, // 0x07 + { "symbol_stubs", "S_SYMBOL_STUBS" }, // 0x08 + { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09 + { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A + { "coalesced", "S_COALESCED" }, // 0x0B + { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C + { "interposing", "S_INTERPOSING" }, // 0x0D + { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E + { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F + { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10 + { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11 + { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12 + { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13 + { "thread_local_variable_pointers", + "S_THREAD_LOCAL_VARIABLE_POINTERS" }, // 0x14 + { "thread_local_init_function_pointers", + "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"}, // 0x15 +}; + + +/// SectionAttrDescriptors - This is an array of descriptors for section +/// attributes. Unlike the SectionTypeDescriptors, this is not directly indexed +/// by attribute, instead it is searched. The last entry has an AttrFlagEnd +/// AttrFlag value. +static const struct { + unsigned AttrFlag; + const char *AssemblerName, *EnumName; +} SectionAttrDescriptors[] = { +#define ENTRY(ASMNAME, ENUM) \ + { MCSectionMachO::ENUM, ASMNAME, #ENUM }, +ENTRY("pure_instructions", S_ATTR_PURE_INSTRUCTIONS) +ENTRY("no_toc", S_ATTR_NO_TOC) +ENTRY("strip_static_syms", S_ATTR_STRIP_STATIC_SYMS) +ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP) +ENTRY("live_support", S_ATTR_LIVE_SUPPORT) +ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE) +ENTRY("debug", S_ATTR_DEBUG) +ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS) +ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC) +ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) +#undef ENTRY + { 0, "none", 0 }, // used if section has no attributes but has a stub size +#define AttrFlagEnd 0xffffffff // non legal value, multiple attribute bits set + { AttrFlagEnd, 0, 0 } +}; + +MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, + unsigned TAA, unsigned reserved2, SectionKind K) + : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) { + assert(Segment.size() <= 16 && Section.size() <= 16 && + "Segment or section string too long"); + for (unsigned i = 0; i != 16; ++i) { + if (i < Segment.size()) + SegmentName[i] = Segment[i]; + else + SegmentName[i] = 0; + + if (i < Section.size()) + SectionName[i] = Section[i]; + else + SectionName[i] = 0; + } +} + +void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + OS << "\t.section\t" << getSegmentName() << ',' << getSectionName(); + + // Get the section type and attributes. + unsigned TAA = getTypeAndAttributes(); + if (TAA == 0) { + OS << '\n'; + return; + } + + OS << ','; + + unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE; + assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE && + "Invalid SectionType specified!"); + + if (SectionTypeDescriptors[SectionType].AssemblerName) + OS << SectionTypeDescriptors[SectionType].AssemblerName; + else + OS << "<<" << SectionTypeDescriptors[SectionType].EnumName << ">>"; + + // If we don't have any attributes, we're done. + unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES; + if (SectionAttrs == 0) { + // If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as + // the attribute specifier. + if (Reserved2 != 0) + OS << ",none," << Reserved2; + OS << '\n'; + return; + } + + // Check each attribute to see if we have it. + char Separator = ','; + for (unsigned i = 0; SectionAttrDescriptors[i].AttrFlag; ++i) { + // Check to see if we have this attribute. + if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0) + continue; + + // Yep, clear it and print it. + SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag; + + OS << Separator; + if (SectionAttrDescriptors[i].AssemblerName) + OS << SectionAttrDescriptors[i].AssemblerName; + else + OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>"; + Separator = '+'; + } + + assert(SectionAttrs == 0 && "Unknown section attributes!"); + + // If we have a S_SYMBOL_STUBS size specified, print it. + if (Reserved2 != 0) + OS << ',' << Reserved2; + OS << '\n'; +} + +/// StripSpaces - This removes leading and trailing spaces from the StringRef. +static void StripSpaces(StringRef &Str) { + while (!Str.empty() && isspace(Str[0])) + Str = Str.substr(1); + while (!Str.empty() && isspace(Str.back())) + Str = Str.substr(0, Str.size()-1); +} + +/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec". +/// This is a string that can appear after a .section directive in a mach-o +/// flavored .s file. If successful, this fills in the specified Out +/// parameters and returns an empty string. When an invalid section +/// specifier is present, this returns a string indicating the problem. +std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. + StringRef &Segment, // Out. + StringRef &Section, // Out. + unsigned &TAA, // Out. + unsigned &StubSize) { // Out. + // Find the first comma. + std::pair<StringRef, StringRef> Comma = Spec.split(','); + + // If there is no comma, we fail. + if (Comma.second.empty()) + return "mach-o section specifier requires a segment and section " + "separated by a comma"; + + // Capture segment, remove leading and trailing whitespace. + Segment = Comma.first; + StripSpaces(Segment); + + // Verify that the segment is present and not too long. + if (Segment.empty() || Segment.size() > 16) + return "mach-o section specifier requires a segment whose length is " + "between 1 and 16 characters"; + + // Split the section name off from any attributes if present. + Comma = Comma.second.split(','); + + // Capture section, remove leading and trailing whitespace. + Section = Comma.first; + StripSpaces(Section); + + // Verify that the section is present and not too long. + if (Section.empty() || Section.size() > 16) + return "mach-o section specifier requires a section whose length is " + "between 1 and 16 characters"; + + // If there is no comma after the section, we're done. + TAA = 0; + StubSize = 0; + if (Comma.second.empty()) + return ""; + + // Otherwise, we need to parse the section type and attributes. + Comma = Comma.second.split(','); + + // Get the section type. + StringRef SectionType = Comma.first; + StripSpaces(SectionType); + + // Figure out which section type it is. + unsigned TypeID; + for (TypeID = 0; TypeID !=MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID) + if (SectionTypeDescriptors[TypeID].AssemblerName && + SectionType == SectionTypeDescriptors[TypeID].AssemblerName) + break; + + // If we didn't find the section type, reject it. + if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE) + return "mach-o section specifier uses an unknown section type"; + + // Remember the TypeID. + TAA = TypeID; + + // If we have no comma after the section type, there are no attributes. + if (Comma.second.empty()) { + // S_SYMBOL_STUBS always require a symbol stub size specifier. + if (TAA == MCSectionMachO::S_SYMBOL_STUBS) + return "mach-o section specifier of type 'symbol_stubs' requires a size " + "specifier"; + return ""; + } + + // Otherwise, we do have some attributes. Split off the size specifier if + // present. + Comma = Comma.second.split(','); + StringRef Attrs = Comma.first; + + // The attribute list is a '+' separated list of attributes. + std::pair<StringRef, StringRef> Plus = Attrs.split('+'); + + while (1) { + StringRef Attr = Plus.first; + StripSpaces(Attr); + + // Look up the attribute. + for (unsigned i = 0; ; ++i) { + if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd) + return "mach-o section specifier has invalid attribute"; + + if (SectionAttrDescriptors[i].AssemblerName && + Attr == SectionAttrDescriptors[i].AssemblerName) { + TAA |= SectionAttrDescriptors[i].AttrFlag; + break; + } + } + + if (Plus.second.empty()) break; + Plus = Plus.second.split('+'); + }; + + // Okay, we've parsed the section attributes, see if we have a stub size spec. + if (Comma.second.empty()) { + // S_SYMBOL_STUBS always require a symbol stub size specifier. + if (TAA == MCSectionMachO::S_SYMBOL_STUBS) + return "mach-o section specifier of type 'symbol_stubs' requires a size " + "specifier"; + return ""; + } + + // If we have a stub size spec, we must have a sectiontype of S_SYMBOL_STUBS. + if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS) + return "mach-o section specifier cannot have a stub size specified because " + "it does not have type 'symbol_stubs'"; + + // Okay, if we do, it must be a number. + StringRef StubSizeStr = Comma.second; + StripSpaces(StubSizeStr); + + // Convert the stub size from a string to an integer. + if (StubSizeStr.getAsInteger(0, StubSize)) + return "mach-o section specifier has a malformed stub size"; + + return ""; +} + diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp new file mode 100644 index 0000000..573f2a3 --- /dev/null +++ b/contrib/llvm/lib/MC/MCStreamer.cpp @@ -0,0 +1,64 @@ +//===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include <cstdlib> +using namespace llvm; + +MCStreamer::MCStreamer(MCContext &_Context) : Context(_Context), CurSection(0) { +} + +MCStreamer::~MCStreamer() { +} + +raw_ostream &MCStreamer::GetCommentOS() { + // By default, discard comments. + return nulls(); +} + + +/// EmitIntValue - Special case of EmitValue that avoids the client having to +/// pass in a MCExpr for constant integers. +void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size, + unsigned AddrSpace) { + EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace); +} + +void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, + unsigned AddrSpace) { + EmitValue(MCSymbolRefExpr::Create(Sym, getContext()), Size, AddrSpace); +} + +/// EmitFill - Emit NumBytes bytes worth of the value specified by +/// FillValue. This implements directives such as '.space'. +void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + const MCExpr *E = MCConstantExpr::Create(FillValue, getContext()); + for (uint64_t i = 0, e = NumBytes; i != e; ++i) + EmitValue(E, 1, AddrSpace); +} + +/// EmitRawText - If this file is backed by an assembly streamer, this dumps +/// the specified string in the output .s file. This capability is +/// indicated by the hasRawTextSupport() predicate. +void MCStreamer::EmitRawText(StringRef String) { + errs() << "EmitRawText called on an MCStreamer that doesn't support it, " + " something must not be fully mc'ized\n"; + abort(); +} + +void MCStreamer::EmitRawText(const Twine &T) { + SmallString<128> Str; + T.toVector(Str); + EmitRawText(Str.str()); +} diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp new file mode 100644 index 0000000..07751f7 --- /dev/null +++ b/contrib/llvm/lib/MC/MCSymbol.cpp @@ -0,0 +1,67 @@ +//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// Sentinel value for the absolute pseudo section. +const MCSection *MCSymbol::AbsolutePseudoSection = + reinterpret_cast<const MCSection *>(1); + +static bool isAcceptableChar(char C) { + if ((C < 'a' || C > 'z') && + (C < 'A' || C > 'Z') && + (C < '0' || C > '9') && + C != '_' && C != '$' && C != '.' && C != '@') + return false; + return true; +} + +/// NameNeedsQuoting - Return true if the identifier \arg Str needs quotes to be +/// syntactically correct. +static bool NameNeedsQuoting(StringRef Str) { + assert(!Str.empty() && "Cannot create an empty MCSymbol"); + + // If any of the characters in the string is an unacceptable character, force + // quotes. + for (unsigned i = 0, e = Str.size(); i != e; ++i) + if (!isAcceptableChar(Str[i])) + return true; + return false; +} + +void MCSymbol::setVariableValue(const MCExpr *Value) { + assert(Value && "Invalid variable value!"); + assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) && + "Invalid redefinition!"); + this->Value = Value; + + // Mark the variable as absolute as appropriate. + if (isa<MCConstantExpr>(Value)) + setAbsolute(); +} + +void MCSymbol::print(raw_ostream &OS) const { + // The name for this MCSymbol is required to be a valid target name. However, + // some targets support quoting names with funny characters. If the name + // contains a funny character, then print it quoted. + if (!NameNeedsQuoting(getName())) { + OS << getName(); + return; + } + + OS << '"' << getName() << '"'; +} + +void MCSymbol::dump() const { + print(dbgs()); +} diff --git a/contrib/llvm/lib/MC/MCValue.cpp b/contrib/llvm/lib/MC/MCValue.cpp new file mode 100644 index 0000000..c6ea16c --- /dev/null +++ b/contrib/llvm/lib/MC/MCValue.cpp @@ -0,0 +1,36 @@ +//===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { + if (isAbsolute()) { + OS << getConstant(); + return; + } + + getSymA()->print(OS); + + if (getSymB()) { + OS << " - "; + getSymB()->print(OS); + } + + if (getConstant()) + OS << " + " << getConstant(); +} + +void MCValue::dump() const { + print(dbgs(), 0); +} diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp new file mode 100644 index 0000000..7ca0951 --- /dev/null +++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp @@ -0,0 +1,1229 @@ +//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MachObjectWriter.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCMachOSymbolFlags.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" +#include "llvm/Target/TargetAsmBackend.h" + +// FIXME: Gross. +#include "../Target/X86/X86FixupKinds.h" + +#include <vector> +using namespace llvm; + +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case X86::reloc_pcrel_2byte: + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +static bool isFixupKindPCRel(unsigned Kind) { + switch (Kind) { + default: + return false; + case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_2byte: + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + return true; + } +} + +static bool isFixupKindRIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; +} + +static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { + // Undefined symbols are always extern. + if (SD->Symbol->isUndefined()) + return true; + + // References to weak definitions require external relocation entries; the + // definition may not always be the one in the same object file. + if (SD->getFlags() & SF_WeakDefinition) + return true; + + // Otherwise, we can use an internal relocation. + return false; +} + +namespace { + +class MachObjectWriterImpl { + // See <mach-o/loader.h>. + enum { + Header_Magic32 = 0xFEEDFACE, + Header_Magic64 = 0xFEEDFACF + }; + + enum { + Header32Size = 28, + Header64Size = 32, + SegmentLoadCommand32Size = 56, + SegmentLoadCommand64Size = 72, + Section32Size = 68, + Section64Size = 80, + SymtabLoadCommandSize = 24, + DysymtabLoadCommandSize = 80, + Nlist32Size = 12, + Nlist64Size = 16, + RelocationInfoSize = 8 + }; + + enum HeaderFileType { + HFT_Object = 0x1 + }; + + enum HeaderFlags { + HF_SubsectionsViaSymbols = 0x2000 + }; + + enum LoadCommandType { + LCT_Segment = 0x1, + LCT_Symtab = 0x2, + LCT_Dysymtab = 0xb, + LCT_Segment64 = 0x19 + }; + + // See <mach-o/nlist.h>. + enum SymbolTypeType { + STT_Undefined = 0x00, + STT_Absolute = 0x02, + STT_Section = 0x0e + }; + + enum SymbolTypeFlags { + // If any of these bits are set, then the entry is a stab entry number (see + // <mach-o/stab.h>. Otherwise the other masks apply. + STF_StabsEntryMask = 0xe0, + + STF_TypeMask = 0x0e, + STF_External = 0x01, + STF_PrivateExtern = 0x10 + }; + + /// IndirectSymbolFlags - Flags for encoding special values in the indirect + /// symbol entry. + enum IndirectSymbolFlags { + ISF_Local = 0x80000000, + ISF_Absolute = 0x40000000 + }; + + /// RelocationFlags - Special flags for addresses. + enum RelocationFlags { + RF_Scattered = 0x80000000 + }; + + enum RelocationInfoType { + RIT_Vanilla = 0, + RIT_Pair = 1, + RIT_Difference = 2, + RIT_PreboundLazyPointer = 3, + RIT_LocalDifference = 4, + RIT_TLV = 5 + }; + + /// X86_64 uses its own relocation types. + enum RelocationInfoTypeX86_64 { + RIT_X86_64_Unsigned = 0, + RIT_X86_64_Signed = 1, + RIT_X86_64_Branch = 2, + RIT_X86_64_GOTLoad = 3, + RIT_X86_64_GOT = 4, + RIT_X86_64_Subtractor = 5, + RIT_X86_64_Signed1 = 6, + RIT_X86_64_Signed2 = 7, + RIT_X86_64_Signed4 = 8, + RIT_X86_64_TLV = 9 + }; + + /// MachSymbolData - Helper struct for containing some precomputed information + /// on symbols. + struct MachSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint8_t SectionIndex; + + // Support lexicographic sorting. + bool operator<(const MachSymbolData &RHS) const { + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); + } + }; + + /// @name Relocation Data + /// @{ + + struct MachRelocationEntry { + uint32_t Word0; + uint32_t Word1; + }; + + llvm::DenseMap<const MCSectionData*, + std::vector<MachRelocationEntry> > Relocations; + llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase; + + /// @} + /// @name Symbol Table Data + /// @{ + + SmallString<256> StringTable; + std::vector<MachSymbolData> LocalSymbolData; + std::vector<MachSymbolData> ExternalSymbolData; + std::vector<MachSymbolData> UndefinedSymbolData; + + /// @} + + MachObjectWriter *Writer; + + raw_ostream &OS; + + unsigned Is64Bit : 1; + +public: + MachObjectWriterImpl(MachObjectWriter *_Writer, bool _Is64Bit) + : Writer(_Writer), OS(Writer->getStream()), Is64Bit(_Is64Bit) { + } + + void Write8(uint8_t Value) { Writer->Write8(Value); } + void Write16(uint16_t Value) { Writer->Write16(Value); } + void Write32(uint32_t Value) { Writer->Write32(Value); } + void Write64(uint64_t Value) { Writer->Write64(Value); } + void WriteZeros(unsigned N) { Writer->WriteZeros(N); } + void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { + Writer->WriteBytes(Str, ZeroFillSize); + } + + void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, + bool SubsectionsViaSymbols) { + uint32_t Flags = 0; + + if (SubsectionsViaSymbols) + Flags |= HF_SubsectionsViaSymbols; + + // struct mach_header (28 bytes) or + // struct mach_header_64 (32 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(Is64Bit ? Header_Magic64 : Header_Magic32); + + // FIXME: Support cputype. + Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386); + // FIXME: Support cpusubtype. + Write32(MachO::CPUSubType_I386_ALL); + Write32(HFT_Object); + Write32(NumLoadCommands); // Object files have a single load command, the + // segment. + Write32(LoadCommandsSize); + Write32(Flags); + if (Is64Bit) + Write32(0); // reserved + + assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size); + } + + /// WriteSegmentLoadCommand - Write a segment load command. + /// + /// \arg NumSections - The number of sections in this segment. + /// \arg SectionDataSize - The total size of the sections. + void WriteSegmentLoadCommand(unsigned NumSections, + uint64_t VMSize, + uint64_t SectionDataStartOffset, + uint64_t SectionDataSize) { + // struct segment_command (56 bytes) or + // struct segment_command_64 (72 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size : + SegmentLoadCommand32Size; + Write32(Is64Bit ? LCT_Segment64 : LCT_Segment); + Write32(SegmentLoadCommandSize + + NumSections * (Is64Bit ? Section64Size : Section32Size)); + + WriteBytes("", 16); + if (Is64Bit) { + Write64(0); // vmaddr + Write64(VMSize); // vmsize + Write64(SectionDataStartOffset); // file offset + Write64(SectionDataSize); // file size + } else { + Write32(0); // vmaddr + Write32(VMSize); // vmsize + Write32(SectionDataStartOffset); // file offset + Write32(SectionDataSize); // file size + } + Write32(0x7); // maxprot + Write32(0x7); // initprot + Write32(NumSections); + Write32(0); // flags + + assert(OS.tell() - Start == SegmentLoadCommandSize); + } + + void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCSectionData &SD, uint64_t FileOffset, + uint64_t RelocationsStart, unsigned NumRelocations) { + uint64_t SectionSize = Layout.getSectionSize(&SD); + + // The offset is unused for virtual sections. + if (Asm.getBackend().isVirtualSection(SD.getSection())) { + assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); + FileOffset = 0; + } + + // struct section (68 bytes) or + // struct section_64 (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); + WriteBytes(Section.getSectionName(), 16); + WriteBytes(Section.getSegmentName(), 16); + if (Is64Bit) { + Write64(Layout.getSectionAddress(&SD)); // address + Write64(SectionSize); // size + } else { + Write32(Layout.getSectionAddress(&SD)); // address + Write32(SectionSize); // size + } + Write32(FileOffset); + + unsigned Flags = Section.getTypeAndAttributes(); + if (SD.hasInstructions()) + Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; + + assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); + Write32(Log2_32(SD.getAlignment())); + Write32(NumRelocations ? RelocationsStart : 0); + Write32(NumRelocations); + Write32(Flags); + Write32(IndirectSymBase.lookup(&SD)); // reserved1 + Write32(Section.getStubSize()); // reserved2 + if (Is64Bit) + Write32(0); // reserved3 + + assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size); + } + + void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, + uint32_t StringTableOffset, + uint32_t StringTableSize) { + // struct symtab_command (24 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Symtab); + Write32(SymtabLoadCommandSize); + Write32(SymbolOffset); + Write32(NumSymbols); + Write32(StringTableOffset); + Write32(StringTableSize); + + assert(OS.tell() - Start == SymtabLoadCommandSize); + } + + void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, + uint32_t NumLocalSymbols, + uint32_t FirstExternalSymbol, + uint32_t NumExternalSymbols, + uint32_t FirstUndefinedSymbol, + uint32_t NumUndefinedSymbols, + uint32_t IndirectSymbolOffset, + uint32_t NumIndirectSymbols) { + // struct dysymtab_command (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Dysymtab); + Write32(DysymtabLoadCommandSize); + Write32(FirstLocalSymbol); + Write32(NumLocalSymbols); + Write32(FirstExternalSymbol); + Write32(NumExternalSymbols); + Write32(FirstUndefinedSymbol); + Write32(NumUndefinedSymbols); + Write32(0); // tocoff + Write32(0); // ntoc + Write32(0); // modtaboff + Write32(0); // nmodtab + Write32(0); // extrefsymoff + Write32(0); // nextrefsyms + Write32(IndirectSymbolOffset); + Write32(NumIndirectSymbols); + Write32(0); // extreloff + Write32(0); // nextrel + Write32(0); // locreloff + Write32(0); // nlocrel + + assert(OS.tell() - Start == DysymtabLoadCommandSize); + } + + void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { + MCSymbolData &Data = *MSD.SymbolData; + const MCSymbol &Symbol = Data.getSymbol(); + uint8_t Type = 0; + uint16_t Flags = Data.getFlags(); + uint32_t Address = 0; + + // Set the N_TYPE bits. See <mach-o/nlist.h>. + // + // FIXME: Are the prebound or indirect fields possible here? + if (Symbol.isUndefined()) + Type = STT_Undefined; + else if (Symbol.isAbsolute()) + Type = STT_Absolute; + else + Type = STT_Section; + + // FIXME: Set STAB bits. + + if (Data.isPrivateExtern()) + Type |= STF_PrivateExtern; + + // Set external bit. + if (Data.isExternal() || Symbol.isUndefined()) + Type |= STF_External; + + // Compute the symbol address. + if (Symbol.isDefined()) { + if (Symbol.isAbsolute()) { + Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue(); + } else { + Address = Layout.getSymbolAddress(&Data); + } + } else if (Data.isCommon()) { + // Common symbols are encoded with the size in the address + // field, and their alignment in the flags. + Address = Data.getCommonSize(); + + // Common alignment is packed into the 'desc' bits. + if (unsigned Align = Data.getCommonAlignment()) { + unsigned Log2Size = Log2_32(Align); + assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); + if (Log2Size > 15) + report_fatal_error("invalid 'common' alignment '" + + Twine(Align) + "'"); + // FIXME: Keep this mask with the SymbolFlags enumeration. + Flags = (Flags & 0xF0FF) | (Log2Size << 8); + } + } + + // struct nlist (12 bytes) + + Write32(MSD.StringIndex); + Write8(Type); + Write8(MSD.SectionIndex); + + // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' + // value. + Write16(Flags); + if (Is64Bit) + Write64(Address); + else + Write32(Address); + } + + // FIXME: We really need to improve the relocation validation. Basically, we + // want to implement a separate computation which evaluates the relocation + // entry as the linker would, and verifies that the resultant fixup value is + // exactly what the encoder wanted. This will catch several classes of + // problems: + // + // - Relocation entry bugs, the two algorithms are unlikely to have the same + // exact bug. + // + // - Relaxation issues, where we forget to relax something. + // + // - Input errors, where something cannot be correctly encoded. 'as' allows + // these through in many cases. + + void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); + unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // See <reloc.h>. + uint32_t FixupOffset = + Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + uint32_t FixupAddress = + Layout.getFragmentAddress(Fragment) + Fixup.getOffset(); + int64_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + Value = Target.getConstant(); + + if (IsPCRel) { + // Compensate for the relocation offset, Darwin x86_64 relocations only + // have the addend and appear to have attempted to define it to be the + // actual expression addend without the PCrel bias. However, instructions + // with data following the relocation are not accomodated for (see comment + // below regarding SIGNED{1,2,4}), so it isn't exactly that either. + Value += 1LL << Log2Size; + } + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + Type = RIT_X86_64_Unsigned; + Index = 0; + + // FIXME: I believe this is broken, I don't think the linker can + // understand it. I think it would require a local relocation, but I'm not + // sure if that would work either. The official way to get an absolute + // PCrel relocation is to use an absolute symbol (which we don't support + // yet). + if (IsPCRel) { + IsExtern = 1; + Type = RIT_X86_64_Branch; + } + } else if (Target.getSymB()) { // A - B + constant + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbolData *A_Base = Asm.getAtom(Layout, &A_SD); + + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData *B_Base = Asm.getAtom(Layout, &B_SD); + + // Neither symbol can be modified. + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported relocation of modified symbol"); + + // We don't support PCrel relocations of differences. Darwin 'as' doesn't + // implement most of these correctly. + if (IsPCRel) + report_fatal_error("unsupported pc-relative relocation of difference"); + + // We don't currently support any situation where one or both of the + // symbols would require a local relocation. This is almost certainly + // unused and may not be possible to encode correctly. + if (!A_Base || !B_Base) + report_fatal_error("unsupported local relocations in difference"); + + // Darwin 'as' doesn't emit correct relocations for this (it ends up with + // a single SIGNED relocation); reject it for now. + if (A_Base == B_Base) + report_fatal_error("unsupported relocation with identical base"); + + Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base); + Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base); + + Index = A_Base->getIndex(); + IsExtern = 1; + Type = RIT_X86_64_Unsigned; + + MachRelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment->getParent()].push_back(MRE); + + Index = B_Base->getIndex(); + IsExtern = 1; + Type = RIT_X86_64_Subtractor; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(Layout, &SD); + + // Relocations inside debug sections always use local relocations when + // possible. This seems to be done because the debugger doesn't fully + // understand x86_64 relocation entries, and expects to find values that + // have already been fixed up. + if (Symbol->isInSection()) { + const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( + Fragment->getParent()->getSection()); + if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) + Base = 0; + } + + // x86_64 almost always uses external relocations, except when there is no + // symbol to use as a base address (a local symbol with no preceeding + // non-local symbol). + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + + // Add the local offset, if needed. + if (Base != &SD) + Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base); + } else if (Symbol->isInSection()) { + // The index is the section ordinal (1-based). + Index = SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + Value += Layout.getSymbolAddress(&SD); + + if (IsPCRel) + Value -= FixupAddress + (1 << Log2Size); + } else { + report_fatal_error("unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + } + + MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); + if (IsPCRel) { + if (IsRIPRel) { + if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // x86_64 distinguishes movq foo@GOTPCREL so that the linker can + // rewrite the movq to an leaq at link time if the symbol ends up in + // the same linkage unit. + if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) + Type = RIT_X86_64_GOTLoad; + else + Type = RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + Type = RIT_X86_64_TLV; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + report_fatal_error("unsupported symbol modifier in relocation"); + } else { + Type = RIT_X86_64_Signed; + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L<foo> + <constant>) which is outside the atom + // containing L<foo>. Generally, this shouldn't occur but it does + // happen when we have a RIPrel instruction with data following the + // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel + // adjustment Darwin x86_64 uses, the offset is still negative and + // the linker has no way to recognize this. + // + // To work around this, Darwin uses several special relocation types + // to indicate the offsets. However, the specification or + // implementation of these seems to also be incomplete; they should + // adjust the addend as well based on the actual encoded instruction + // (the additional bias), but instead appear to just look at the + // final offset. + switch (-(Target.getConstant() + (1LL << Log2Size))) { + case 1: Type = RIT_X86_64_Signed1; break; + case 2: Type = RIT_X86_64_Signed2; break; + case 4: Type = RIT_X86_64_Signed4; break; + } + } + } else { + if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in branch " + "relocation"); + + Type = RIT_X86_64_Branch; + } + } else { + if (Modifier == MCSymbolRefExpr::VK_GOT) { + Type = RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // GOTPCREL is allowed as a modifier on non-PCrel instructions, in + // which case all we do is set the PCrel bit in the relocation entry; + // this is used with exception handling, for example. The source is + // required to include any necessary offset directly. + Type = RIT_X86_64_GOT; + IsPCRel = 1; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + report_fatal_error("TLVP symbol modifier should have been rip-rel"); + } else if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in relocation"); + else + Type = RIT_X86_64_Unsigned; + } + } + + // x86_64 always writes custom values into the fixups. + FixedValue = Value; + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordScatteredRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + unsigned Type = RIT_Vanilla; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Layout.getSymbolAddress(A_SD); + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + // + // Note that there is no longer any semantic difference between these two + // relocation types from the linkers point of view, this is done solely + // for pedantic compatibility with 'as'. + Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference; + Value2 = Layout.getSymbolAddress(B_SD); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == RIT_Difference || Type == RIT_LocalDifference) { + MachRelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + RF_Scattered); + MRE.Word1 = Value2; + Relocations[Fragment->getParent()].push_back(MRE); + } + + MachRelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + RF_Scattered); + MRE.Word1 = Value; + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordTLVPRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && + !Is64Bit && + "Should only be called with a 32-bit TLVP relocation!"); + + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = 0; + + // Get the symbol data. + MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + unsigned Index = SD_A->getIndex(); + + // We're only going to have a second symbol in pic mode and it'll be a + // subtraction from the picbase. For 32-bit pic the addend is the difference + // between the picbase and the next address. For 32-bit static the addend + // is zero. + if (Target.getSymB()) { + // If this is a subtraction then we're pcrel. + uint32_t FixupAddress = + Layout.getFragmentAddress(Fragment) + Fixup.getOffset(); + MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); + IsPCRel = 1; + FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) + + Target.getConstant()); + FixedValue += 1 << Log2Size; + } else { + FixedValue = 0; + } + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = Value; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // Extern + (RIT_TLV << 28)); // Type + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { + if (Is64Bit) { + RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); + return; + } + + unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // If this is a 32-bit TLVP reloc it's handled a bit differently. + if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { + RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); + return; + } + + // If this is a difference or a defined symbol plus an offset, then we need + // a scattered relocation entry. + // Differences always require scattered relocations. + if (Target.getSymB()) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); + + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // If this is an internal relocation with an offset, it also needs a + // scattered relocation entry. + uint32_t Offset = Target.getConstant(); + if (IsPCRel) + Offset += 1 << Log2Size; + if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); + + // See <reloc.h>. + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + Type = RIT_Vanilla; + } else { + // Check whether we need an external or internal relocation. + if (doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolAddress(SD); + } else { + // The index is the section ordinal (1-based). + Index = SD->getFragment()->getParent()->getOrdinal() + 1; + } + + Type = RIT_Vanilla; + } + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment->getParent()].push_back(MRE); + } + + void BindIndirectSymbols(MCAssembler &Asm) { + // This is the point where 'as' creates actual symbols for indirect symbols + // (in the following two passes). It would be easier for us to do this + // sooner when we see the attribute, but that makes getting the order in the + // symbol table much more complicated than it is worth. + // + // FIXME: Revisit this when the dust settles. + + // Bind non lazy symbol pointers first. + unsigned IndirectIndex = 0; + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { + const MCSectionMachO &Section = + cast<MCSectionMachO>(it->SectionData->getSection()); + + if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) + continue; + + // Initialize the section indirect symbol base, if necessary. + if (!IndirectSymBase.count(it->SectionData)) + IndirectSymBase[it->SectionData] = IndirectIndex; + + Asm.getOrCreateSymbolData(*it->Symbol); + } + + // Then lazy symbol pointers and symbol stubs. + IndirectIndex = 0; + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { + const MCSectionMachO &Section = + cast<MCSectionMachO>(it->SectionData->getSection()); + + if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && + Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) + continue; + + // Initialize the section indirect symbol base, if necessary. + if (!IndirectSymBase.count(it->SectionData)) + IndirectSymBase[it->SectionData] = IndirectIndex; + + // Set the symbol type to undefined lazy, but only on construction. + // + // FIXME: Do not hardcode. + bool Created; + MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); + if (Created) + Entry.setFlags(Entry.getFlags() | 0x0001); + } + } + + /// ComputeSymbolTable - Compute the symbol table data + /// + /// \param StringTable [out] - The string table data. + /// \param StringIndexMap [out] - Map from symbol names to offsets in the + /// string table. + void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, + std::vector<MachSymbolData> &LocalSymbolData, + std::vector<MachSymbolData> &ExternalSymbolData, + std::vector<MachSymbolData> &UndefinedSymbolData) { + // Build section lookup table. + DenseMap<const MCSection*, uint8_t> SectionIndexMap; + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + SectionIndexMap[&it->getSection()] = Index; + assert(Index <= 256 && "Too many sections!"); + + // Index 0 is always the empty string. + StringMap<uint64_t> StringIndexMap; + StringTable += '\x00'; + + // Build the symbol arrays and the string table, but only for non-local + // symbols. + // + // The particular order that we collect the symbols and create the string + // table, then sort the symbols is chosen to match 'as'. Even though it + // doesn't matter for correctness, this is important for letting us diff .o + // files. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) + continue; + + if (!it->isExternal() && !Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isUndefined()) { + MSD.SectionIndex = 0; + UndefinedSymbolData.push_back(MSD); + } else if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + ExternalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + ExternalSymbolData.push_back(MSD); + } + } + + // Now add the data for local symbols. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) + continue; + + if (it->isExternal() || Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + LocalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + LocalSymbolData.push_back(MSD); + } + } + + // External and undefined symbols are required to be in lexicographic order. + std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); + std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + + // Set the symbol indices. + Index = 0; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + LocalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + ExternalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + UndefinedSymbolData[i].SymbolData->setIndex(Index++); + + // The string table is padded to a multiple of 4. + while (StringTable.size() % 4) + StringTable += '\x00'; + } + + void ExecutePostLayoutBinding(MCAssembler &Asm) { + // Create symbol data for any indirect symbols. + BindIndirectSymbols(Asm); + + // Compute symbol table information and bind symbol indices. + ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, + UndefinedSymbolData); + } + + void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) { + unsigned NumSections = Asm.size(); + + // The section data starts after the header, the segment load command (and + // section headers) and the symbol table. + unsigned NumLoadCommands = 1; + uint64_t LoadCommandsSize = Is64Bit ? + SegmentLoadCommand64Size + NumSections * Section64Size : + SegmentLoadCommand32Size + NumSections * Section32Size; + + // Add the symbol table load command sizes, if used. + unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + + UndefinedSymbolData.size(); + if (NumSymbols) { + NumLoadCommands += 2; + LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize; + } + + // Compute the total size of the section data, as well as its file size and + // vm size. + uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size) + + LoadCommandsSize; + uint64_t SectionDataSize = 0; + uint64_t SectionDataFileSize = 0; + uint64_t VMSize = 0; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + uint64_t Address = Layout.getSectionAddress(&SD); + uint64_t Size = Layout.getSectionSize(&SD); + uint64_t FileSize = Layout.getSectionFileSize(&SD); + + VMSize = std::max(VMSize, Address + Size); + + if (Asm.getBackend().isVirtualSection(SD.getSection())) + continue; + + SectionDataSize = std::max(SectionDataSize, Address + Size); + SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); + } + + // The section data is padded to 4 bytes. + // + // FIXME: Is this machine dependent? + unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); + SectionDataFileSize += SectionDataPadding; + + // Write the prolog, starting with the header and load command... + WriteHeader(NumLoadCommands, LoadCommandsSize, + Asm.getSubsectionsViaSymbols()); + WriteSegmentLoadCommand(NumSections, VMSize, + SectionDataStart, SectionDataSize); + + // ... and then the section headers. + uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + std::vector<MachRelocationEntry> &Relocs = Relocations[it]; + unsigned NumRelocs = Relocs.size(); + uint64_t SectionStart = SectionDataStart + Layout.getSectionAddress(it); + WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); + RelocTableEnd += NumRelocs * RelocationInfoSize; + } + + // Write the symbol table load command, if used. + if (NumSymbols) { + unsigned FirstLocalSymbol = 0; + unsigned NumLocalSymbols = LocalSymbolData.size(); + unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; + unsigned NumExternalSymbols = ExternalSymbolData.size(); + unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; + unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); + unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); + unsigned NumSymTabSymbols = + NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; + uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; + uint64_t IndirectSymbolOffset = 0; + + // If used, the indirect symbols are written after the section data. + if (NumIndirectSymbols) + IndirectSymbolOffset = RelocTableEnd; + + // The symbol table is written after the indirect symbol data. + uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; + + // The string table is written after symbol table. + uint64_t StringTableOffset = + SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size : + Nlist32Size); + WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, + StringTableOffset, StringTable.size()); + + WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, + FirstExternalSymbol, NumExternalSymbols, + FirstUndefinedSymbol, NumUndefinedSymbols, + IndirectSymbolOffset, NumIndirectSymbols); + } + + // Write the actual section data. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) + Asm.WriteSectionData(it, Layout, Writer); + + // Write the extra padding. + WriteZeros(SectionDataPadding); + + // Write the relocation entries. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + // Write the section relocation entries, in reverse order to match 'as' + // (approximately, the exact algorithm is more complicated than this). + std::vector<MachRelocationEntry> &Relocs = Relocations[it]; + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + Write32(Relocs[e - i - 1].Word0); + Write32(Relocs[e - i - 1].Word1); + } + } + + // Write the symbol table data, if used. + if (NumSymbols) { + // Write the indirect symbol entries. + for (MCAssembler::const_indirect_symbol_iterator + it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // Indirect symbols in the non lazy symbol pointer section have some + // special handling. + const MCSectionMachO &Section = + static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { + // If this symbol is defined and internal, mark it as such. + if (it->Symbol->isDefined() && + !Asm.getSymbolData(*it->Symbol).isExternal()) { + uint32_t Flags = ISF_Local; + if (it->Symbol->isAbsolute()) + Flags |= ISF_Absolute; + Write32(Flags); + continue; + } + } + + Write32(Asm.getSymbolData(*it->Symbol).getIndex()); + } + + // FIXME: Check that offsets match computed ones. + + // Write the symbol table entries. + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + WriteNlist(LocalSymbolData[i], Layout); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + WriteNlist(ExternalSymbolData[i], Layout); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + WriteNlist(UndefinedSymbolData[i], Layout); + + // Write the string table. + OS << StringTable.str(); + } + } +}; + +} + +MachObjectWriter::MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + bool IsLittleEndian) + : MCObjectWriter(OS, IsLittleEndian) +{ + Impl = new MachObjectWriterImpl(this, Is64Bit); +} + +MachObjectWriter::~MachObjectWriter() { + delete (MachObjectWriterImpl*) Impl; +} + +void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { + ((MachObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm); +} + +void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); +} + +void MachObjectWriter::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + ((MachObjectWriterImpl*) Impl)->WriteObject(Asm, Layout); +} diff --git a/contrib/llvm/lib/MC/Makefile b/contrib/llvm/lib/MC/Makefile new file mode 100644 index 0000000..a661fa6 --- /dev/null +++ b/contrib/llvm/lib/MC/Makefile @@ -0,0 +1,16 @@ +##===- lib/MC/Makefile -------------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMMC +BUILD_ARCHIVE := 1 +PARALLEL_DIRS := MCParser + +include $(LEVEL)/Makefile.common + diff --git a/contrib/llvm/lib/MC/TargetAsmBackend.cpp b/contrib/llvm/lib/MC/TargetAsmBackend.cpp new file mode 100644 index 0000000..bbfddbe --- /dev/null +++ b/contrib/llvm/lib/MC/TargetAsmBackend.cpp @@ -0,0 +1,22 @@ +//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +TargetAsmBackend::TargetAsmBackend(const Target &T) + : TheTarget(T), + HasAbsolutizedSet(false), + HasReliableSymbolDifference(false), + HasScatteredSymbols(false) +{ +} + +TargetAsmBackend::~TargetAsmBackend() { +} diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp new file mode 100644 index 0000000..6804766 --- /dev/null +++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -0,0 +1,71 @@ +//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of a Win32 COFF object file writer. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "WinCOFFObjectWriter" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +using namespace llvm; + +namespace { + + class WinCOFFObjectWriter : public MCObjectWriter { + public: + WinCOFFObjectWriter(raw_ostream &OS); + + // MCObjectWriter interface implementation. + + void ExecutePostLayoutBinding(MCAssembler &Asm); + + void RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + + void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout); + }; +} + +WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS) + : MCObjectWriter(OS, true) { +} + +//////////////////////////////////////////////////////////////////////////////// +// MCObjectWriter interface implementations + +void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { +} + +void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { +} + +void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { +} + +//------------------------------------------------------------------------------ +// WinCOFFObjectWriter factory function + +namespace llvm { + MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS) { + return new WinCOFFObjectWriter(OS); + } +} diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp new file mode 100644 index 0000000..1030cdb --- /dev/null +++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp @@ -0,0 +1,198 @@ +//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of a Win32 COFF object file streamer. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "WinCOFFStreamer" + +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define dbg_notimpl(x) \ + do { dbgs() << "not implemented, " << __FUNCTION__ << " (" << x << ")"; \ + abort(); } while (false); + +namespace { +class WinCOFFStreamer : public MCObjectStreamer { +public: + WinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS); + + // MCStreamer interface + + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size,unsigned ByteAlignment); + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, unsigned MaxBytesToEmit); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit); + virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value); + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename); + virtual void EmitInstruction(const MCInst &Instruction); + virtual void Finish(); +}; +} // end anonymous namespace. + +WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS) + : MCObjectStreamer(Context, TAB, OS, &CE) { +} + +// MCStreamer interface + +void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) { +} + +void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + dbg_notimpl("Flag = " << Flag); +} + +void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { +} + +void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { +} + +void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + dbg_notimpl("Symbol = " << Symbol->getName() << ", DescValue = "<< DescValue); +} + +void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { +} + +void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { +} + +void WinCOFFStreamer::EmitCOFFSymbolType(int Type) { +} + +void WinCOFFStreamer::EndCOFFSymbolDef() { +} + +void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + dbg_notimpl("Symbol = " << Symbol->getName() << ", Value = " << *Value); +} + +void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { +} + +void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { +} + +void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size,unsigned ByteAlignment) { + MCSectionCOFF const *SectionCOFF = + static_cast<MCSectionCOFF const *>(Section); + + dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " << + Symbol->getName() << ", Size = " << Size << ", ByteAlignment = " + << ByteAlignment); +} + +void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + MCSectionCOFF const *SectionCOFF = + static_cast<MCSectionCOFF const *>(Section); + + dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " << + Symbol->getName() << ", Size = " << Size << ", ByteAlignment = " + << ByteAlignment); +} + +void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { +} + +void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { +} + +void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) { + dbg_notimpl("Value = '" << *Value); +} + +void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { +} + +void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) { +} + +void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) { + dbg_notimpl("Offset = '" << *Offset << "', Value = " << Value); +} + +void WinCOFFStreamer::EmitFileDirective(StringRef Filename) { + // Ignore for now, linkers don't care, and proper debug + // info will be a much large effort. +} + +void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo, + StringRef Filename) { + dbg_notimpl("FileNo = " << FileNo << ", Filename = '" << Filename << "'"); +} + +void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) { +} + +void WinCOFFStreamer::Finish() { + MCObjectStreamer::Finish(); +} + +namespace llvm +{ + MCStreamer *createWinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS) { + return new WinCOFFStreamer(Context, TAB, CE, OS); + } +} |